From 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sat, 19 Jun 2021 13:03:32 +0200 Subject: [PATCH] r600/sfn: rewrite NIR backend This is a rewrite of the NIR backend. It adds some optimizations and a scheduler. v2: - replace some magic numbers by constants - make sure constructor is always used with new - use default initialization in more places (changes suggested by Filip Gawin) Signed-off-by: Gert Wollny Reviewed-by: Filip Gawin Part-of: --- src/gallium/drivers/r600/meson.build | 119 +- src/gallium/drivers/r600/r600_asm.c | 4 +- src/gallium/drivers/r600/r600_asm.h | 2 + src/gallium/drivers/r600/r600_pipe.c | 13 +- src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp | 126 +- src/gallium/drivers/r600/sfn/sfn_alu_defines.h | 103 +- .../r600/sfn/sfn_alu_readport_validation.cpp | 329 +++ .../drivers/r600/sfn/sfn_alu_readport_validation.h | 41 + .../{sfn_ir_to_assembly.cpp => sfn_assembler.cpp} | 1626 +++++----- src/gallium/drivers/r600/sfn/sfn_assembler.h | 26 + .../drivers/r600/sfn/sfn_conditionaljumptracker.h | 4 - src/gallium/drivers/r600/sfn/sfn_debug.cpp | 4 + src/gallium/drivers/r600/sfn/sfn_debug.h | 6 +- src/gallium/drivers/r600/sfn/sfn_defines.h | 3 + src/gallium/drivers/r600/sfn/sfn_docu.txt | 47 +- .../drivers/r600/sfn/sfn_emitaluinstruction.cpp | 1046 ------- .../drivers/r600/sfn/sfn_emitaluinstruction.h | 116 - .../drivers/r600/sfn/sfn_emitinstruction.cpp | 169 -- src/gallium/drivers/r600/sfn/sfn_emitinstruction.h | 102 - .../drivers/r600/sfn/sfn_emitssboinstruction.cpp | 741 ----- .../drivers/r600/sfn/sfn_emitssboinstruction.h | 60 - .../drivers/r600/sfn/sfn_emittexinstruction.cpp | 671 ----- .../drivers/r600/sfn/sfn_emittexinstruction.h | 96 - src/gallium/drivers/r600/sfn/sfn_instr.cpp | 522 ++++ src/gallium/drivers/r600/sfn/sfn_instr.h | 314 ++ src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp | 2449 +++++++++++++++ src/gallium/drivers/r600/sfn/sfn_instr_alu.h | 193 ++ .../drivers/r600/sfn/sfn_instr_alugroup.cpp | 
361 +++ src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h | 89 + .../drivers/r600/sfn/sfn_instr_controlflow.cpp | 176 ++ .../drivers/r600/sfn/sfn_instr_controlflow.h | 81 + src/gallium/drivers/r600/sfn/sfn_instr_export.cpp | 524 ++++ src/gallium/drivers/r600/sfn/sfn_instr_export.h | 213 ++ src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp | 659 +++++ src/gallium/drivers/r600/sfn/sfn_instr_fetch.h | 152 + src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp | 411 +++ src/gallium/drivers/r600/sfn/sfn_instr_lds.h | 80 + src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp | 844 ++++++ src/gallium/drivers/r600/sfn/sfn_instr_mem.h | 177 ++ src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp | 1011 +++++++ src/gallium/drivers/r600/sfn/sfn_instr_tex.h | 166 ++ src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp | 188 ++ src/gallium/drivers/r600/sfn/sfn_instrfactory.h | 34 + .../drivers/r600/sfn/sfn_instruction_alu.cpp | 183 -- src/gallium/drivers/r600/sfn/sfn_instruction_alu.h | 142 - .../drivers/r600/sfn/sfn_instruction_base.cpp | 187 -- .../drivers/r600/sfn/sfn_instruction_base.h | 155 - .../drivers/r600/sfn/sfn_instruction_block.cpp | 57 - .../drivers/r600/sfn/sfn_instruction_block.h | 82 - .../drivers/r600/sfn/sfn_instruction_cf.cpp | 195 -- src/gallium/drivers/r600/sfn/sfn_instruction_cf.h | 142 - .../drivers/r600/sfn/sfn_instruction_export.cpp | 341 --- .../drivers/r600/sfn/sfn_instruction_export.h | 185 -- .../drivers/r600/sfn/sfn_instruction_fetch.cpp | 480 --- .../drivers/r600/sfn/sfn_instruction_fetch.h | 187 -- .../drivers/r600/sfn/sfn_instruction_gds.cpp | 180 -- src/gallium/drivers/r600/sfn/sfn_instruction_gds.h | 225 -- .../drivers/r600/sfn/sfn_instruction_lds.cpp | 151 - src/gallium/drivers/r600/sfn/sfn_instruction_lds.h | 82 - .../drivers/r600/sfn/sfn_instruction_misc.cpp | 68 - .../drivers/r600/sfn/sfn_instruction_misc.h | 69 - src/gallium/drivers/r600/sfn/sfn_instruction_tex.h | 143 - .../drivers/r600/sfn/sfn_instructionvisitor.cpp | 0 
.../drivers/r600/sfn/sfn_instructionvisitor.h | 91 - src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h | 45 - src/gallium/drivers/r600/sfn/sfn_liverange.cpp | 1006 ------- src/gallium/drivers/r600/sfn/sfn_liverange.h | 314 -- .../drivers/r600/sfn/sfn_liverangeevaluator.cpp | 438 +++ .../drivers/r600/sfn/sfn_liverangeevaluator.h | 23 + .../r600/sfn/sfn_liverangeevaluator_helpers.cpp | 623 ++++ .../r600/sfn/sfn_liverangeevaluator_helpers.h | 162 + src/gallium/drivers/r600/sfn/sfn_memorypool.cpp | 86 + src/gallium/drivers/r600/sfn/sfn_memorypool.h | 69 + src/gallium/drivers/r600/sfn/sfn_nir.cpp | 429 +-- src/gallium/drivers/r600/sfn/sfn_nir.h | 52 +- .../r600/sfn/sfn_nir_legalize_image_load_store.cpp | 6 +- .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp | 393 ++- .../drivers/r600/sfn/sfn_nir_lower_tess_io.cpp | 7 +- ...n_instruction_tex.cpp => sfn_nir_lower_tex.cpp} | 153 +- src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h | 10 + src/gallium/drivers/r600/sfn/sfn_optimizer.cpp | 627 ++++ src/gallium/drivers/r600/sfn/sfn_optimizer.h | 17 + src/gallium/drivers/r600/sfn/sfn_optimizers.cpp | 12 - src/gallium/drivers/r600/sfn/sfn_optimizers.h | 14 - src/gallium/drivers/r600/sfn/sfn_peephole.cpp | 212 ++ src/gallium/drivers/r600/sfn/sfn_peephole.h | 13 + src/gallium/drivers/r600/sfn/sfn_ra.cpp | 268 ++ src/gallium/drivers/r600/sfn/sfn_ra.h | 51 + src/gallium/drivers/r600/sfn/sfn_scheduler.cpp | 890 ++++++ src/gallium/drivers/r600/sfn/sfn_scheduler.h | 13 + src/gallium/drivers/r600/sfn/sfn_shader.cpp | 1379 +++++++++ src/gallium/drivers/r600/sfn/sfn_shader.h | 365 +++ src/gallium/drivers/r600/sfn/sfn_shader_base.cpp | 1188 -------- src/gallium/drivers/r600/sfn/sfn_shader_base.h | 231 -- .../drivers/r600/sfn/sfn_shader_compute.cpp | 112 - src/gallium/drivers/r600/sfn/sfn_shader_compute.h | 62 - src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp | 95 + src/gallium/drivers/r600/sfn/sfn_shader_cs.h | 39 + .../drivers/r600/sfn/sfn_shader_fragment.cpp | 1074 ------- 
src/gallium/drivers/r600/sfn/sfn_shader_fragment.h | 117 - src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp | 873 ++++++ src/gallium/drivers/r600/sfn/sfn_shader_fs.h | 88 + .../drivers/r600/sfn/sfn_shader_geometry.cpp | 343 --- src/gallium/drivers/r600/sfn/sfn_shader_geometry.h | 81 - src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp | 370 +++ src/gallium/drivers/r600/sfn/sfn_shader_gs.h | 66 + src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp | 108 - src/gallium/drivers/r600/sfn/sfn_shader_tcs.h | 33 - src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp | 275 ++ src/gallium/drivers/r600/sfn/sfn_shader_tess.h | 76 + .../drivers/r600/sfn/sfn_shader_tess_eval.cpp | 123 - .../drivers/r600/sfn/sfn_shader_tess_eval.h | 39 - src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp | 230 -- src/gallium/drivers/r600/sfn/sfn_shader_vertex.h | 83 - src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp | 663 +++++ src/gallium/drivers/r600/sfn/sfn_shader_vs.h | 156 + src/gallium/drivers/r600/sfn/sfn_shaderio.cpp | 448 --- src/gallium/drivers/r600/sfn/sfn_shaderio.h | 176 -- src/gallium/drivers/r600/sfn/sfn_value.cpp | 242 -- src/gallium/drivers/r600/sfn/sfn_value.h | 194 -- src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp | 380 --- src/gallium/drivers/r600/sfn/sfn_value_gpr.h | 208 -- src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp | 959 ++++++ src/gallium/drivers/r600/sfn/sfn_valuefactory.h | 299 ++ src/gallium/drivers/r600/sfn/sfn_valuepool.cpp | 526 ---- src/gallium/drivers/r600/sfn/sfn_valuepool.h | 242 -- .../drivers/r600/sfn/sfn_vertexstageexport.cpp | 535 ---- .../drivers/r600/sfn/sfn_vertexstageexport.h | 116 - src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp | 1072 +++++++ src/gallium/drivers/r600/sfn/sfn_virtualvalues.h | 460 +++ src/gallium/drivers/r600/sfn/tests/meson.build | 37 + .../drivers/r600/sfn/tests/sfn_instr_test.cpp | 798 +++++ .../r600/sfn/tests/sfn_instrfromstring_test.cpp | 728 +++++ .../drivers/r600/sfn/tests/sfn_liverange_test.cpp | 217 ++ 
.../drivers/r600/sfn/tests/sfn_optimizer_test.cpp | 300 ++ .../r600/sfn/tests/sfn_shaderfromstring_test.cpp | 123 + .../drivers/r600/sfn/tests/sfn_test_shaders.cpp | 3116 ++++++++++++++++++++ .../drivers/r600/sfn/tests/sfn_test_shaders.h | 115 + .../drivers/r600/sfn/tests/sfn_value_test.cpp | 244 ++ .../r600/sfn/tests/sfn_valuefactory_test.cpp | 285 ++ 140 files changed, 27319 insertions(+), 16146 deletions(-) create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h rename src/gallium/drivers/r600/sfn/{sfn_ir_to_assembly.cpp => sfn_assembler.cpp} (52%) create mode 100644 src/gallium/drivers/r600/sfn/sfn_assembler.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alu.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_export.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_export.h create mode 
100644 src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_fetch.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_lds.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_mem.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_tex.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instrfactory.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_block.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_gds.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_lds.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_misc.h delete mode 100644 
src/gallium/drivers/r600/sfn/sfn_instruction_tex.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_memorypool.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_memorypool.h rename src/gallium/drivers/r600/sfn/{sfn_instruction_tex.cpp => sfn_nir_lower_tex.cpp} (65%) create mode 100644 src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizer.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizer.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizers.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizers.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_peephole.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_peephole.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_ra.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_ra.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_scheduler.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_scheduler.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp delete mode 100644 
src/gallium/drivers/r600/sfn/sfn_shader_compute.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_cs.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fs.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_geometry.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_gs.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tcs.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vs.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value_gpr.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuefactory.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_valuepool.cpp delete mode 
100644 src/gallium/drivers/r600/sfn/sfn_valuepool.h delete mode 100644 src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp delete mode 100644 src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_virtualvalues.h create mode 100644 src/gallium/drivers/r600/sfn/tests/meson.build create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index e59917d..7059f6d 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -107,6 +107,10 @@ files_r600 = files( 'sb/sb_valtable.cpp', 'sfn/sfn_alu_defines.cpp', 'sfn/sfn_alu_defines.h', + 'sfn/sfn_alu_readport_validation.cpp', + 'sfn/sfn_alu_readport_validation.h', + 'sfn/sfn_assembler.cpp', + 'sfn/sfn_assembler.h', 'sfn/sfn_callstack.cpp', 'sfn/sfn_callstack.h', 'sfn/sfn_conditionaljumptracker.cpp', @@ -114,73 +118,66 @@ files_r600 = files( 'sfn/sfn_defines.h', 'sfn/sfn_debug.cpp', 'sfn/sfn_debug.h', - 'sfn/sfn_emitaluinstruction.cpp', - 'sfn/sfn_emitaluinstruction.h', - 'sfn/sfn_emitinstruction.cpp', - 'sfn/sfn_emitinstruction.h', - 'sfn/sfn_emitssboinstruction.cpp', - 'sfn/sfn_emitssboinstruction.h', - 'sfn/sfn_emittexinstruction.cpp', - 
'sfn/sfn_emittexinstruction.h', - 'sfn/sfn_emitinstruction.h', - 'sfn/sfn_instruction_alu.cpp', - 'sfn/sfn_instruction_alu.h', - 'sfn/sfn_instruction_base.cpp', - 'sfn/sfn_instruction_base.h', - 'sfn/sfn_instruction_block.cpp', - 'sfn/sfn_instruction_block.h', - 'sfn/sfn_instruction_cf.cpp', - 'sfn/sfn_instruction_cf.h', - 'sfn/sfn_instruction_export.cpp', - 'sfn/sfn_instruction_export.h', - 'sfn/sfn_instruction_fetch.cpp', - 'sfn/sfn_instruction_fetch.h', - 'sfn/sfn_instruction_gds.cpp', - 'sfn/sfn_instruction_gds.h', - 'sfn/sfn_instruction_lds.cpp', - 'sfn/sfn_instruction_lds.h', - 'sfn/sfn_instruction_misc.cpp', - 'sfn/sfn_instruction_misc.h', - 'sfn/sfn_instruction_tex.cpp', - 'sfn/sfn_instruction_tex.h', - 'sfn/sfn_ir_to_assembly.cpp', - 'sfn/sfn_ir_to_assembly.h', - 'sfn/sfn_liverange.cpp', - 'sfn/sfn_liverange.h', + 'sfn/sfn_instr.cpp', + 'sfn/sfn_instr.h', + 'sfn/sfn_instr_alu.cpp', + 'sfn/sfn_instr_alu.h', + 'sfn/sfn_instr_alugroup.cpp', + 'sfn/sfn_instr_alugroup.h', + 'sfn/sfn_instr_controlflow.cpp', + 'sfn/sfn_instr_controlflow.h', + 'sfn/sfn_instr_export.cpp', + 'sfn/sfn_instr_export.h', + 'sfn/sfn_instr_fetch.cpp', + 'sfn/sfn_instr_fetch.h', + 'sfn/sfn_instr_mem.cpp', + 'sfn/sfn_instr_mem.h', + 'sfn/sfn_instr_lds.cpp', + 'sfn/sfn_instr_lds.h', + 'sfn/sfn_instr_tex.cpp', + 'sfn/sfn_instr_tex.h', + 'sfn/sfn_instrfactory.cpp', + 'sfn/sfn_instrfactory.h', + 'sfn/sfn_liverangeevaluator.cpp', + 'sfn/sfn_liverangeevaluator.h', + 'sfn/sfn_liverangeevaluator_helpers.cpp', + 'sfn/sfn_liverangeevaluator_helpers.h', + 'sfn/sfn_memorypool.cpp', + 'sfn/sfn_memorypool.h', 'sfn/sfn_nir.cpp', 'sfn/sfn_nir.h', 'sfn/sfn_nir_legalize_image_load_store.cpp', 'sfn/sfn_nir_lower_64bit.cpp', 'sfn/sfn_nir_lower_alu.cpp', 'sfn/sfn_nir_lower_alu.h', + 'sfn/sfn_nir_lower_tex.cpp', + 'sfn/sfn_nir_lower_tex.h', 'sfn/sfn_nir_lower_fs_out_to_vector.cpp', 'sfn/sfn_nir_lower_fs_out_to_vector.h', 'sfn/sfn_nir_lower_tess_io.cpp', 'sfn/sfn_nir_vectorize_vs_inputs.c', - 
'sfn/sfn_shader_base.cpp', - 'sfn/sfn_shader_base.h', - 'sfn/sfn_shader_compute.cpp', - 'sfn/sfn_shader_compute.h', - 'sfn/sfn_shader_fragment.cpp', - 'sfn/sfn_shader_fragment.h', - 'sfn/sfn_shader_geometry.cpp', - 'sfn/sfn_shader_geometry.h', - 'sfn/sfn_shader_tcs.cpp', - 'sfn/sfn_shader_tcs.h', - 'sfn/sfn_shader_tess_eval.cpp', - 'sfn/sfn_shader_tess_eval.h', - 'sfn/sfn_shader_vertex.cpp', - 'sfn/sfn_shader_vertex.h', - 'sfn/sfn_shaderio.cpp', - 'sfn/sfn_shaderio.h', - 'sfn/sfn_value.cpp', - 'sfn/sfn_value.h', - 'sfn/sfn_value_gpr.cpp', - 'sfn/sfn_value_gpr.h', - 'sfn/sfn_valuepool.cpp', - 'sfn/sfn_valuepool.h', - 'sfn/sfn_vertexstageexport.cpp', - 'sfn/sfn_vertexstageexport.h', + 'sfn/sfn_optimizer.cpp', + 'sfn/sfn_peephole.cpp', + 'sfn/sfn_ra.cpp', + 'sfn/sfn_ra.h', + 'sfn/sfn_scheduler.cpp', + 'sfn/sfn_scheduler.h', + 'sfn/sfn_shader.cpp', + 'sfn/sfn_shader.h', + 'sfn/sfn_shader_cs.cpp', + 'sfn/sfn_shader_cs.h', + 'sfn/sfn_shader_fs.cpp', + 'sfn/sfn_shader_fs.h', + 'sfn/sfn_shader_gs.cpp', + 'sfn/sfn_shader_gs.h', + 'sfn/sfn_shader_tess.cpp', + 'sfn/sfn_shader_tess.h', + 'sfn/sfn_shader_vs.cpp', + 'sfn/sfn_shader_vs.h', + 'sfn/sfn_valuefactory.cpp', + 'sfn/sfn_valuefactory.h', + 'sfn/sfn_virtualvalues.cpp', + 'sfn/sfn_virtualvalues.h', ) egd_tables_h = custom_target( @@ -200,11 +197,13 @@ libr600 = static_library( 'r600', [files_r600, egd_tables_h], c_args : [r600_c_args, '-Wstrict-overflow=0'], + cpp_args: '-std=c++17', gnu_symbol_visibility : 'hidden', include_directories : [ inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common, inc_gallium_drivers, ], + dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers], ) @@ -212,3 +211,9 @@ driver_r600 = declare_dependency( compile_args : '-DGALLIUM_R600', link_with : [libr600, libmesa, libradeonwinsys], ) + +if with_tests + subdir('sfn/tests') +endif + + diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c 
index 92ea9c5..954af06 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -407,8 +407,8 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu * } assignment[4] = alu; } else { - if (assignment[chan]) { - assert(0); /* ALU.chan has already been allocated. */ + if (assignment[chan]) { + assert(0); /* ALU.chan has already been allocated. */ return -1; } assignment[chan] = alu; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 3072e64..952a950 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -355,6 +355,8 @@ void eg_bytecode_export_read(struct r600_bytecode *bc, void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp, unsigned *endian); +int r600_load_ar(struct r600_bytecode *bc); + static inline int fp64_switch(int i) { switch (i) { diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index b178c70..4ba7075 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -420,11 +420,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) if (is_nir_enabled(&rscreen->b)) return 1; return 0; - case PIPE_CAP_INT64_DIVMOD: - /* it is actually not supported, but the nir lowering hdanles this corectly wheras - * the glsl lowering path seems to not initialize the buildins correctly. - */ - return is_nir_enabled(&rscreen->b); + + case PIPE_CAP_TWO_SIDED_COLOR: + return !is_nir_enabled(&rscreen->b); + case PIPE_CAP_INT64_DIVMOD: + /* it is actually not supported, but the nir lowering handles this correctly whereas + * the glsl lowering path seems to not initialize the builtins correctly. 
+ */ + return is_nir_enabled(&rscreen->b); case PIPE_CAP_CULL_DISTANCE: return 1; diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp index 8690fc2..b3bf317 100644 --- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp @@ -46,7 +46,7 @@ const std::map alu_ops = { {op1_cos ,AluOp(1, 1, AluOp::t,"COS")}, {op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")}, {op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")}, - {op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")}, + {op1_flt_to_int ,AluOp(1, 0, AluOp::v,"FLT_TO_INT")}, {op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")}, {op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")}, {op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")}, @@ -84,15 +84,15 @@ const std::map alu_ops = { {op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")}, {op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")}, {op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")}, - {op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")}, - {op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")}, - {op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")}, - {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")}, + {op1_recip_64 ,AluOp(2, 1, AluOp::t,"RECIP_64")}, + {op1_recip_clamped_64 ,AluOp(2, 1, AluOp::t,"RECIP_CLAMPED_64")}, + {op1_recipsqrt_64 ,AluOp(2, 1, AluOp::t,"RECIPSQRT_64")}, + {op1_recipsqrt_clamped_64,AluOp(2, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")}, {op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")}, {op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")}, {op1_sin ,AluOp(1, 1, AluOp::t,"SIN")}, {op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")}, - {op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")}, + {op1_sqrt_64 ,AluOp(2, 1, AluOp::t,"SQRT_64")}, {op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")}, {op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")}, {op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")}, @@ -273,53 +273,73 @@ 
const std::map alu_src_const = { }; const std::map lds_ops = { - {DS_OP_ADD , {2, "DS_ADD"}}, - {DS_OP_SUB , {2, "DS_SUB"}}, - {DS_OP_RSUB , {2, "DS_RSUB"}}, - {DS_OP_INC , {2, "DS_INC"}}, - {DS_OP_DEC , {2, "DS_DEC"}}, - {DS_OP_MIN_INT , {2, "DS_MIN_INT"}}, - {DS_OP_MAX_INT , {2, "DS_MAX_INT"}}, - {DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}}, - {DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}}, - {DS_OP_AND , {2, "DS_AND"}}, - {DS_OP_OR , {2, "DS_OR"}}, - {DS_OP_XOR , {2, "DS_XOR"}}, - {DS_OP_MSKOR , {3, "DS_MSKOR"}}, - {DS_OP_WRITE , {2, "DS_WRITE"}}, - {DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}}, - {DS_OP_WRITE2 , {3, "DS_WRITE2"}}, - {DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}}, - {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}}, - {DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}}, - {DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}}, - {DS_OP_ADD_RET , {2, "DS_ADD_RET"}}, - {DS_OP_SUB_RET , {2, "DS_SUB_RET"}}, - {DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}}, - {DS_OP_INC_RET , {2, "DS_INC_RET"}}, - {DS_OP_DEC_RET , {2, "DS_DEC_RET"}}, - {DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}}, - {DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}}, - {DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}}, - {DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}}, - {DS_OP_AND_RET , {2, "DS_AND_RET"}}, - {DS_OP_OR_RET , {2, "DS_OR_RET"}}, - {DS_OP_XOR_RET , {2, "DS_XOR_RET"}}, - {DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}}, - {DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}}, - {DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}}, - {DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}}, - {DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}}, - {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}}, - {DS_OP_READ_RET , {1, "DS_READ_RET"}}, - {DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}}, - {DS_OP_READ2_RET , {2, "DS_READ2_RET"}}, - {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}}, - {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}}, - {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}}, - {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}}, - {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}}, - 
{DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}} + {DS_OP_ADD , {2, "ADD"}}, + {DS_OP_SUB , {2, "SUB"}}, + {DS_OP_RSUB , {2, "RSUB"}}, + {DS_OP_INC , {2, "INC"}}, + {DS_OP_DEC , {2, "DEC"}}, + {DS_OP_MIN_INT , {2, "MIN_INT"}}, + {DS_OP_MAX_INT , {2, "MAX_INT"}}, + {DS_OP_MIN_UINT , {2, "MIN_UINT"}}, + {DS_OP_MAX_UINT , {2, "MAX_UINT"}}, + {DS_OP_AND , {2, "AND"}}, + {DS_OP_OR , {2, "OR"}}, + {DS_OP_XOR , {2, "XOR"}}, + {DS_OP_MSKOR , {3, "MSKOR"}}, + {DS_OP_WRITE , {2, "WRITE"}}, + {DS_OP_WRITE_REL , {3, "WRITE_REL"}}, + {DS_OP_WRITE2 , {3, "WRITE2"}}, + {DS_OP_CMP_STORE , {3, "CMP_STORE"}}, + {DS_OP_CMP_STORE_SPF , {3, "CMP_STORE_SPF"}}, + {DS_OP_BYTE_WRITE , {2, "BYTE_WRITE"}}, + {DS_OP_SHORT_WRITE , {2, "SHORT_WRITE"}}, + {DS_OP_ADD_RET , {2, "ADD_RET"}}, + {DS_OP_SUB_RET , {2, "SUB_RET"}}, + {DS_OP_RSUB_RET , {2, "RSUB_RET"}}, + {DS_OP_INC_RET , {2, "INC_RET"}}, + {DS_OP_DEC_RET , {2, "DEC_RET"}}, + {DS_OP_MIN_INT_RET , {2, "MIN_INT_RET"}}, + {DS_OP_MAX_INT_RET , {2, "MAX_INT_RET"}}, + {DS_OP_MIN_UINT_RET , {2, "MIN_UINT_RET"}}, + {DS_OP_MAX_UINT_RET , {2, "MAX_UINT_RET"}}, + {DS_OP_AND_RET , {2, "AND_RET"}}, + {DS_OP_OR_RET , {2, "OR_RET"}}, + {DS_OP_XOR_RET , {2, "XOR_RET"}}, + {DS_OP_MSKOR_RET , {3, "MSKOR_RET"}}, + {DS_OP_XCHG_RET , {2, "XCHG_RET"}}, + {DS_OP_XCHG_REL_RET , {3, "XCHG_REL_RET"}}, + {DS_OP_XCHG2_RET , {3, "XCHG2_RET"}}, + {DS_OP_CMP_XCHG_RET , {3, "CMP_XCHG_RET"}}, + {DS_OP_CMP_XCHG_SPF_RET, {3, "CMP_XCHG_SPF_RET"}}, + {DS_OP_READ_RET , {1, "READ_RET"}}, + {DS_OP_READ_REL_RET , {1, "READ_REL_RET"}}, + {DS_OP_READ2_RET , {2, "READ2_RET"}}, + {DS_OP_READWRITE_RET , {3, "READWRITE_RET"}}, + {DS_OP_BYTE_READ_RET , {1, "BYTE_READ_RET"}}, + {DS_OP_UBYTE_READ_RET, {1, "UBYTE_READ_RET"}}, + {DS_OP_SHORT_READ_RET, {1, "SHORT_READ_RET"}}, + {DS_OP_USHORT_READ_RET, {1, "USHORT_READ_RET"}}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "ATOMIC_ORDERED_ALLOC_RET"}}, + {LDS_ADD_RET, {2, "LDS_ADD_RET"}}, + {LDS_ADD, {2, "LDS_ADD"}}, + 
{LDS_AND_RET, {2, "LDS_AND_RET"}}, + {LDS_AND, {2, "LDS_AND"}}, + {LDS_WRITE, {2, "LDS_WRITE"}}, + {LDS_OR_RET, {2, "LDS_OR_RET"}}, + {LDS_OR, {2, "LDS_OR"}}, + {LDS_MAX_INT_RET, {2, "LDS_MAX_INT_RET"}}, + {LDS_MAX_INT, {2, "LDS_MAX_INT"}}, + {LDS_MAX_UINT_RET, {2, "LDS_MAX_UINT_RET"}}, + {LDS_MAX_UINT, {2, "LDS_MAX_UINT"}}, + {LDS_MIN_INT_RET, {2, "LDS_MIN_INT_RET"}}, + {LDS_MIN_INT, {2, "LDS_MIN_INT"}}, + {LDS_MIN_UINT_RET, {2, "LDS_MIN_UINT_RET"}}, + {LDS_MIN_UINT, {2, "LDS_MIN_UINT"}}, + {LDS_XOR_RET, {2, "LDS_XOR_RET"}}, + {LDS_XOR, {2, "LDS_XOR"}}, + {LDS_XCHG_RET, {2, "LDS_XCHG_RET"}}, + {LDS_CMP_XCHG_RET, {3, "LDS_CMP_XCHG_RET"}}, + {LDS_WRITE_REL, {3, "LDS_WRITE_REL"}}, }; } diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h index 4481c49..4d2781f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h +++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h @@ -27,6 +27,8 @@ #ifndef r600_sfn_alu_defines_h #define r600_sfn_alu_defines_h +#include "../r600_isa.h" + #include #include @@ -235,12 +237,71 @@ enum EAluOp { op3_cnde_int = 28<< 6, op3_cndgt_int = 29<< 6, op3_cndge_int = 30<< 6, - op3_mul_lit = 31<< 6 + op3_mul_lit = 31<< 6, + op_invalid = 0xffff +}; + +enum AluModifiers { + alu_src0_neg, + alu_src0_abs, + alu_src0_rel, + alu_src1_neg, + alu_src1_abs, + alu_src1_rel, + alu_src2_neg, + alu_src2_rel, + alu_dst_clamp, + alu_dst_rel, + alu_last_instr, + alu_update_exec, + alu_update_pred, + alu_write, + alu_op3, + alu_is_trans, + alu_is_cayman_trans, + alu_is_lds, + alu_lds_group_start, + alu_lds_group_end, + alu_lds_address, + alu_no_schedule_bias, + alu_64bit_op, + alu_flag_count +}; + +enum AluDstModifiers { + omod_off = 0, + omod_mul2 = 1, + omod_mul4 = 2, + omod_divl2 = 3 +}; + +enum AluPredSel { + pred_off = 0, + pred_zero = 2, + pred_one = 3 }; +enum AluBankSwizzle { + alu_vec_012 = 0, + sq_alu_scl_201 = 0, + alu_vec_021 = 1, + sq_alu_scl_122 = 1, + alu_vec_120 = 2, + sq_alu_scl_212 = 2, + 
alu_vec_102 = 3, + sq_alu_scl_221 = 3, + alu_vec_201 = 4, + sq_alu_scl_unknown = 4, + alu_vec_210 = 5, + alu_vec_unknown = 6 +}; +inline AluBankSwizzle operator ++(AluBankSwizzle& x) { + x = static_cast<AluBankSwizzle>(x + 1); + return x; +} -using AluOpFlags=std::bitset<32>; +using AluOpFlags=std::bitset<alu_flag_count>; struct AluOp { static constexpr int x = 1; @@ -314,6 +375,8 @@ struct AluInlineConstantDescr { extern const std::map alu_src_const; +#define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X + enum ESDOp { DS_OP_ADD = 0, DS_OP_SUB = 1, @@ -362,9 +425,31 @@ enum ESDOp { DS_OP_SHORT_READ_RET = 56, DS_OP_USHORT_READ_RET = 57, DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63, - DS_OP_INVALID = 64 + DS_OP_INVALID = 64, + LDSOP2(ADD_RET), + LDSOP2(ADD), + LDSOP2(AND_RET), + LDSOP2(AND), + LDSOP2(WRITE), + LDSOP2(OR_RET), + LDSOP2(OR), + LDSOP2(MAX_INT_RET), + LDSOP2(MAX_INT), + LDSOP2(MAX_UINT_RET), + LDSOP2(MAX_UINT), + LDSOP2(MIN_INT_RET), + LDSOP2(MIN_INT), + LDSOP2(MIN_UINT_RET), + LDSOP2(MIN_UINT), + LDSOP2(XOR_RET), + LDSOP2(XOR), + LDSOP2(XCHG_RET), + LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET, + LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL }; +#undef LDSOP2 + struct LDSOp { int nsrc; const char *name; @@ -372,6 +457,18 @@ struct LDSOp { extern const std::map lds_ops; +struct KCacheLine { + int bank{0}; + int addr{0}; + int len{0}; + enum KCacheLockMode { + free, + lock_1, + lock_2 + } mode{free}; +}; + + } #endif // ALU_DEFINES_H diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp new file mode 100644 index 0000000..6cbd4bf --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp @@ -0,0 +1,329 @@ +#include "sfn_alu_readport_validation.h" + +#include + +namespace r600 { + +class ReserveReadport : public ConstRegisterVisitor { +public: + ReserveReadport(AluReadportReservation& reserv); + + void visit(const LocalArray& value) override; + void visit(const LiteralConstant& value) override; + void 
visit(const InlineConstant& value) override; + + void reserve_gpr(int sel, int chan); + + AluReadportReservation& reserver; + int cycle = -1; + int isrc = -1; + int src0_sel = -1; + int src0_chan = -1; + bool success = true; + + static const int max_const_readports = 2; +}; + + +class ReserveReadportVec : public ReserveReadport { +public: + using ReserveReadport::ReserveReadport; + + void visit(const Register& value) override; + void visit(const LocalArrayValue& value) override; + void visit(const UniformValue& value) override; +}; + +class ReserveReadportTrans : public ReserveReadport +{ +public: + ReserveReadportTrans(AluReadportReservation& reserv); + + int n_consts; +}; + +class ReserveReadportTransPass1 : public ReserveReadportTrans { +public: + using ReserveReadportTrans::ReserveReadportTrans; + + void visit(const Register& value) override; + void visit(const LocalArrayValue& value) override; + void visit(const UniformValue& value) override; + void visit(const InlineConstant& value) override; + void visit(const LiteralConstant& value) override; +}; + + +class ReserveReadportTransPass2 : public ReserveReadportTrans { +public: + using ReserveReadportTrans::ReserveReadportTrans; + + void visit(const Register& value) override; + void visit(const LocalArrayValue& value) override; + void visit(const UniformValue& value) override; +}; + +bool AluReadportReservation::schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz) +{ + ReserveReadportVec visitor(*this); + + if (src[0]->as_register()) { + visitor.src0_sel = src[0]->sel(); + visitor.src0_chan = src[0]->chan(); + } else { + visitor.src0_sel = 0xffff; + visitor.src0_chan = 8; + } + + for (int i = 0; i < nsrc; ++i) { + visitor.cycle = cycle_vec(swz, i); + visitor.isrc = i; + src[i]->accept(visitor); + } + + return visitor.success; +} + +bool AluReadportReservation::schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz) +{ + ReserveReadportVec visitor(*this); + + for (unsigned i = 0; i < 
alu.n_sources() && visitor.success; ++i) { + visitor.cycle = cycle_vec(swz, i); + visitor.isrc = i; + if (i == 1 && alu.src(i).equal_to(alu.src(0))) + continue; + alu.src(i).accept(visitor); + } + return visitor.success; +} + +bool AluReadportReservation::schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz) +{ + + ReserveReadportTransPass1 visitor1(*this); + + for (unsigned i = 0; i < alu.n_sources(); ++i) { + visitor1.cycle = cycle_trans(swz, i); + alu.src(i).accept(visitor1); + } + if (!visitor1.success) + return false; + + + ReserveReadportTransPass2 visitor2(*this); + visitor2.n_consts = visitor1.n_consts; + + + for (unsigned i = 0; i < alu.n_sources(); ++i) { + visitor2.cycle = cycle_trans(swz, i); + + alu.src(i).accept(visitor2); + } + return visitor2.success; +} + + +AluReadportReservation::AluReadportReservation() +{ + for (int i = 0; i < max_chan_channels; ++i) { + for (int j = 0; j < max_gpr_readports; ++j) + m_hw_gpr[j][i] = -1; + m_hw_const_addr[i] = -1; + m_hw_const_chan[i] = -1; + m_hw_const_bank[i] = -1; + } +} + + +bool AluReadportReservation::reserve_gpr(int sel, int chan, int cycle) +{ + if (m_hw_gpr[cycle][chan] == -1) { + m_hw_gpr[cycle][chan] = sel; + } + else if (m_hw_gpr[cycle][chan] != sel) { + return false; + } + return true; +} + +bool AluReadportReservation::reserve_const(const UniformValue& value) +{ + int match = -1; + int empty = -1; + + for (int res = 0; res < ReserveReadport::max_const_readports; ++res) { + if (m_hw_const_addr[res] == -1) + empty = res; + else if ((m_hw_const_addr[res] == value.sel()) && + (m_hw_const_bank[res] == value.kcache_bank()) && + (m_hw_const_chan[res] == (value.chan() >> 1))) + match = res; + } + + if (match < 0) { + if (empty >= 0) { + m_hw_const_addr[empty] = value.sel(); + (m_hw_const_bank[empty] = value.kcache_bank()); + m_hw_const_chan[empty] = value.chan() >> 1; + } else { + return false; + } + } + return true; +} + +bool AluReadportReservation::add_literal(uint32_t value) +{ + for 
(unsigned i = 0; i < m_nliterals; ++i) { + if (m_literals[i] == value) + return true; + } + if (m_nliterals < m_literals.size()) { + m_literals[m_nliterals++] = value; + return true; + } + return false; +} + +int AluReadportReservation::cycle_vec(AluBankSwizzle swz, int src) +{ + static const int mapping[AluBankSwizzle::alu_vec_unknown][max_gpr_readports] = { + {0, 1, 2}, + {0, 2, 1}, + {1, 0, 2}, + {1, 2, 0}, + {2, 0, 1}, + {2, 1, 0} + }; + return mapping[swz][src]; +} + +int AluReadportReservation::cycle_trans(AluBankSwizzle swz, int src) +{ + static const int mapping[AluBankSwizzle::sq_alu_scl_unknown][max_gpr_readports] = { + {2, 1, 0}, + {1, 2, 2}, + {2, 1, 2}, + {2, 2, 1}, + }; + return mapping[swz][src]; +} + + +ReserveReadport::ReserveReadport(AluReadportReservation& reserv): + reserver(reserv) +{ +} + +void ReserveReadport::visit(const LocalArray& value) +{ + (void)value; + unreachable("a full array is not available here"); +} + +void ReserveReadport::visit(const LiteralConstant& value) +{ + success &= reserver.add_literal(value.value()); +} + +void ReserveReadport::visit(const InlineConstant& value) +{ + (void)value; +} + +void ReserveReadportVec::visit(const Register& value) +{ + reserve_gpr(value.sel(), value.chan()); +} + +void ReserveReadportVec::visit(const LocalArrayValue& value) +{ + // Set the hightest non-sign bit to indicated that we use the + // AR register + reserve_gpr(0x4000000 | value.sel(), value.chan()); +} + +void ReserveReadport::reserve_gpr(int sel, int chan) +{ + if (isrc == 1 && src0_sel == sel && src0_chan == chan) + return; + success &= reserver.reserve_gpr(sel, chan, cycle); +} + +void ReserveReadportVec::visit(const UniformValue& value) +{ + // kcache bank? 
+ success &= reserver.reserve_const(value); +} + +ReserveReadportTrans::ReserveReadportTrans(AluReadportReservation& reserv): + ReserveReadport(reserv), + n_consts(0) +{} + +void ReserveReadportTransPass1::visit(const Register& value) +{ + (void)value; +} + +void ReserveReadportTransPass1::visit(const LocalArrayValue& value) +{ + (void)value; +} + +void ReserveReadportTransPass1::visit(const UniformValue& value) +{ + if (n_consts >= max_const_readports) { + success = false; + return; + } + n_consts++; + success &= reserver.reserve_const(value); +} + +void ReserveReadportTransPass1::visit(const InlineConstant& value) +{ + (void)value; + if (n_consts >= max_const_readports) { + success = false; + return; + } + n_consts++; +} + +void ReserveReadportTransPass1::visit(const LiteralConstant& value) +{ + if (n_consts >= max_const_readports) { + success = false; + return; + } + n_consts++; + success &= reserver.add_literal(value.value()); +} + +void ReserveReadportTransPass2::visit(const Register& value) +{ + if (cycle < n_consts) { + success = false; + return; + } + reserve_gpr(value.sel(), value.chan()); +} + +void ReserveReadportTransPass2::visit(const LocalArrayValue& value) +{ + if (cycle < n_consts) { + success = false; + return; + } + reserve_gpr(0x4000000 | value.sel(), value.chan()); +} + +void ReserveReadportTransPass2::visit(const UniformValue& value) +{ + (void)value; +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h new file mode 100644 index 0000000..7ca23ce --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h @@ -0,0 +1,41 @@ +#ifndef ALUREADPORTVALIDATION_H +#define ALUREADPORTVALIDATION_H + +#include "sfn_instr_alu.h" + +namespace r600 { + +class AluReadportReservation { +public: + AluReadportReservation(); + AluReadportReservation(const AluReadportReservation& orig) = default; + AluReadportReservation& operator = (const 
AluReadportReservation& orig) = default; + + bool schedule_vec_src(PVirtualValue src[3], int nsrc, AluBankSwizzle swz); + + bool schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz); + bool schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz); + + bool reserve_gpr(int sel, int chan, int cycle); + bool reserve_const(const UniformValue& value); + + bool add_literal(uint32_t value); + + static int cycle_vec(AluBankSwizzle swz, int src); + static int cycle_trans(AluBankSwizzle swz, int src); + + static const int max_chan_channels = 4; + static const int max_gpr_readports = 3; + + std::array, max_gpr_readports> m_hw_gpr; + std::array m_hw_const_addr; + std::array m_hw_const_chan; + std::array m_hw_const_bank; + std::array m_literals; + uint32_t m_nliterals{0}; +}; + + +} + +#endif // ALUREADPORTVALIDATION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp similarity index 52% rename from src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp rename to src/gallium/drivers/r600/sfn/sfn_assembler.cpp index f97dba9..b2f67a2 100644 --- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp @@ -1,322 +1,300 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_ir_to_assembly.h" +#include "sfn_assembler.h" +#include "sfn_debug.h" +#include "sfn_instr_alugroup.h" +#include "sfn_instr_controlflow.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_export.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_tex.h" + #include "sfn_conditionaljumptracker.h" #include "sfn_callstack.h" -#include "sfn_instruction_gds.h" -#include "sfn_instruction_misc.h" -#include "sfn_instruction_fetch.h" -#include "sfn_instruction_lds.h" -#include "../r600_shader.h" #include "../eg_sq.h" namespace r600 { +Assembler::Assembler(r600_shader *sh, const r600_shader_key& key): + m_sh(sh), m_key(key) +{ +} -using std::vector; - - - -struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor { - - AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key); - - - bool emit(const Instruction::Pointer i); - void reset_addr_register() {m_last_addr.reset();} +extern const std::map ds_opcode_map; +class AssamblerVisitor : public ConstInstrVisitor { public: - bool visit(const AluInstruction& i) override; - bool visit(const ExportInstruction& i) override; - bool visit(const TexInstruction& i) override; - bool visit(const FetchInstruction& i) override; - bool visit(const IfInstruction& i) override; - bool visit(const ElseInstruction& i) override; - bool visit(const IfElseEndInstruction& i) override; - bool visit(const LoopBeginInstruction& i) override; - bool visit(const LoopEndInstruction& i) override; - bool visit(const 
LoopBreakInstruction& i) override; - bool visit(const LoopContInstruction& i) override; - bool visit(const StreamOutIntruction& i) override; - bool visit(const MemRingOutIntruction& i) override; - bool visit(const EmitVertex& i) override; - bool visit(const WaitAck& i) override; - bool visit(const WriteScratchInstruction& i) override; - bool visit(const GDSInstr& i) override; - bool visit(const RatInstruction& i) override; - bool visit(const LDSWriteInstruction& i) override; - bool visit(const LDSReadInstruction& i) override; - bool visit(const LDSAtomicInstruction& i) override; - bool visit(const GDSStoreTessFactor& i) override; - bool visit(const InstructionBlock& i) override; - - bool emit_load_addr(PValue addr); - bool emit_fs_pixel_export(const ExportInstruction & exi); - bool emit_vs_pos_export(const ExportInstruction & exi); - bool emit_vs_param_export(const ExportInstruction & exi); - bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src); - bool copy_src(r600_bytecode_alu_src& src, const Value& s); - - EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx); + AssamblerVisitor(r600_shader *sh, const r600_shader_key& key); + + void visit(const AluInstr& instr) override; + void visit(const AluGroup& instr) override; + void visit(const TexInstr& instr) override; + void visit(const ExportInstr& instr) override; + void visit(const FetchInstr& instr) override; + void visit(const Block& instr) override; + void visit(const IfInstr& instr) override; + void visit(const ControlFlowInstr& instr) override; + void visit(const WriteScratchInstr& instr) override; + void visit(const StreamOutInstr& instr) override; + void visit(const MemRingOutInstr& instr) override; + void visit(const EmitVertexInstr& instr) override; + void visit(const GDSInstr& instr) override; + void visit(const WriteTFInstr& instr) override; + void visit(const LDSAtomicInstr& instr) override; + void visit(const LDSReadInstr& instr) override; + void visit(const RatInstr& instr) override; + 
+ void finalize(); + + const uint32_t sf_vtx = 1; + const uint32_t sf_tex = 2; + const uint32_t sf_alu = 4; + const uint32_t sf_addr_register = 8; + const uint32_t sf_all = 0xf; + + void clear_states(const uint32_t& states); + bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write); + PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s); + + EBufferIndexMode + emit_index_reg(const VirtualValue& addr, unsigned idx); + + void emit_endif(); + void emit_else(); + void emit_loop_begin(bool vpm); + void emit_loop_end(); + void emit_loop_break(); + void emit_loop_cont(); + + void emit_alu_op(const AluInstr& ai); + void emit_lds_op(const AluInstr& lds); + + void emit_wait_ack(); + + /* Start initialized in constructor */ + const r600_shader_key& m_key; + r600_shader *m_shader; + r600_bytecode *m_bc; ConditionalJumpTracker m_jump_tracker; CallStack m_callstack; + bool ps_alpha_to_one; + /* End initialized in constructor */ -public: - r600_bytecode *m_bc; - r600_shader *m_shader; - r600_shader_key *m_key; - r600_bytecode_output m_output; - unsigned m_max_color_exports; - bool has_pos_output; - bool has_param_output; - PValue m_last_addr; - int m_loop_nesting; std::set m_nliterals_in_group; std::set vtx_fetch_results; std::set tex_fetch_results; - bool m_last_op_was_barrier; -}; + PRegister m_last_addr{nullptr}; -AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh, - r600_shader_key *key) -{ - impl = new AssemblyFromShaderLegacyImpl(sh, key); -} + unsigned m_max_color_exports{0}; + int m_loop_nesting{0}; -AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy() -{ - delete impl; -} + bool m_ack_suggested{false}; + bool m_has_param_output{false}; + bool m_has_pos_output{false}; + bool m_last_op_was_barrier{false}; + bool m_result{true}; +}; -bool AssemblyFromShaderLegacy::do_lower(const std::vector& ir) +bool Assembler::lower(Shader *shader) { - if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX && - 
impl->m_shader->ninput > 0) - r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS); + AssamblerVisitor ass(m_sh, m_key); + auto& blocks = shader->func(); + for (auto b : blocks) { + b->accept(ass); + if (!ass.m_result) + return false; + } - std::vector exports; + ass.finalize(); - for (const auto& block : ir) { - if (!impl->visit(block)) - return false; - } /* - for (const auto& i : exports) { - if (!impl->emit_export(static_cast(*i))) - return false; - }*/ + return ass.m_result; +} +AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key): + m_key(key), + m_shader(sh), + + m_bc(&sh->bc), + m_callstack(sh->bc), + ps_alpha_to_one(key.ps.alpha_to_one) +{ + if (m_shader->processor_type == PIPE_SHADER_FRAGMENT) + m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1); + + if (m_shader->processor_type == PIPE_SHADER_VERTEX && + m_shader->ninput > 0) + r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS); +} + +void AssamblerVisitor::finalize() +{ const struct cf_op_info *last = nullptr; - if (impl->m_bc->cf_last) - last = r600_isa_cf(impl->m_bc->cf_last->op); + + if (m_bc->cf_last) + last = r600_isa_cf(m_bc->cf_last->op); /* alu clause instructions don't have EOP bit, so add NOP */ - if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END - || impl->m_bc->cf_last->op == CF_OP_POP) - r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP); + if (m_shader->bc.gfx_level < CAYMAN && + (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END + || m_bc->cf_last->op == CF_OP_POP)) + r600_bytecode_add_cfinst(m_bc, CF_OP_NOP); + + /* A fetch shader only can't be EOP (results in hang), but we can replace it + * by a NOP */ + else if (last && m_bc->cf_last->op == CF_OP_CALL_FS) + m_bc->cf_last->op = CF_OP_NOP; + + if (m_shader->bc.gfx_level != CAYMAN) + m_bc->cf_last->end_of_program = 1; + else + cm_bytecode_add_cf_end(m_bc); +} - /* A fetch shader only can't be EOP (results in hang), but we can replace it - * by a NOP */ - else if 
(impl->m_bc->cf_last->op == CF_OP_CALL_FS) - impl->m_bc->cf_last->op = CF_OP_NOP; +extern const std::map opcode_map; - if (impl->m_shader->bc.gfx_level != CAYMAN) - impl->m_bc->cf_last->end_of_program = 1; - else - cm_bytecode_add_cf_end(impl->m_bc); +void AssamblerVisitor::visit(const AluInstr& ai) +{ + assert(vtx_fetch_results.empty()); + assert(tex_fetch_results.empty()); - return true; + if (unlikely(ai.has_alu_flag(alu_is_lds))) + emit_lds_op(ai); + else + emit_alu_op(ai); } -bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block) +void AssamblerVisitor::emit_lds_op(const AluInstr& lds) { - for (const auto& i : block) { - - if (i->type() != Instruction::vtx) { - vtx_fetch_results.clear(); - if (i->type() != Instruction::tex) - tex_fetch_results.clear(); - } + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); - m_last_op_was_barrier &= i->type() == Instruction::alu; + alu.is_lds_idx_op = true; + alu.op = lds.lds_opcode(); - sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n"; + /* All paired LDS fetch + read from queue instructions + * have to fit into the same ALU CF, 256 DW fit in, but we leave some + * space for weired things the backend assembler might do. 
*/ + const bool is_lds_start = lds.has_alu_flag(alu_lds_group_start); + const unsigned expected_alu_clause_fill = m_bc->cf_last->ndw + + 2 * lds.required_slots(); - if (!i->accept(*this)) - return false; + if (is_lds_start && expected_alu_clause_fill > 240) + m_bc->force_add_cf = 1; - if (i->type() != Instruction::alu) - reset_addr_register(); + bool has_lds_fetch = false; + switch (alu.op) { + case DS_OP_WRITE: + alu.op = LDS_OP2_LDS_WRITE; + break; + case LDS_WRITE_REL: + alu.op = LDS_OP3_LDS_WRITE_REL; + alu.lds_idx = 1; + break; + case DS_OP_READ_RET: + alu.op = LDS_OP1_LDS_READ_RET; + FALLTHROUGH; + case LDS_ADD_RET: + case LDS_AND_RET: + case LDS_OR_RET: + case LDS_MAX_INT_RET: + case LDS_MAX_UINT_RET: + case LDS_MIN_INT_RET: + case LDS_MIN_UINT_RET: + case LDS_XOR_RET: + case LDS_XCHG_RET: + case LDS_CMP_XCHG_RET: + has_lds_fetch = true; + break; + default: + ; } - return true; -} + copy_src(alu.src[0], lds.src(0)); -AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh, - r600_shader_key *key): - m_callstack(sh->bc), - m_bc(&sh->bc), - m_shader(sh), - m_key(key), - has_pos_output(false), - has_param_output(false), - m_loop_nesting(0), - m_last_op_was_barrier(false) -{ - m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1); - -} + if (lds.n_sources() > 1) + copy_src(alu.src[1], lds.src(1)); + else + alu.src[1].sel = V_SQ_ALU_SRC_0; -extern const std::map opcode_map; + if (lds.n_sources() > 2) + copy_src(alu.src[2], lds.src(2)); + else + alu.src[2].sel = V_SQ_ALU_SRC_0; -bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr) -{ - m_bc->ar_reg = addr->sel(); - m_bc->ar_chan = addr->chan(); - m_bc->ar_loaded = 0; - m_last_addr = addr; + alu.last = lds.has_alu_flag(alu_last_instr); - sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n"; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (has_lds_fetch) + m_bc->cf_last->nlds_read++; - return true; + if (r) + m_result = false; } -bool 
AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) +void AssamblerVisitor::emit_alu_op(const AluInstr& ai) { - struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); - PValue addr_in_use; if (opcode_map.find(ai.opcode()) == opcode_map.end()) { std::cerr << "Opcode not handled for " << ai <<"\n"; - return false; + m_result = false; + return; } + // skip multiple barriers if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier) - return true; + return; m_last_op_was_barrier = ai.opcode() == op0_group_barrier; - for (unsigned i = 0; i < ai.n_sources(); ++i) { - auto& s = ai.src(i); - if (s.type() == Value::literal) { - auto& v = static_cast(s); - if (v.value() != 0 && - v.value() != 1 && - v.value_float() != 1.0f && - v.value_float() != 0.5f && - v.value() != 0xffffffff) - m_nliterals_in_group.insert(v.value()); - } - } - - /* This instruction group would exceed the limit of literals, so - * force a new instruction group by adding a NOP as last - * instruction. This will no loner be needed with a real - * scheduler */ - if (m_nliterals_in_group.size() > 4) { - sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group.size() << " inject a last op (nop)\n"; - alu.op = ALU_OP0_NOP; - alu.last = 1; - alu.dst.chan = 3; - int retval = r600_bytecode_add_alu(m_bc, &alu); - if (retval) - return false; - memset(&alu, 0, sizeof(alu)); - m_nliterals_in_group.clear(); - for (unsigned i = 0; i < ai.n_sources(); ++i) { - auto& s = ai.src(i); - if (s.type() == Value::literal) { - auto& v = static_cast(s); - m_nliterals_in_group.insert(v.value()); - } - } - } - alu.op = opcode_map.at(ai.opcode()); - /* Missing test whether ai actually has a dest */ auto dst = ai.dest(); - if (dst) { - if (!copy_dst(alu.dst, *dst)) - return false; - - alu.dst.write = ai.flag(alu_write); - alu.dst.clamp = ai.flag(alu_dst_clamp); - - if (dst->type() == Value::gpr_array_value) { - auto& v = static_cast(*dst); - PValue addr = v.indirect(); - if (addr) { - if (!m_last_addr || 
*addr != *m_last_addr) { - emit_load_addr(addr); - addr_in_use = addr; - } - alu.dst.rel = addr ? 1 : 0;; - } + if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) { + m_result = false; + return; } + + alu.dst.write = ai.has_alu_flag(alu_write); + alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp); + alu.dst.rel = dst->addr() ? 1 : 0; + } else { + alu.dst.chan = ai.dest_chan(); } alu.is_op3 = ai.n_sources() == 3; + EBufferIndexMode kcache_index_mode = bim_none; + PVirtualValue buffer_offset = nullptr; + for (unsigned i = 0; i < ai.n_sources(); ++i) { - auto& s = ai.src(i); + buffer_offset = copy_src(alu.src[i], ai.src(i)); + alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]); + if (!alu.is_op3) + alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]); - if (!copy_src(alu.src[i], s)) - return false; - alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]); - - if (s.type() == Value::gpr_array_value) { - auto& v = static_cast(s); - PValue addr = v.indirect(); - if (addr) { - assert(!addr_in_use || (*addr_in_use == *addr)); - if (!m_last_addr || *addr != *m_last_addr) { - emit_load_addr(addr); - addr_in_use = addr; - } - alu.src[i].rel = addr ? 
1 : 0; - } + if (buffer_offset && kcache_index_mode == bim_none) { + kcache_index_mode = bim_zero; + alu.src[i].kc_bank = 1; + alu.src[i].kc_rel = 1; + } + + if (ai.has_lds_access()) { + assert(m_bc->cf_last->nlds_read > 0); + m_bc->cf_last->nlds_read--; } - if (!alu.is_op3) - alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]); } if (ai.bank_swizzle() != alu_vec_unknown) alu.bank_swizzle_force = ai.bank_swizzle(); - alu.last = ai.flag(alu_last_instr); - alu.update_pred = ai.flag(alu_update_pred); - alu.execute_mask = ai.flag(alu_update_exec); + alu.last = ai.has_alu_flag(alu_last_instr); + //alu.update_pred = ai.has_alu_flag(alu_update_pred); + alu.execute_mask = ai.has_alu_flag(alu_update_exec); /* If the destination register is equal to the last loaded address register * then clear the latter one, because the values will no longer be identical */ @@ -326,11 +304,10 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) if (dst) sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n"; - if (dst && m_last_addr) - if (*dst == *m_last_addr) { - sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n"; - m_last_addr.reset(); - } + if (dst && m_last_addr && *dst == *m_last_addr) { + sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n"; + m_last_addr = nullptr; + } auto cf_op = ai.cf_type(); @@ -351,7 +328,8 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) if (alu.last) m_nliterals_in_group.clear(); - bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type); + + m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type); if (ai.opcode() == op1_mova_int) m_bc->ar_loaded = 0; @@ -362,262 +340,248 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) if (ai.opcode() == op1_set_cf_idx1) m_bc->index_loaded[1] = 1; - m_bc->force_add_cf |= (ai.opcode() == op2_kille || ai.opcode() == op2_killne_int || ai.opcode() == op1_set_cf_idx0 || 
ai.opcode() == op1_set_cf_idx1); - return retval; } -bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi) +void AssamblerVisitor::visit(const AluGroup& group) { - r600_bytecode_output output; - memset(&output, 0, sizeof(output)); - assert(exi.gpr().type() == Value::gpr_vector); - const auto& gpr = exi.gpr(); - output.gpr = gpr.sel(); - output.elem_size = 3; - output.swizzle_x = gpr.chan_i(0); - output.swizzle_y = gpr.chan_i(1); - output.swizzle_z = gpr.chan_i(2); - output.swizzle_w = gpr.chan_i(3); - output.burst_count = 1; - output.array_base = 60 + exi.location(); - output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; - output.type = exi.export_type(); + clear_states(sf_vtx | sf_tex); + if (group.slots() == 0) + return; - if (r600_bytecode_add_output(m_bc, &output)) { - R600_ERR("Error adding pixel export at location %d\n", exi.location()); - return false; + if (group.has_lds_group_start()) { + if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > 240) { + assert(m_bc->cf_last->nlds_read == 0); + m_bc->force_add_cf = 1; + m_last_addr = nullptr; + } + } else if (m_bc->cf_last) { + if (m_bc->cf_last->ndw + 2 * group.slots() > 240) { + m_bc->force_add_cf = 1; + m_last_addr = nullptr; + } else { + auto instr = *group.begin(); + if (instr && + !instr->has_alu_flag(alu_is_lds) && + instr->opcode() == op0_group_barrier && + m_bc->cf_last->ndw + 14 > 240) { + m_bc->force_add_cf = 1; + m_last_addr = nullptr; + } + } } - return true; -} + auto addr = group.addr(); + if (addr.first) { + if (!addr.second) { + if (!m_last_addr || !m_bc->ar_loaded || + !m_last_addr->equal_to(*addr.first)) { + m_bc->ar_reg = addr.first->sel(); + m_bc->ar_chan = addr.first->chan(); + m_last_addr = addr.first; + m_bc->ar_loaded = 0; -bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi) + r600_load_ar(m_bc); + } + } else { + emit_index_reg(*addr.first, 0); + } + } + + for (auto& i : group) { + if (i) + 
i->accept(*this); + } +} + +void AssamblerVisitor::visit(const TexInstr& tex_instr) { - r600_bytecode_output output; - assert(exi.gpr().type() == Value::gpr_vector); - const auto& gpr = exi.gpr(); + clear_states(sf_vtx | sf_alu); - memset(&output, 0, sizeof(output)); - output.gpr = gpr.sel(); - output.elem_size = 3; - output.swizzle_x = gpr.chan_i(0); - output.swizzle_y = gpr.chan_i(1); - output.swizzle_z = gpr.chan_i(2); - output.swizzle_w = gpr.chan_i(3); - output.burst_count = 1; - output.array_base = exi.location(); - output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; - output.type = exi.export_type(); + int sampler_offset = 0; + auto addr = tex_instr.sampler_offset(); + EBufferIndexMode index_mode = bim_none; + if (addr) + index_mode = emit_index_reg(*addr, 1); - if (r600_bytecode_add_output(m_bc, &output)) { - R600_ERR("Error adding pixel export at location %d\n", exi.location()); - return false; + if (tex_fetch_results.find(tex_instr.src().sel()) != + tex_fetch_results.end()) { + m_bc->force_add_cf = 1; + tex_fetch_results.clear(); } - return true; -} + r600_bytecode_tex tex; + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = tex_instr.opcode(); + tex.sampler_id = tex_instr.sampler_id() + sampler_offset; + tex.resource_id = tex_instr.resource_id() + sampler_offset; + tex.src_gpr = tex_instr.src().sel(); + tex.dst_gpr = tex_instr.dst().sel(); + tex.dst_sel_x = tex_instr.dest_swizzle(0); + tex.dst_sel_y = tex_instr.dest_swizzle(1); + tex.dst_sel_z = tex_instr.dest_swizzle(2); + tex.dst_sel_w = tex_instr.dest_swizzle(3); + tex.src_sel_x = tex_instr.src()[0]->chan(); + tex.src_sel_y = tex_instr.src()[1]->chan(); + tex.src_sel_z = tex_instr.src()[2]->chan(); + tex.src_sel_w = tex_instr.src()[3]->chan(); + tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized); + tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized); + tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized); + tex.coord_type_w 
= !tex_instr.has_tex_flag(TexInstr::w_unnormalized); + tex.offset_x = tex_instr.get_offset(0); + tex.offset_y = tex_instr.get_offset(1); + tex.offset_z = tex_instr.get_offset(2); + tex.resource_index_mode = index_mode; + tex.sampler_index_mode = index_mode; + if (tex.dst_sel_x < 4 && + tex.dst_sel_y < 4 && + tex.dst_sel_z < 4 && + tex.dst_sel_w < 4) + tex_fetch_results.insert(tex.dst_gpr); -bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi) -{ - if (exi.location() >= m_max_color_exports && exi.location() < 60) { - R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n", - exi.location(), m_max_color_exports); - return true; + if (tex_instr.opcode() == TexInstr::get_gradient_h || + tex_instr.opcode() == TexInstr::get_gradient_v) + tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0; + else + tex.inst_mod = tex_instr.inst_mode(); + if (r600_bytecode_add_tex(m_bc, &tex)) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + m_result = false; } +} - assert(exi.gpr().type() == Value::gpr_vector); - const auto& gpr = exi.gpr(); +void AssamblerVisitor::visit(const ExportInstr& exi) +{ + const auto& value = exi.value(); r600_bytecode_output output; memset(&output, 0, sizeof(output)); - output.gpr = gpr.sel(); + output.gpr = value.sel(); output.elem_size = 3; - output.swizzle_x = gpr.chan_i(0); - output.swizzle_y = gpr.chan_i(1); - output.swizzle_z = gpr.chan_i(2); - output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ; + output.swizzle_x = value[0]->chan(); + output.swizzle_y = value[1]->chan(); + output.swizzle_z = value[2]->chan(); output.burst_count = 1; - output.array_base = exi.location(); output.op = exi.is_last_export() ? 
CF_OP_EXPORT_DONE: CF_OP_EXPORT; output.type = exi.export_type(); - if (r600_bytecode_add_output(m_bc, &output)) { - R600_ERR("Error adding pixel export at location %d\n", exi.location()); - return false; - } - - return true; -} - - -bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi) -{ + clear_states(sf_all); switch (exi.export_type()) { - case ExportInstruction::et_pixel: - return emit_fs_pixel_export(exi); - case ExportInstruction::et_pos: - return emit_vs_pos_export(exi); - case ExportInstruction::et_param: - return emit_vs_param_export(exi); + case ExportInstr::pixel: + output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan(); + output.array_base = exi.location(); + break; + case ExportInstr::pos: + output.swizzle_w = exi.value()[3]->chan(); + output.array_base = 60 + exi.location(); + break; + case ExportInstr::param: + output.swizzle_w = exi.value()[3]->chan(); + output.array_base = exi.location(); + break; default: R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type()); - return false; + m_result = false; } -} - -bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr) -{ - int elems = m_callstack.push(FC_PUSH_VPM); - bool needs_workaround = false; - - if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1) - needs_workaround = true; - - if (m_bc->family != CHIP_HEMLOCK && - m_bc->family != CHIP_CYPRESS && - m_bc->family != CHIP_JUNIPER) { - unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; - unsigned dmod2 = (elems) % m_bc->stack.entry_size; - if (elems && (!dmod1 || !dmod2)) - needs_workaround = true; + /* If all register elements pinned to fixed values + * we can override the gpr (the register allocator doesn't see + * this because it doesn't take these channels into account. 
*/ + if (output.swizzle_x > 3 && output.swizzle_y > 3 && + output.swizzle_z > 3 && output.swizzle_w > 3) + output.gpr = 0; + + int r = 0; + if ((r =r600_bytecode_add_output(m_bc, &output))) { + R600_ERR("Error adding export at location %d : err: %d\n", exi.location(), r); + m_result = false; } - - auto& pred = if_instr.pred(); - - if (needs_workaround) { - r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); - m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; - auto new_pred = pred; - new_pred.set_cf_type(cf_alu); - visit(new_pred); - } else - visit(pred); - - r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); - - m_jump_tracker.push(m_bc->cf_last, jt_if); - return true; } -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr) +void AssamblerVisitor::visit(const WriteScratchInstr& instr) { - r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); - m_bc->cf_last->pop_count = 1; - return m_jump_tracker.add_mid(m_bc->cf_last, jt_if); -} - -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr) -{ - m_callstack.pop(FC_PUSH_VPM); + clear_states(sf_all); - unsigned force_pop = m_bc->force_add_cf; - if (!force_pop) { - int alu_pop = 3; - if (m_bc->cf_last) { - if (m_bc->cf_last->op == CF_OP_ALU) - alu_pop = 0; - else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) - alu_pop = 1; - } - alu_pop += 1; - if (alu_pop == 1) { - m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; - m_bc->force_add_cf = 1; - } else if (alu_pop == 2) { - m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER; - m_bc->force_add_cf = 1; - } else { - force_pop = 1; - } - } + struct r600_bytecode_output cf; - if (force_pop) { - r600_bytecode_add_cfinst(m_bc, CF_OP_POP); - m_bc->cf_last->pop_count = 1; - m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; - } + memset(&cf, 0, sizeof(struct r600_bytecode_output)); - return m_jump_tracker.pop(m_bc->cf_last, jt_if); -} + cf.op = CF_OP_MEM_SCRATCH; + cf.elem_size = 3; + cf.gpr = instr.value().sel(); + cf.mark = 1; + cf.comp_mask = instr.write_mask(); 
+ cf.swizzle_x = 0; + cf.swizzle_y = 1; + cf.swizzle_z = 2; + cf.swizzle_w = 3; + cf.burst_count = 1; -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr) -{ - r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); - m_jump_tracker.push(m_bc->cf_last, jt_loop); - m_callstack.push(FC_LOOP); - ++m_loop_nesting; - return true; -} + if (instr.address()) { + cf.type = 3; + cf.index_gpr = instr.address()->sel(); -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr) -{ - r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); - m_callstack.pop(FC_LOOP); - assert(m_loop_nesting); - --m_loop_nesting; - return m_jump_tracker.pop(m_bc->cf_last, jt_loop); -} + /* The docu seems to be wrong here: In indirect addressing the + * address_base seems to be the array_size */ + cf.array_size = instr.array_size(); + } else { + cf.type = 2; + cf.array_base = instr.location(); + } + /* This should be 0, but the address calculation is apparently wrong */ -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr) -{ - r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); - return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); -} -bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr) -{ - r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); - return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); + if (r600_bytecode_add_output(m_bc, &cf)){ + R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); + m_result = false; + } } -bool AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr) +void AssamblerVisitor::visit(const StreamOutInstr& instr) { struct r600_bytecode_output output; memset(&output, 0, sizeof(struct r600_bytecode_output)); - output.gpr = so_instr.gpr().sel(); - output.elem_size = so_instr.element_size(); - output.array_base = so_instr.array_base(); + output.gpr = instr.value().sel(); + output.elem_size = instr.element_size(); + 
output.array_base = instr.array_base(); output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; - output.burst_count = so_instr.burst_count(); - output.array_size = so_instr.array_size(); - output.comp_mask = so_instr.comp_mask(); - output.op = so_instr.op(); + output.burst_count = instr.burst_count(); + output.array_size = instr.array_size(); + output.comp_mask = instr.comp_mask(); + output.op = instr.op(); assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); - if (r600_bytecode_add_output(m_bc, &output)) { R600_ERR("shader_from_nir: Error creating stream output instruction\n"); - return false; + m_result = false; } - return true; } - -bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr) +void AssamblerVisitor::visit(const MemRingOutInstr& instr) { struct r600_bytecode_output output; memset(&output, 0, sizeof(struct r600_bytecode_output)); - output.gpr = instr.gpr().sel(); + output.gpr = instr.value().sel(); output.type = instr.type(); output.elem_size = 3; output.comp_mask = 0xf; output.burst_count = 1; output.op = instr.op(); - if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) { + if (instr.type() == MemRingOutInstr::mem_write_ind || + instr.type() == MemRingOutInstr::mem_write_ind_ack) { output.index_gpr = instr.index_reg(); output.array_size = 0xfff; } @@ -625,99 +589,35 @@ bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr) if (r600_bytecode_add_output(m_bc, &output)) { R600_ERR("shader_from_nir: Error creating mem ring write instruction\n"); - return false; + m_result = false; } - return true; } - -bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr) +void AssamblerVisitor::visit(const EmitVertexInstr& instr) { - int sampler_offset = 0; - auto addr = tex_instr.sampler_offset(); - EBufferIndexMode index_mode = bim_none; - - if (addr) { - if (addr->type() == Value::literal) { - const auto& boffs = static_cast(*addr); - sampler_offset = 
boffs.value(); - } else { - index_mode = emit_index_reg(*addr, 1); - } - } - - if (tex_fetch_results.find(tex_instr.src().sel()) != - tex_fetch_results.end()) { - m_bc->force_add_cf = 1; - tex_fetch_results.clear(); - } - - r600_bytecode_tex tex; - memset(&tex, 0, sizeof(struct r600_bytecode_tex)); - tex.op = tex_instr.opcode(); - tex.sampler_id = tex_instr.sampler_id() + sampler_offset; - tex.resource_id = tex_instr.resource_id() + sampler_offset; - tex.src_gpr = tex_instr.src().sel(); - tex.dst_gpr = tex_instr.dst().sel(); - tex.dst_sel_x = tex_instr.dest_swizzle(0); - tex.dst_sel_y = tex_instr.dest_swizzle(1); - tex.dst_sel_z = tex_instr.dest_swizzle(2); - tex.dst_sel_w = tex_instr.dest_swizzle(3); - tex.src_sel_x = tex_instr.src().chan_i(0); - tex.src_sel_y = tex_instr.src().chan_i(1); - tex.src_sel_z = tex_instr.src().chan_i(2); - tex.src_sel_w = tex_instr.src().chan_i(3); - tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized); - tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized); - tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized); - tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized); - tex.offset_x = tex_instr.get_offset(0); - tex.offset_y = tex_instr.get_offset(1); - tex.offset_z = tex_instr.get_offset(2); - tex.resource_index_mode = index_mode; - tex.sampler_index_mode = index_mode; - - if (tex.dst_sel_x < 4 && - tex.dst_sel_y < 4 && - tex.dst_sel_z < 4 && - tex.dst_sel_w < 4) - tex_fetch_results.insert(tex.dst_gpr); - - if (tex_instr.opcode() == TexInstruction::get_gradient_h || - tex_instr.opcode() == TexInstruction::get_gradient_v) - tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 
1 : 0; + int r = r600_bytecode_add_cfinst(m_bc, instr.op()); + if (!r) + m_bc->cf_last->count = instr.stream(); else - tex.inst_mod = tex_instr.inst_mode(); - if (r600_bytecode_add_tex(m_bc, &tex)) { - R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); - return false; - } - return true; + m_result = false; + assert(m_bc->cf_last->count < 4); } -bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr) +void AssamblerVisitor::visit(const FetchInstr& fetch_instr) { - int buffer_offset = 0; - auto addr = fetch_instr.buffer_offset(); - auto index_mode = fetch_instr.buffer_index_mode(); + clear_states(sf_tex | sf_alu); - if (addr) { - if (addr->type() == Value::literal) { - const auto& boffs = static_cast(*addr); - buffer_offset = boffs.value(); - } else { - index_mode = emit_index_reg(*addr, 0); - } - } + auto buffer_offset = fetch_instr.resource_offset(); + EBufferIndexMode rat_index_mode = bim_none; - if (fetch_instr.has_prelude()) { - for(auto &i : fetch_instr.prelude()) { - if (!i->accept(*this)) - return false; - } - } + if (buffer_offset) + rat_index_mode = emit_index_reg(*buffer_offset, 0); - bool use_tc = fetch_instr.use_tc() || (m_bc->gfx_level == CAYMAN); + if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack)) + emit_wait_ack(); + + bool use_tc = fetch_instr.has_fetch_flag(FetchInstr::use_tc) || + (m_bc->gfx_level == CAYMAN); if (!use_tc && vtx_fetch_results.find(fetch_instr.src().sel()) != vtx_fetch_results.end()) { @@ -725,7 +625,7 @@ bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr) vtx_fetch_results.clear(); } - if (fetch_instr.use_tc() && + if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) && tex_fetch_results.find(fetch_instr.src().sel()) != tex_fetch_results.end()) { m_bc->force_add_cf = 1; @@ -739,161 +639,59 @@ bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr) struct r600_bytecode_vtx vtx; memset(&vtx, 0, sizeof(vtx)); - vtx.op = fetch_instr.vc_opcode(); 
- vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset; + vtx.op = fetch_instr.opcode(); + vtx.buffer_id = fetch_instr.resource_id(); vtx.fetch_type = fetch_instr.fetch_type(); vtx.src_gpr = fetch_instr.src().sel(); vtx.src_sel_x = fetch_instr.src().chan(); vtx.mega_fetch_count = fetch_instr.mega_fetch_count(); vtx.dst_gpr = fetch_instr.dst().sel(); - vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */ - vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */ - vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */ - vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */ - vtx.use_const_fields = fetch_instr.use_const_fields(); + vtx.dst_sel_x = fetch_instr.dest_swizzle(0); /* SEL_X */ + vtx.dst_sel_y = fetch_instr.dest_swizzle(1); /* SEL_Y */ + vtx.dst_sel_z = fetch_instr.dest_swizzle(2); /* SEL_Z */ + vtx.dst_sel_w = fetch_instr.dest_swizzle(3); /* SEL_W */ + vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field); vtx.data_format = fetch_instr.data_format(); vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */ - vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */ + vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed); vtx.endian = fetch_instr.endian_swap(); - vtx.buffer_index_mode = index_mode; - vtx.offset = fetch_instr.offset(); - vtx.indexed = fetch_instr.indexed(); - vtx.uncached = fetch_instr.uncached(); + vtx.buffer_index_mode = rat_index_mode; + vtx.offset = fetch_instr.src_offset(); + vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed); + vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached); vtx.elem_size = fetch_instr.elm_size(); vtx.array_base = fetch_instr.array_base(); vtx.array_size = fetch_instr.array_size(); - vtx.srf_mode_all = fetch_instr.srf_mode_no_zero(); - + vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode); - if (fetch_instr.use_tc()) { + if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) { if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) { 
R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); - return false; + m_result = false; } } else { if ((r600_bytecode_add_vtx(m_bc, &vtx))) { R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); - return false; - } - } - - m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && fetch_instr.use_vpm(); - m_bc->cf_last->barrier = 1; - - return true; -} - -bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr) -{ - int r = r600_bytecode_add_cfinst(m_bc, instr.op()); - if (!r) - m_bc->cf_last->count = instr.stream(); - assert(m_bc->cf_last->count < 4); - - return r == 0; -} - -bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr) -{ - int r = r600_bytecode_add_cfinst(m_bc, instr.op()); - if (!r) { - m_bc->cf_last->cf_addr = instr.n_ack(); - m_bc->cf_last->barrier = 1; - } - - return r == 0; -} - -bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr) -{ - struct r600_bytecode_output cf; - - memset(&cf, 0, sizeof(struct r600_bytecode_output)); - - cf.op = CF_OP_MEM_SCRATCH; - cf.elem_size = 3; - cf.gpr = instr.gpr().sel(); - cf.mark = 1; - cf.comp_mask = instr.write_mask(); - cf.swizzle_x = 0; - cf.swizzle_y = 1; - cf.swizzle_z = 2; - cf.swizzle_w = 3; - cf.burst_count = 1; - - if (instr.indirect()) { - cf.type = 3; - cf.index_gpr = instr.address(); - - /* The docu seems to be wrong here: In indirect addressing the - * address_base seems to be the array_size */ - cf.array_size = instr.array_size(); - } else { - cf.type = 2; - cf.array_base = instr.location(); - } - /* This should be 0, but the address calculation is apparently wrong */ - - - if (r600_bytecode_add_output(m_bc, &cf)){ - R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); - return false; - } - - return true; -} - -extern const std::map ds_opcode_map; - -bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr) -{ - struct r600_bytecode_gds gds; - - int uav_idx = -1; - auto addr = 
instr.uav_id(); - if (addr->type() != Value::literal) { - emit_index_reg(*addr, 1); - } else { - const LiteralValue& addr_reg = static_cast(*addr); - uav_idx = addr_reg.value(); - } - - memset(&gds, 0, sizeof(struct r600_bytecode_gds)); - - gds.op = ds_opcode_map.at(instr.op()); - gds.dst_gpr = instr.dest_sel(); - gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base(); - gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one; - gds.src_gpr = instr.src_sel(); - - gds.src_sel_x = instr.src_swizzle(0); - gds.src_sel_y = instr.src_swizzle(1); - gds.src_sel_z = instr.src_swizzle(2); - - gds.dst_sel_x = instr.dest_swizzle(0); - gds.dst_sel_y = 7; - gds.dst_sel_z = 7; - gds.dst_sel_w = 7; - gds.src_gpr2 = 0; - gds.alloc_consume = 1; // Not Cayman + m_result = false; + } + } - int r = r600_bytecode_add_gds(m_bc, &gds); - if (r) - return false; - m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type; + m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && + fetch_instr.has_fetch_flag(FetchInstr::vpm); m_bc->cf_last->barrier = 1; - return true; } -bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr) +void AssamblerVisitor::visit(const WriteTFInstr& instr) { struct r600_bytecode_gds gds; + auto& value = instr.value(); + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); - gds.src_gpr = instr.sel(); - gds.src_sel_x = instr.chan(0); - gds.src_sel_y = instr.chan(1); + gds.src_gpr = value.sel(); + gds.src_sel_x = value[0]->chan(); + gds.src_sel_y = value[1]->chan(); gds.src_sel_z = 4; gds.dst_sel_x = 7; gds.dst_sel_y = 7; @@ -901,14 +699,16 @@ bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr) gds.dst_sel_w = 7; gds.op = FETCH_OP_TF_WRITE; - if (r600_bytecode_add_gds(m_bc, &gds) != 0) - return false; + if (r600_bytecode_add_gds(m_bc, &gds) != 0) { + m_result = false; + return; + } - if (instr.chan(2) != 7) { + if (value[2]->chan() != 7) { memset(&gds, 0, sizeof(struct r600_bytecode_gds)); - gds.src_gpr = instr.sel(); - 
gds.src_sel_x = instr.chan(2); - gds.src_sel_y = instr.chan(3); + gds.src_gpr = value.sel(); + gds.src_sel_x = value[2]->chan(); + gds.src_sel_y = value[3]->chan(); gds.src_sel_z = 4; gds.dst_sel_x = 7; gds.dst_sel_y = 7; @@ -916,178 +716,259 @@ bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr) gds.dst_sel_w = 7; gds.op = FETCH_OP_TF_WRITE; - if (r600_bytecode_add_gds(m_bc, &gds)) - return false; + if (r600_bytecode_add_gds(m_bc, &gds)) { + m_result = false; + return; + } } - return true; } -bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr) +void AssamblerVisitor::visit(const RatInstr& instr) { - r600_bytecode_alu alu; - memset(&alu, 0, sizeof(r600_bytecode_alu)); + struct r600_bytecode_gds gds; - alu.last = true; - alu.is_lds_idx_op = true; - copy_src(alu.src[0], instr.address()); - copy_src(alu.src[1], instr.value0()); + /* The instruction writes to the return buffer location, and + * the value will actually be read back, so make sure all previous writes + * have been finished */ + if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/) + emit_wait_ack(); - if (instr.num_components() == 1) { - alu.op = LDS_OP2_LDS_WRITE; - } else { - alu.op = LDS_OP3_LDS_WRITE_REL; - alu.lds_idx = 1; - copy_src(alu.src[2], instr.value1()); + int rat_idx = instr.rat_id(); + EBufferIndexMode rat_index_mode = bim_none; + auto addr = instr.rat_id_offset(); + + if (addr) + rat_index_mode = emit_index_reg(*addr, 1); + + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + + r600_bytecode_add_cfinst(m_bc, instr.cf_opcode()); + auto cf = m_bc->cf_last; + cf->rat.id = rat_idx + m_shader->rat_base; + cf->rat.inst = instr.rat_op(); + cf->rat.index_mode = rat_index_mode; + cf->output.type = instr.need_ack() ? 
3 : 1; + cf->output.gpr = instr.data_gpr(); + cf->output.index_gpr = instr.index_gpr(); + cf->output.comp_mask = instr.comp_mask(); + cf->output.burst_count = instr.burst_count(); + assert(instr.data_swz(0) == PIPE_SWIZZLE_X); + if (cf->rat.inst != RatInstr::STORE_TYPED) { + assert(instr.data_swz(1) == PIPE_SWIZZLE_Y || + instr.data_swz(1) == PIPE_SWIZZLE_MAX) ; + assert(instr.data_swz(2) == PIPE_SWIZZLE_Z || + instr.data_swz(2) == PIPE_SWIZZLE_MAX) ; } - return r600_bytecode_add_alu(m_bc, &alu) == 0; + cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT; + cf->barrier = 1; + cf->mark = instr.need_ack(); + cf->output.elem_size = instr.elm_size(); + + m_ack_suggested |= instr.need_ack(); } -bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr) + +void AssamblerVisitor::clear_states(const uint32_t& states) { - int r; - unsigned nread = 0; - unsigned nfetch = 0; - unsigned n_values = instr.num_values(); + if (states & sf_vtx) + vtx_fetch_results.clear(); - r600_bytecode_alu alu_fetch; - r600_bytecode_alu alu_read; + if (states & sf_tex) + tex_fetch_results.clear(); - /* We must add a new ALU clause if the fetch and read op would be split otherwise - * r600_asm limits at 120 slots = 240 dwords */ - if (m_bc->cf_last->ndw > 240 - 4 * n_values) - m_bc->force_add_cf = 1; + if (states & sf_alu) { + m_last_op_was_barrier = false; + m_last_addr = nullptr; + } - while (nread < n_values) { - if (nfetch < n_values) { - memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); - alu_fetch.is_lds_idx_op = true; - alu_fetch.op = LDS_OP1_LDS_READ_RET; - - copy_src(alu_fetch.src[0], instr.address(nfetch)); - alu_fetch.src[1].sel = V_SQ_ALU_SRC_0; - alu_fetch.src[2].sel = V_SQ_ALU_SRC_0; - alu_fetch.last = 1; - r = r600_bytecode_add_alu(m_bc, &alu_fetch); - m_bc->cf_last->nlds_read++; - if (r) - return false; - } +} - if (nfetch >= n_values) { - memset(&alu_read, 0, sizeof(r600_bytecode_alu)); - copy_dst(alu_read.dst, instr.dest(nread)); - alu_read.op = ALU_OP1_MOV; - 
alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; - alu_read.last = 1; - alu_read.dst.write = 1; - r = r600_bytecode_add_alu(m_bc, &alu_read); - m_bc->cf_last->nqueue_read++; - if (r) - return false; - ++nread; - } - ++nfetch; - } - assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read); - return true; +void AssamblerVisitor::visit(const Block& block) +{ + if (block.empty()) + return; + + m_bc->force_add_cf = block.has_instr_flag(Instr::force_cf); + sfn_log << SfnLog::assembly << "Translate block size: " << block.size() << " new_cf:" << m_bc->force_add_cf << "\n"; + + for (const auto& i : block) { + sfn_log << SfnLog::assembly << "Translate " << *i << " "; + i->accept(*this); + sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n"; + + if (!m_result) + break; + } } -bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr) +void AssamblerVisitor::visit(const IfInstr& instr) { - if (m_bc->cf_last->ndw > 240 - 4) - m_bc->force_add_cf = 1; + int elems = m_callstack.push(FC_PUSH_VPM); + bool needs_workaround = false; - r600_bytecode_alu alu_fetch; - r600_bytecode_alu alu_read; + if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1) + needs_workaround = true; - memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); - alu_fetch.is_lds_idx_op = true; - alu_fetch.op = instr.op(); + if (m_bc->gfx_level == EVERGREEN && + m_bc->family != CHIP_HEMLOCK && + m_bc->family != CHIP_CYPRESS && + m_bc->family != CHIP_JUNIPER) { + unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; + unsigned dmod2 = (elems) % m_bc->stack.entry_size; - copy_src(alu_fetch.src[0], instr.address()); - copy_src(alu_fetch.src[1], instr.src0()); + if (elems && (!dmod1 || !dmod2)) + needs_workaround = true; + } - if (instr.src1()) - copy_src(alu_fetch.src[2], *instr.src1()); - alu_fetch.last = 1; - int r = r600_bytecode_add_alu(m_bc, &alu_fetch); - if (r) - return false; + auto pred = instr.predicate(); + auto [addr, dummy ] = pred->indirect_addr(); {} + if (addr) { + 
if (!m_last_addr || !m_bc->ar_loaded || + !m_last_addr->equal_to(*addr)) { + m_bc->ar_reg = addr->sel(); + m_bc->ar_chan = addr->chan(); + m_last_addr = addr; + m_bc->ar_loaded = 0; + + r600_load_ar(m_bc); + } + } - memset(&alu_read, 0, sizeof(r600_bytecode_alu)); - copy_dst(alu_read.dst, instr.dest()); - alu_read.op = ALU_OP1_MOV; - alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; - alu_read.last = 1; - alu_read.dst.write = 1; - r = r600_bytecode_add_alu(m_bc, &alu_read); - if (r) - return false; - return true; + if (needs_workaround) { + r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + pred->set_cf_type(cf_alu); + } + + clear_states(sf_tex|sf_vtx); + pred->accept(*this); + + r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); + clear_states(sf_all); + + m_jump_tracker.push(m_bc->cf_last, jt_if); +} + +void AssamblerVisitor::visit(const ControlFlowInstr& instr) +{ + clear_states(sf_all); + switch (instr.cf_type()) { + case ControlFlowInstr::cf_else: + emit_else(); + break; + case ControlFlowInstr::cf_endif: + emit_endif(); + break; + case ControlFlowInstr::cf_loop_begin: + emit_loop_begin(instr.has_instr_flag(Instr::vpm)); + break; + case ControlFlowInstr::cf_loop_end: + emit_loop_end(); + break; + case ControlFlowInstr::cf_loop_break: + emit_loop_break(); + break; + case ControlFlowInstr::cf_loop_continue: + emit_loop_cont(); + break; + case ControlFlowInstr::cf_wait_ack: + { + int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK); + if (!r) { + m_bc->cf_last->cf_addr = 0; + m_bc->cf_last->barrier = 1; + m_ack_suggested = false; + } else { + m_result = false; + } + } + break; + default: + unreachable("Unknown CF instruction type"); + } } -bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr) +void AssamblerVisitor::visit(const GDSInstr& instr) { struct r600_bytecode_gds gds; - int rat_idx = instr.rat_id(); - EBufferIndexMode rat_index_mode = bim_none; - auto addr = instr.rat_id_offset(); + bool indirect = 
false; + auto addr = instr.uav_id(); if (addr) { - if (addr->type() != Value::literal) { - rat_index_mode = emit_index_reg(*addr, 1); - } else { - const LiteralValue& addr_reg = static_cast(*addr); - rat_idx += addr_reg.value(); - } + indirect = true; + emit_index_reg(*addr, 1); } + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); - r600_bytecode_add_cfinst(m_bc, instr.cf_opcode()); - auto cf = m_bc->cf_last; - cf->rat.id = rat_idx + m_shader->rat_base; - cf->rat.inst = instr.rat_op(); - cf->rat.index_mode = rat_index_mode; - cf->output.type = instr.need_ack() ? 3 : 1; - cf->output.gpr = instr.data_gpr(); - cf->output.index_gpr = instr.index_gpr(); - cf->output.comp_mask = instr.comp_mask(); - cf->output.burst_count = instr.burst_count(); - assert(instr.data_swz(0) == PIPE_SWIZZLE_X); - if (cf->rat.inst != RatInstruction::STORE_TYPED) { - assert(instr.data_swz(1) == PIPE_SWIZZLE_Y || - instr.data_swz(1) == PIPE_SWIZZLE_MAX) ; - assert(instr.data_swz(2) == PIPE_SWIZZLE_Z || - instr.data_swz(2) == PIPE_SWIZZLE_MAX) ; + gds.op = ds_opcode_map.at(instr.opcode()); + gds.dst_gpr = instr.dest()->sel(); + gds.uav_id = instr.uav_base(); + gds.uav_index_mode = indirect ? bim_one : bim_none; + gds.src_gpr = instr.src().sel(); + + gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4; + gds.src_sel_y = instr.src()[1]->chan(); + gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4; + + gds.dst_sel_x = 7; + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + + switch (instr.dest()->chan()) { + case 0: gds.dst_sel_x = 0;break; + case 1: gds.dst_sel_y = 0;break; + case 2: gds.dst_sel_z = 0;break; + case 3: gds.dst_sel_w = 0; } - cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT; - cf->barrier = 1; - cf->mark = instr.need_ack(); - cf->output.elem_size = instr.elm_size(); - return true; + gds.src_gpr2 = 0; + gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 
1 : 0; // Not Cayman + + int r = r600_bytecode_add_gds(m_bc, &gds); + if (r) { + m_result = false; + return; + } + m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type; + m_bc->cf_last->barrier = 1; +} + +void AssamblerVisitor::visit(const LDSAtomicInstr& instr) +{ + (void)instr; + unreachable("LDSAtomicInstr must be lowered to ALUInstr"); +} + +void AssamblerVisitor::visit(const LDSReadInstr& instr) +{ + (void)instr; + unreachable("LDSReadInstr must be lowered to ALUInstr"); } EBufferIndexMode -AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx) +AssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx) { assert(idx < 2); if (!m_bc->index_loaded[idx] || m_loop_nesting || - m_bc->index_reg[idx] != addr.sel() - || m_bc->index_reg_chan[idx] != addr.chan()) { + m_bc->index_reg[idx] != (unsigned)addr.sel() + || m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) { struct r600_bytecode_alu alu; // Make sure MOVA is not last instr in clause - if ((m_bc->cf_last->ndw>>1) >= 110) + + if (!m_bc->cf_last || (m_bc->cf_last->ndw>>1) >= 110) m_bc->force_add_cf = 1; if (m_bc->gfx_level != CAYMAN) { EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0; + memset(&alu, 0, sizeof(alu)); alu.op = opcode_map.at(op1_mova_int); alu.dst.chan = 0; @@ -1126,18 +1007,87 @@ AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx) m_bc->index_reg[idx] = addr.sel(); m_bc->index_reg_chan[idx] = addr.chan(); m_bc->index_loaded[idx] = true; + m_bc->force_add_cf = 1; sfn_log << SfnLog::assembly << "\n"; } return idx == 0 ? 
bim_zero : bim_one; } -bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst, - const Value& d) +void AssamblerVisitor::emit_else() +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); + m_bc->cf_last->pop_count = 1; + m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if); +} + +void AssamblerVisitor::emit_endif() +{ + m_callstack.pop(FC_PUSH_VPM); + + unsigned force_pop = m_bc->force_add_cf; + if (!force_pop) { + int alu_pop = 3; + if (m_bc->cf_last) { + if (m_bc->cf_last->op == CF_OP_ALU) + alu_pop = 0; + else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) + alu_pop = 1; + } + alu_pop += 1; + if (alu_pop == 1) { + m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; + m_bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(m_bc, CF_OP_POP); + m_bc->cf_last->pop_count = 1; + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + } + + m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if); +} + +void AssamblerVisitor::emit_loop_begin(bool vpm) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); + m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT; + m_jump_tracker.push(m_bc->cf_last, jt_loop); + m_callstack.push(FC_LOOP); + ++m_loop_nesting; +} + +void AssamblerVisitor::emit_loop_end() { - assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value); + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); + m_callstack.pop(FC_LOOP); + assert(m_loop_nesting); + --m_loop_nesting; + m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop); +} + +void AssamblerVisitor::emit_loop_break() +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); + m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +void AssamblerVisitor::emit_loop_cont() +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); + m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} - if (d.sel() > 124) { - R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel()); +bool 
AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst, + const Register& d, bool write) +{ + if (write && d.sel() > 124) { + R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", + d.sel()); + m_result = false; return false; } @@ -1155,74 +1105,84 @@ bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst, return true; } -bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s) +void AssamblerVisitor::emit_wait_ack() { + int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK); + if (!r) { + m_bc->cf_last->cf_addr = 0; + m_bc->cf_last->barrier = 1; + m_ack_suggested = false; + } else + m_result = false; +} - if (s.type() == Value::gpr && s.sel() > 124) { - R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel()); - return false; - } +class EncodeSourceVisitor : public ConstRegisterVisitor { +public: - if (s.type() == Value::lds_direct) { - R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n"); - return false; - } + EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc); + void visit(const Register& value) override; + void visit(const LocalArray& value) override; + void visit(const LocalArrayValue& value) override; + void visit(const UniformValue& value) override; + void visit(const LiteralConstant& value) override; + void visit(const InlineConstant& value) override; - if (s.type() == Value::kconst && s.sel() < 512) { - R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel()); - return false; - } + r600_bytecode_alu_src& src; + r600_bytecode *m_bc; + PVirtualValue m_buffer_offset{nullptr}; +}; - if (s.type() == Value::literal) { - auto& v = static_cast(s); - if (v.value() == 0) { - src.sel = ALU_SRC_0; - src.chan = 0; - return true; - } - if (v.value() == 1) { - src.sel = ALU_SRC_1_INT; - src.chan = 0; - return true; - } - if (v.value_float() == 1.0f) { - src.sel = ALU_SRC_1; - src.chan = 0; - return true; - } - if 
(v.value_float() == 0.5f) { - src.sel = ALU_SRC_0_5; - src.chan = 0; - return true; - } - if (v.value() == 0xffffffff) { - src.sel = ALU_SRC_M_1_INT; - src.chan = 0; - return true; - } - src.value = v.value(); - } +PVirtualValue AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s) +{ + EncodeSourceVisitor visitor(src, m_bc); src.sel = s.sel(); src.chan = s.chan(); - if (s.type() == Value::kconst) { - const UniformValue& cv = static_cast(s); - src.kc_bank = cv.kcache_bank(); - auto addr = cv.addr(); - if (addr) { - src.kc_rel = 1; - emit_index_reg(*addr, 0); - auto type = m_bc->cf_last->op; - if (r600_bytecode_add_cf(m_bc)) { - return false; - } - m_bc->cf_last->op = type; - } - } - return true; + s.accept(visitor); + return visitor.m_buffer_offset; +} + +EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc): + src(s), m_bc(bc) +{ +} + +void EncodeSourceVisitor::visit(const Register& value) +{ + assert(value.sel() <= 124 && "Only have 124 registers"); +} + +void EncodeSourceVisitor::visit(const LocalArray& value) +{ + (void)value; + unreachable("An array can't be a source register"); +} + +void EncodeSourceVisitor::visit(const LocalArrayValue& value) +{ + src.rel = value.addr() ? 
1 : 0; +} + +void EncodeSourceVisitor::visit(const UniformValue& value) +{ + assert(value.sel() >= 512 && "Uniform values must have a sel >= 512"); + m_buffer_offset = value.buf_addr(); + src.kc_bank = value.kcache_bank(); +} + +void EncodeSourceVisitor::visit(const LiteralConstant& value) +{ + src.value = value.value(); } +void EncodeSourceVisitor::visit(const InlineConstant& value) +{ + (void)value; +} + + + const std::map opcode_map = { {op2_add, ALU_OP2_ADD}, @@ -1253,6 +1213,10 @@ const std::map opcode_map = { {op2_mul_64, ALU_OP2_MUL_64}, {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_prede_int, ALU_OP2_PRED_SETE_INT}, + {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, + {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, + {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT}, {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT}, {op2_pred_sete, ALU_OP2_PRED_SETE}, diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.h b/src/gallium/drivers/r600/sfn/sfn_assembler.h new file mode 100644 index 0000000..796ff60 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.h @@ -0,0 +1,26 @@ +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#include "../r600_pipe.h" +#include "../r600_shader.h" + +#include "sfn_shader.h" + +namespace r600 { + +class Assembler +{ +public: + Assembler(r600_shader *sh, const r600_shader_key& key); + + bool lower(Shader *shader); +private: + r600_shader *m_sh; + const r600_shader_key& m_key; +}; + + + +} + +#endif // ASSEMBLER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h index 76cc02a..cd2b975 100644 --- a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h +++ b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h @@ -38,10 +38,7 @@ enum JumpType { /** Class to link the jump locations - */ - - class ConditionalJumpTracker { public: @@ -49,7 +46,6 @@ public:
~ConditionalJumpTracker(); /* Mark the start of a loop or a if/else */ - void push(r600_bytecode_cf *start, JumpType type); /* Mark the end of a loop or a if/else and fixup the jump sites */ diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/src/gallium/drivers/r600/sfn/sfn_debug.cpp index 92357fc..fce891e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_debug.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_debug.cpp @@ -61,6 +61,10 @@ static const struct debug_named_value sfn_debug_options[] = { {"nomerge", SfnLog::nomerge, "Skip register merge step"}, {"tex", SfnLog::tex, "Log texture ops"}, {"trans", SfnLog::trans, "Log generic translation messages"}, + {"schedule", SfnLog::schedule, "Log scheduling"}, + {"opt", SfnLog::opt, "Log optimization"}, + {"steps", SfnLog::steps, "Log shaders at transformation steps"}, + {"noopt", SfnLog::noopt, "Don't run backend optimizations"}, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.h b/src/gallium/drivers/r600/sfn/sfn_debug.h index 3910b27..fd3c0fb 100644 --- a/src/gallium/drivers/r600/sfn/sfn_debug.h +++ b/src/gallium/drivers/r600/sfn/sfn_debug.h @@ -64,8 +64,12 @@ public: merge = 1 << 10, tex = 1 << 11, trans = 1 << 12, - all = (1 << 13) - 1, + schedule = 1 << 13, + opt = 1 << 14, + all = (1 << 15) - 1, nomerge = 1 << 16, + steps = 1 << 17, + noopt = 1 << 18 }; SfnLog(); diff --git a/src/gallium/drivers/r600/sfn/sfn_defines.h b/src/gallium/drivers/r600/sfn/sfn_defines.h index 31a10ae..c5a18b0 100644 --- a/src/gallium/drivers/r600/sfn/sfn_defines.h +++ b/src/gallium/drivers/r600/sfn/sfn_defines.h @@ -303,6 +303,9 @@ enum EVFetchFlagShift { vtx_alt_const, vtx_use_tc, vtx_vpm, + vtx_is_mega_fetch, + vtx_uncached, + vtx_indexed, vtx_unknown }; diff --git a/src/gallium/drivers/r600/sfn/sfn_docu.txt b/src/gallium/drivers/r600/sfn/sfn_docu.txt index 97a9c36..4784599 100644 --- a/src/gallium/drivers/r600/sfn/sfn_docu.txt +++ b/src/gallium/drivers/r600/sfn/sfn_docu.txt @@ -2,44 +2,33 @@ This code is an 
attempt to implement a NIR backend for r600. -## State - -Supported hardware: Evergreen and NI (tested on CEDAR and BARTS) +Supported hardware: Cayman, Evergreen and NI (tested on CAYMAN, CEDAR and BARTS) -Thanks to soft fp64 the OpenGL version is now 4.5 +Thanks to soft fp64 the OpenGL version is now 4.5 also for EG. -sb has been enabled for nir to be able to run some more demanding work loads. The aim is -still to get rid of it. +sb can be enabled for nir, it still gives some improvements, e.g. with Xonotic +The aim is still to get rid of it. -piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions. +## State -CTS gles - - 2 passes like with TGSI - - 3 no regressions, a few fixes compared to TGSI - - 31 - * a few fixes with interpolation specifiers - * synchronization has some unstable tests, this might be because global synchronization is missing (in both) +TODO: -GL CTS: - * a few regressions and a hang with KHR-GL43.compute_shader.shared-max +piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions. piglit: - * spilling arrays is broken on Barts (but it works on Cedar) - * a few tests fail because the register limit is exhausted, and needlessly so, because - with better RA it would work + * spilling arrays is broken on Barts and CAYMAN (but it works on Cedar) ## Needed optimizations: - - Register allocator and scheduler (Could the sb allocator and scheduler - be ported?) - - peepholes: - - compare + set predicate - - - copy propagation: - - Moves from inputs are usually not required, they could be forwarded - - texture operations often move additional parameters in extra registers - but they are actually needed in the same registers they come from and - could just be swizzled into the right place - (lower in NIR like it is done in e.g.
in ETNAVIV) + - compare + set predicate / kill + - use clause local registers + - reduce register usage + - don't rely on the backend to schedule addr load and Index load as well + - don't rely on the backend to merge some alu groups + +## There are still some hangs + + + diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp deleted file mode 100644 index 3068225..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp +++ /dev/null @@ -1,1046 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#include "sfn_emitaluinstruction.h" -#include "sfn_debug.h" - -#include "gallium/drivers/r600/r600_shader.h" - -namespace r600 { - -using std::vector; - -EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor): - EmitInstruction (processor) -{ - -} - -bool EmitAluInstruction::do_emit(nir_instr* ir) -{ - const nir_alu_instr& instr = *nir_instr_as_alu(ir); - - r600::sfn_log << SfnLog::instr << "emit '" - << *ir - << " bitsize: " << static_cast(instr.dest.dest.ssa.bit_size) - << "' (" << __func__ << ")\n"; - - preload_src(instr); - - if (get_chip_class() == CAYMAN) { - switch (instr.op) { - case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos); - case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee); - case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped); - case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee); - case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1); - case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin); - case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee); - default: - ; - } - } - - switch (instr.op) { - /* These are in the ALU instruction list, but they should be texture instructions */ - case nir_op_b2b1: return emit_mov(instr); - case nir_op_b2b32: return emit_mov(instr); - case nir_op_b2f32: return emit_alu_b2f(instr); - case nir_op_b2i32: return emit_b2i32(instr); - case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true); - case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true); - case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true); - case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true); - case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true); - case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true); - case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, 
op2_setne_dx10, false); - case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false); - case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false); - case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false); - case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); - case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); - case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); - case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true); - case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true); - case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true); - case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true); - case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true); - case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true); - case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false); - case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false); - case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false); - case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false); - case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); - case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); - case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); - case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int); - case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int); - - case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int); - case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int); - case nir_op_cube_r600: return emit_cube(instr); - case nir_op_f2b1: return 
emit_alu_i2orf2_b1(instr, op2_setne_dx10); - case nir_op_f2b32: return emit_alu_f2b32(instr); - case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int); - case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint); - case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); - case nir_op_fadd: return emit_alu_op2(instr, op2_add); - case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); - case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos); - case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1}); - case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2}); - case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2}); - - /* These are in the ALU instruction list, but they should be texture instructions */ - case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); - case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); - case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); - case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false); - case nir_op_fddy_coarse: - case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true); - case nir_op_fdot2: return emit_dot(instr, 2); - case nir_op_fdot3: return emit_dot(instr, 3); - case nir_op_fdot4: return emit_dot(instr, 4); - case nir_op_fdph: return emit_fdph(instr); - case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10); - case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10); - case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee); - case nir_op_ffloor: return emit_alu_op1(instr, op1_floor); - case nir_op_ffma: - if (use_legacy_math_rules()) - return emit_alu_op2(instr, op3_muladd); - return emit_alu_op3(instr, op3_muladd_ieee); - case nir_op_ffract: return emit_alu_op1(instr, op1_fract); - case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10); 
- case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10); - case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int); - case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped); - case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); - case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); - case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10); - case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10); - case nir_op_fmul: - if (use_legacy_math_rules()) - return emit_alu_op2(instr, op2_mul); - return emit_alu_op2(instr, op2_mul_ieee); - case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg}); - case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10); - case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10); - case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee); - case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); - case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); - case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); - case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin); - case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); - case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); - case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); - case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int); - case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int); - case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt); - case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int); - case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int); - case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int); - case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2}); - case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2}); - case nir_op_ieq32: return 
emit_alu_op2_int(instr, op2_sete_int); - case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int); - case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int); - case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int); - case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int); - case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); - case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); - case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int); - case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int); - case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int); - case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int); - case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int); - case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int); - case nir_op_ineg: return emit_alu_ineg(instr); - case nir_op_inot: return emit_alu_op1(instr, op1_not_int); - case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int); - case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int); - case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int); - case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int); - case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int); - case nir_op_mov:return emit_mov(instr); - case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr); - case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr); - case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse); - case nir_op_sge: return emit_alu_op2(instr, op2_setge); - case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt); - case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint); - case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint); - case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint); - case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint); - case 
nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); - case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); - case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2}); - case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint); - case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint); - case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24); - case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint); - case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0); - case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1); - case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr); - case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr); - case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int); - case nir_op_vec2: return emit_create_vec(instr, 2); - case nir_op_vec3: return emit_create_vec(instr, 3); - case nir_op_vec4: return emit_create_vec(instr, 4); - default: - return false; - } -} - -void EmitAluInstruction::preload_src(const nir_alu_instr& instr) -{ - const nir_op_info *op_info = &nir_op_infos[instr.op]; - assert(op_info->num_inputs <= 4); - - unsigned nsrc_comp = num_src_comp(instr); - sfn_log << SfnLog::reg << "Preload:\n"; - for (unsigned i = 0; i < op_info->num_inputs; ++i) { - for (unsigned c = 0; c < nsrc_comp; ++c) { - m_src[i][c] = from_nir(instr.src[i], c); - sfn_log << SfnLog::reg << " " << *m_src[i][c]; - - } - sfn_log << SfnLog::reg << "\n"; - } - if (instr.op == nir_op_fdph) { - m_src[1][3] = from_nir(instr.src[1], 3); - sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n"; - } - - split_constants(instr, nsrc_comp); -} - -unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr) -{ - switch (instr.op) { - case nir_op_fdot2: - case nir_op_bany_inequal2: - case nir_op_ball_iequal2: - case nir_op_bany_fnequal2: - case 
nir_op_ball_fequal2: - case nir_op_b32any_inequal2: - case nir_op_b32all_iequal2: - case nir_op_b32any_fnequal2: - case nir_op_b32all_fequal2: - case nir_op_unpack_64_2x32_split_y: - return 2; - - case nir_op_fdot3: - case nir_op_bany_inequal3: - case nir_op_ball_iequal3: - case nir_op_bany_fnequal3: - case nir_op_ball_fequal3: - case nir_op_b32any_inequal3: - case nir_op_b32all_iequal3: - case nir_op_b32any_fnequal3: - case nir_op_b32all_fequal3: - case nir_op_cube_r600: - return 3; - - case nir_op_fdot4: - case nir_op_fdph: - case nir_op_bany_inequal4: - case nir_op_ball_iequal4: - case nir_op_bany_fnequal4: - case nir_op_ball_fequal4: - case nir_op_b32any_inequal4: - case nir_op_b32all_iequal4: - case nir_op_b32any_fnequal4: - case nir_op_b32all_fequal4: - return 4; - - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - return 1; - - default: - return nir_dest_num_components(instr.dest.dest); - - } -} - -bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - const uint16_t src0_chan[4] = {2, 2, 0, 1}; - const uint16_t src1_chan[4] = {1, 0, 2, 2}; - - for (int i = 0; i < 4; ++i) { - ir = new AluInstruction(op2_cube, from_nir(instr.dest, i), - from_nir(instr.src[0], src0_chan[i]), - from_nir(instr.src[0], src1_chan[i]), {alu_write}); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - return true; -} - -void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp) -{ - const nir_op_info *op_info = &nir_op_infos[instr.op]; - if (op_info->num_inputs < 2) - return; - - int nconst = 0; - std::array c; - std::array idx; - for (unsigned i = 0; i < op_info->num_inputs; ++i) { - PValue& src = m_src[i][0]; - assert(src); - sfn_log << SfnLog::reg << "Split test " << *src; - - if (src->type() == Value::kconst) { - c[nconst] = static_cast(src.get()); - idx[nconst++] = i; - sfn_log << SfnLog::reg << " is constant " << i; - } - sfn_log << SfnLog::reg << "\n"; - } - - if (nconst < 2) - 
return; - - unsigned sel = c[0]->sel(); - unsigned kcache = c[0]->kcache_bank(); - sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ; - - for (int i = 1; i < nconst; ++i) { - sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n"; - if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) { - AluInstruction *ir = nullptr; - auto v = get_temp_vec4(); - for (unsigned k = 0; k < nsrc_comp; ++k) { - ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write}); - emit_instruction(ir); - m_src[idx[i]][k] = v[k]; - } - make_last(ir); - } - } -} - -bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr) -{ - if (instr.src[0].negate || instr.src[0].abs) { - std::cerr << "source modifiers not supported with int ops\n"; - return false; - } - - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i), - m_src[0][i], write); - emit_instruction(ir); - } - } - make_last(ir); - return true; -} - -bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, - const AluOpFlags& flags) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[0][i], write); - - if (flags.test(alu_src0_abs) || instr.src[0].abs) - ir->set_flag(alu_src0_abs); - - if (instr.src[0].negate ^ flags.test(alu_src0_neg)) - ir->set_flag(alu_src0_neg); - - if (flags.test(alu_dst_clamp) || instr.dest.saturate) - ir->set_flag(alu_dst_clamp); - - emit_instruction(ir); - } - } - make_last(ir); - - return true; -} - -bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr) -{ - /* If the op is a plain move beween SSA values we can just forward - * the register reference to the original register */ - if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa && - !instr.src[0].abs && !instr.src[0].negate && 
!instr.dest.saturate) { - bool result = true; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - result &= inject_register(instr.dest.dest.ssa.index, i, - m_src[0][i], true); - } - } - return result; - } else { - return emit_alu_op1(instr, op1_mov); - } -} - -bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, - bool absolute) -{ - AluInstruction *ir = nullptr; - std::set src_idx; - - if (get_chip_class() == CAYMAN) { - int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3; - for (int i = 0; i < last_slot; ++i) { - bool write_comp = instr.dest.write_mask & (1 << i); - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[0][write_comp ? i : 0], write_comp ? write : empty); - if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - - if (i == (last_slot - 1)) ir->set_flag(alu_last_instr); - - emit_instruction(ir); - } - } else { - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[0][i], last_write); - if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - } - } - return true; -} - -bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode) -{ - AluInstruction *ir = nullptr; - std::set src_idx; - - unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3; - - for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) { - for (unsigned i = 0; i < last_slot; ++i) { - bool write_comp = instr.dest.write_mask & (1 << j) && (i == j); - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[0][j], write_comp ? 
write : empty); - if (instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - - if (i == (last_slot - 1)) ir->set_flag(alu_last_instr); - - emit_instruction(ir); - } - } - return true; -} - - -bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op) -{ - AluInstruction *ir = nullptr; - - if (get_chip_class() < CAYMAN) { - std::array v; - - for (int i = 0; i < 4; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - v[i] = from_nir(instr.dest, i); - ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write}); - if (instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - emit_instruction(ir); - } - make_last(ir); - - for (int i = 0; i < 4; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op, v[i], v[i], {alu_write}); - emit_instruction(ir); - if (op == op1_flt_to_uint) - make_last(ir); - } - make_last(ir); - } else { - for (int i = 0; i < 4; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write}); - if (instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - emit_instruction(ir); - if (op == op1_flt_to_uint) - make_last(ir); - } - make_last(ir); - } - - return true; -} - -bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i), - m_src[0][i], literal(0.0f), write); - emit_instruction(ir); - } - } - make_last(ir); - return true; -} - -bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - - 
ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), - m_src[0][i], Value::one_i, write); - emit_instruction(ir); - } - make_last(ir); - - return true; -} - -bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 2; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), - m_src[0][i], write); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp) -{ - emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0), - m_src[0][comp], last_write)); - return true; -} - -bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc) -{ - AluInstruction *ir = nullptr; - std::set src_slot; - for(unsigned i = 0; i < nc; ++i) { - if (instr.dest.write_mask & (1 << i)){ - auto src = m_src[i][0]; - ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - - // FIXME: This is a rather crude approach to fix the problem that - // r600 can't read from four different slots of the same component - // here we check only for the register index - if (src->type() == Value::gpr) - src_slot.insert(src->sel()); - if (src_slot.size() >= 3) { - src_slot.clear(); - ir->set_flag(alu_last_instr); - } - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n) -{ - const nir_alu_src& src0 = instr.src[0]; - const nir_alu_src& src1 = instr.src[1]; - EAluOp dot4_op = use_legacy_math_rules() ? op2_dot4 : op2_dot4_ieee; - - AluInstruction *ir = nullptr; - for (int i = 0; i < n ; ++i) { - ir = new AluInstruction(dot4_op, from_nir(instr.dest, i), - m_src[0][i], m_src[1][i], - instr.dest.write_mask & (1 << i) ? 
write : empty); - - if (src0.negate) ir->set_flag(alu_src0_neg); - if (src0.abs) ir->set_flag(alu_src0_abs); - if (src1.negate) ir->set_flag(alu_src1_neg); - if (src1.abs) ir->set_flag(alu_src1_abs); - - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - for (int i = n; i < 4 ; ++i) { - ir = new AluInstruction(dot4_op, from_nir(instr.dest, i), - Value::zero, Value::zero, - instr.dest.write_mask & (1 << i) ? write : empty); - emit_instruction(ir); - } - - if (ir) - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr) -{ - const nir_alu_src& src0 = instr.src[0]; - const nir_alu_src& src1 = instr.src[1]; - - AluInstruction *ir = nullptr; - for (int i = 0; i < 3 ; ++i) { - ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), - m_src[0][i], m_src[1][i], - instr.dest.write_mask & (1 << i) ? write : empty); - if (src0.negate) ir->set_flag(alu_src0_neg); - if (src0.abs) ir->set_flag(alu_src0_abs); - if (src1.negate) ir->set_flag(alu_src1_neg); - if (src1.abs) ir->set_flag(alu_src1_abs); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - - ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f, - m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? 
write : empty); - if (src1.negate) ir->set_flag(alu_src1_neg); - if (src1.abs) ir->set_flag(alu_src1_abs); - emit_instruction(ir); - - ir->set_flag(alu_last_instr); - return true; - -} - -bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)) { - ir = new AluInstruction(op, from_nir(instr.dest, i), - m_src[0][i], Value::zero, - write); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), - m_src[0][i], Value::one_f, write); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - if (instr.src[0].abs) ir->set_flag(alu_src0_abs); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) -{ - - AluInstruction *ir = nullptr; - PValue v[4]; // this might need some additional temp register creation - for (unsigned i = 0; i < 4 ; ++i) - v[i] = from_nir(instr.dest, i); - - EAluOp combine = all ? 
op2_and_int : op2_or_int; - - /* For integers we can not use the modifiers, so this needs some emulation */ - /* Should actually be lowered with NIR */ - if (instr.src[0].negate == instr.src[1].negate && - instr.src[0].abs == instr.src[1].abs) { - - for (unsigned i = 0; i < nc ; ++i) { - ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } else { - std::cerr << "Negate in iequal/inequal not (yet) supported\n"; - return false; - } - - for (unsigned i = 0; i < nc/2 ; ++i) { - ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - - if (nc > 2) { - ir = new AluInstruction(combine, v[0], v[0], v[2], last_write); - emit_instruction(ir); - } - - return true; -} - -bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) -{ - AluInstruction *ir = nullptr; - PValue v[4]; // this might need some additional temp register creation - for (unsigned i = 0; i < 4 ; ++i) - v[i] = from_nir(instr.dest, i); - - for (unsigned i = 0; i < nc ; ++i) { - ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); - - if (instr.src[0].abs) - ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) - ir->set_flag(alu_src0_neg); - - if (instr.src[1].abs) - ir->set_flag(alu_src1_abs); - if (instr.src[1].negate) - ir->set_flag(alu_src1_neg); - - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - - for (unsigned i = 0; i < nc ; ++i) { - ir = new AluInstruction(op1_max4, v[i], v[i], write); - if (all) ir->set_flag(alu_src0_neg); - emit_instruction(ir); - } - - for (unsigned i = nc; i < 4 ; ++i) { - ir = new AluInstruction(op1_max4, v[i], - all ? Value::one_f : Value::zero, write); - if (all) - ir->set_flag(alu_src0_neg); - - emit_instruction(ir); - } - - ir->set_flag(alu_last_instr); - - if (all) - op = (op == op2_sete) ? 
op2_sete_dx10: op2_setne_dx10; - else - op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10; - - ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write); - if (all) - ir->set_flag(alu_src1_neg); - emit_instruction(ir); - - return true; -} - -bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all) -{ - AluInstruction *ir = nullptr; - PValue v[4]; // this might need some additional temp register creation - for (unsigned i = 0; i < 4 ; ++i) - v[i] = from_nir(instr.dest, i); - - for (unsigned i = 0; i < 2 ; ++i) { - ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); - if (instr.src[0].abs) - ir->set_flag(alu_src0_abs); - if (instr.src[0].negate) - ir->set_flag(alu_src0_neg); - - if (instr.src[1].abs) - ir->set_flag(alu_src1_abs); - if (instr.src[1].negate) - ir->set_flag(alu_src1_neg); - - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - - op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int; - ir = new AluInstruction(op, v[0], v[0], v[1], last_write); - emit_instruction(ir); - - return true; -} - -bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode) -{ - const nir_alu_src& src0 = instr.src[0]; - const nir_alu_src& src1 = instr.src[1]; - - AluInstruction *ir = nullptr; - - if (get_chip_class() == CAYMAN) { - for (int k = 0; k < 4; ++k) { - if (instr.dest.write_mask & (1 << k)) { - - for (int i = 0; i < 4; i++) { - ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? 
write : empty); - if (src0.negate) ir->set_flag(alu_src0_neg); - if (src0.abs) ir->set_flag(alu_src0_abs); - if (src1.negate) ir->set_flag(alu_src1_neg); - if (src1.abs) ir->set_flag(alu_src1_abs); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - if (i == 3) ir->set_flag(alu_last_instr); - emit_instruction(ir); - } - } - } - } else { - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write); - if (src0.negate) ir->set_flag(alu_src0_neg); - if (src0.abs) ir->set_flag(alu_src0_abs); - if (src1.negate) ir->set_flag(alu_src1_neg); - if (src1.abs) ir->set_flag(alu_src1_abs); - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - } - } - return true; -} - -bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts) -{ - - const nir_alu_src& src0 = instr.src[0]; - const nir_alu_src& src1 = instr.src[1]; - - if (src0.negate || src1.negate || - src0.abs || src1.abs) { - std::cerr << "R600: don't support modifiers with integer operations"; - return false; - } - return emit_alu_op2(instr, opcode, opts); -} - -bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) -{ - const nir_alu_src *src0 = &instr.src[0]; - const nir_alu_src *src1 = &instr.src[1]; - - int idx0 = 0; - int idx1 = 1; - if (ops & op2_opt_reverse) { - std::swap(src0, src1); - std::swap(idx0, idx1); - } - - bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate; - - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[idx0][i], m_src[idx1][i], write); - - if (src0->negate) ir->set_flag(alu_src0_neg); - if (src0->abs) ir->set_flag(alu_src0_abs); - if (src1_negate) ir->set_flag(alu_src1_neg); - if (src1->abs) ir->set_flag(alu_src1_abs); - if 
(instr.dest.saturate) ir->set_flag(alu_dst_clamp); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; -} - -bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, - std::array reorder) -{ - const nir_alu_src *src[3]; - src[0] = &instr.src[reorder[0]]; - src[1] = &instr.src[reorder[1]]; - src[2] = &instr.src[reorder[2]]; - - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(opcode, from_nir(instr.dest, i), - m_src[reorder[0]][i], - m_src[reorder[1]][i], - m_src[reorder[2]][i], - write); - - if (src[0]->negate) ir->set_flag(alu_src0_neg); - if (src[1]->negate) ir->set_flag(alu_src1_neg); - if (src[2]->negate) ir->set_flag(alu_src2_neg); - - if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); - ir->set_flag(alu_write); - emit_instruction(ir); - } - } - make_last(ir); - return true; -} - -bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - if (instr.dest.write_mask & (1 << i)){ - ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero, - m_src[0][i], write); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - - return true; -} - -static const char swz[] = "xyzw01?_"; - -void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, - const GPRVector::Values& v, GPRVector::Values& out, int ncomp) -{ - - AluInstruction *alu = nullptr; - for (int i = 0; i < ncomp; ++i) { - alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write}); - if (src.abs) - alu->set_flag(alu_src0_abs); - if (src.negate) - alu->set_flag(alu_src0_neg); - emit_instruction(alu); - } - make_last(alu); -} - -bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, - bool fine) -{ - - GPRVector::Values v; - std::array writemask = {0,1,2,3}; - - int ncomp = 
nir_dest_num_components(instr.dest.dest); - GPRVector::Swizzle src_swz = {7,7,7,7}; - for (auto i = 0; i < ncomp; ++i) - src_swz[i] = instr.src[0].swizzle[i]; - - auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz); - - if (instr.src[0].abs || instr.src[0].negate) { - GPRVector tmp = get_temp_vec4(); - split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp); - src = tmp; - } - - for (int i = 0; i < 4; ++i) { - writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7; - v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0); - } - - /* This is querying the dreivatives of the output fb, so we would either need - * access to the neighboring pixels or to the framebuffer. Neither is currently - * implemented */ - GPRVector dst(v); - - auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue()); - tex->set_dest_swizzle(writemask); - - if (fine) - tex->set_flag(TexInstruction::grad_fine); - - emit_instruction(tex); - - return true; -} - -bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr) -{ - auto tmp = get_temp_register(); - emit_instruction(op2_lshr_int, tmp, - {m_src[0][0], PValue(new LiteralValue(16))}, - {alu_write, alu_last_instr}); - - emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), - {tmp}, {alu_write, alu_last_instr}); - - return true; -} - -bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr) -{ - emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), - {m_src[0][0]},{alu_write, alu_last_instr}); - return true; -} - -bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr) -{ - PValue x = get_temp_register(); - PValue y = get_temp_register(); - - emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write}); - emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr}); - - emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr}); - - 
emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr}); - - return true; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h deleted file mode 100644 index 509f5cf..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_EMITALUINSTRUCTION_H -#define SFN_EMITALUINSTRUCTION_H - -#include "sfn_emitinstruction.h" - -#include "sfn_alu_defines.h" -#include "sfn_instruction_alu.h" -#include "sfn_instruction_tex.h" - -namespace r600 { - - -class EmitAluInstruction : public EmitInstruction -{ -public: - EmitAluInstruction(ShaderFromNirProcessor& processor); - -private: - - enum AluOp2Opts { - op2_opt_none = 0, - op2_opt_reverse = 1, - op2_opt_neg_src1 = 1 << 1 - }; - - bool do_emit(nir_instr* instr) override; - - void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp); - - bool emit_mov(const nir_alu_instr& instr); - bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0); - bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); - - bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode); - bool emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode); - - bool emit_alu_inot(const nir_alu_instr& instr); - bool emit_alu_ineg(const nir_alu_instr& instr); - bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); - - bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array reorder={0,1,2}); - bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false); - - bool emit_alu_b2f(const nir_alu_instr& instr); - bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op); - bool emit_dot(const nir_alu_instr& instr, int n); - bool emit_create_vec(const nir_alu_instr& instr, unsigned nc); - bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); - bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc); - - bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); - bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all); - - bool emit_fdph(const nir_alu_instr &instr); - bool emit_discard_if(const nir_intrinsic_instr 
*instr); - - bool emit_alu_f2b32(const nir_alu_instr& instr); - bool emit_b2i32(const nir_alu_instr& instr); - bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op); - bool emit_pack_64_2x32_split(const nir_alu_instr& instr); - bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp); - - bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine); - bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr); - bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr); - bool emit_pack_32_2x16_split(const nir_alu_instr& instr); - - bool emit_cube(const nir_alu_instr& instr); -private: - void make_last(AluInstruction *ir) const; - void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v, - GPRVector::Values& out, int ncomp); - - void preload_src(const nir_alu_instr& instr); - unsigned num_src_comp(const nir_alu_instr& instr); - - using vreg = std::array; - - std::array m_src[4]; -}; - -inline void EmitAluInstruction::make_last(AluInstruction *ir) const -{ - if (ir) - ir->set_flag(alu_last_instr); -} - -} - -#endif // SFN_EMITALUINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp deleted file mode 100644 index 7978ff8..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this 
permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_emitinstruction.h" - -#include "sfn_shader_base.h" - -namespace r600 { - -EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor): - m_proc(processor) -{ - -} - -EmitInstruction::~EmitInstruction() -{ -} - -bool EmitInstruction::emit(nir_instr* instr) -{ - return do_emit(instr); -} - -bool EmitInstruction::use_legacy_math_rules(void) -{ - return m_proc.use_legacy_math_rules(); -} - -PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled) -{ - return m_proc.from_nir(v, component, swizzled); -} - -PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component) -{ - return m_proc.from_nir(v, component); -} - -PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component) -{ - return m_proc.from_nir(v, component); -} - -PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component) -{ - return m_proc.from_nir(v, component); -} - -PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component) -{ - return m_proc.from_nir(v, component); -} - -PValue EmitInstruction::from_nir(const nir_src& v, unsigned component) -{ - return m_proc.from_nir(v, component); -} - -void EmitInstruction::emit_instruction(Instruction *ir) -{ - return m_proc.emit_instruction(ir); -} - -void EmitInstruction::emit_instruction(AluInstruction *ir) -{ - return 
m_proc.emit_instruction(ir); -} - -bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest, - std::vector src0, - const std::set& m_flags) -{ - return m_proc.emit_instruction(opcode, dest,src0, m_flags); -} - -const nir_variable * -EmitInstruction::get_deref_location(const nir_src& v) const -{ - return m_proc.get_deref_location(v); -} - -PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) -{ - return m_proc.from_nir_with_fetch_constant(src, component, channel); -} - -GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, - const GPRVector::Swizzle& swizzle, bool match) -{ - return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match); -} - -PGPRValue EmitInstruction::get_temp_register(int channel) -{ - return m_proc.get_temp_register(channel); -} - -GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle) -{ - return m_proc.get_temp_vec4(swizzle); -} - -PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle) -{ - return m_proc.create_register_from_nir_src(src, swizzle); -} - -enum amd_gfx_level EmitInstruction::get_chip_class(void) const -{ - return m_proc.get_chip_class(); -} - -PValue EmitInstruction::literal(uint32_t value) -{ - return m_proc.literal(value); -} - -GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components) -{ - return m_proc.vec_from_nir(dst, num_components); -} - -bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle, - const PValue& reg, bool map) -{ - return m_proc.inject_register(sel, swizzle, reg, map); -} - -int EmitInstruction::remap_atomic_base(int base) -{ - return m_proc.remap_atomic_base(base); -} - -void EmitInstruction::set_has_txs_cube_array_comp() -{ - m_proc.sh_info().has_txq_cube_array_z_comp = 1; -} - -const std::set EmitInstruction::empty = {}; -const std::set EmitInstruction::write = {alu_write}; -const std::set 
EmitInstruction::last_write = {alu_write, alu_last_instr}; -const std::set EmitInstruction::last = {alu_last_instr}; - -} - diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h deleted file mode 100644 index 79080a5..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef EMITINSTRUCTION_H -#define EMITINSTRUCTION_H - -#include "compiler/nir/nir.h" -#include "sfn_defines.h" -#include "sfn_value.h" -#include "sfn_instruction_alu.h" - -namespace r600 { - -class ShaderFromNirProcessor; - -class EmitInstruction -{ -public: - EmitInstruction(ShaderFromNirProcessor& processor); - virtual ~EmitInstruction(); - bool emit(nir_instr* instr); - - static const std::set empty; - static const std::set write; - static const std::set last_write; - static const std::set last; - -protected: - virtual bool do_emit(nir_instr* instr) = 0; - - // forwards from ValuePool - PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); - PValue from_nir(const nir_src& v, unsigned component); - PValue from_nir(const nir_alu_src& v, unsigned component); - PValue from_nir(const nir_tex_src& v, unsigned component); - PValue from_nir(const nir_alu_dest& v, unsigned component); - PValue from_nir(const nir_dest& v, unsigned component); - - PValue create_register_from_nir_src(const nir_src& src, unsigned comp); - - PGPRValue get_temp_register(int channel = -1); - GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3}); - - // forwards from ShaderFromNirProcessor - void emit_instruction(Instruction *ir); - void emit_instruction(AluInstruction *ir); - bool emit_instruction(EAluOp opcode, PValue dest, - std::vector src0, - const std::set& m_flags); - bool use_legacy_math_rules(void); - - PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); - GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, - const GPRVector::Swizzle& swizzle, bool match = false); - - const nir_variable *get_deref_location(const nir_src& v) const; - - enum amd_gfx_level get_chip_class(void) const; - - PValue literal(uint32_t value); - - GPRVector vec_from_nir(const nir_dest& dst, int num_components); - - bool inject_register(unsigned sel, unsigned swizzle, - const PValue& reg, bool map); - 
- int remap_atomic_base(int base); - - void set_has_txs_cube_array_comp(); -private: - - ShaderFromNirProcessor& m_proc; -}; - -} - - - -#endif // EMITINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp deleted file mode 100644 index 40f2730..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp +++ /dev/null @@ -1,741 +0,0 @@ -#include "sfn_emitssboinstruction.h" - -#include "sfn_instruction_fetch.h" -#include "sfn_instruction_gds.h" -#include "sfn_instruction_misc.h" -#include "sfn_instruction_tex.h" -#include "../r600_pipe.h" -#include "../r600_asm.h" - -namespace r600 { - -#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) - -EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor): - EmitInstruction(processor), - m_require_rat_return_address(false), - m_ssbo_image_offset(0) -{ -} - -void EmitSSBOInstruction::set_ssbo_offset(int offset) -{ - m_ssbo_image_offset = offset; -} - - -void EmitSSBOInstruction::set_require_rat_return_address() -{ - m_require_rat_return_address = true; -} - -bool -EmitSSBOInstruction::load_rat_return_address() -{ - if (m_require_rat_return_address) { - m_rat_return_address = get_temp_vec4(); - emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write})); - emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write})); - emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)), - literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr})); - emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1), - m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0), - {alu_write, alu_last_instr})); - m_require_rat_return_address 
= false; - } - return true; -} - - -bool EmitSSBOInstruction::do_emit(nir_instr* instr) -{ - const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_atomic_counter_add: - case nir_intrinsic_atomic_counter_and: - case nir_intrinsic_atomic_counter_exchange: - case nir_intrinsic_atomic_counter_max: - case nir_intrinsic_atomic_counter_min: - case nir_intrinsic_atomic_counter_or: - case nir_intrinsic_atomic_counter_xor: - case nir_intrinsic_atomic_counter_comp_swap: - return emit_atomic(intr); - case nir_intrinsic_atomic_counter_read: - case nir_intrinsic_atomic_counter_post_dec: - return emit_unary_atomic(intr); - case nir_intrinsic_atomic_counter_inc: - return emit_atomic_inc(intr); - case nir_intrinsic_atomic_counter_pre_dec: - return emit_atomic_pre_dec(intr); - case nir_intrinsic_load_ssbo: - return emit_load_ssbo(intr); - case nir_intrinsic_store_ssbo: - return emit_store_ssbo(intr); - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_exchange: - return emit_ssbo_atomic_op(intr); - case nir_intrinsic_image_store: - return emit_image_store(intr); - case nir_intrinsic_image_load: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_imax: - return emit_image_load(intr); - case nir_intrinsic_image_size: - return emit_image_size(intr); - case 
nir_intrinsic_get_ssbo_size: - return emit_buffer_size(intr); - case nir_intrinsic_memory_barrier: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_group_memory_barrier: - return make_stores_ack_and_waitack(); - default: - return false; - } -} - -bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr) -{ - bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); - - ESDOp op = read_result ? get_opcode(instr->intrinsic) : - get_opcode_wo(instr->intrinsic); - - if (DS_OP_INVALID == op) - return false; - - - - GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7}); - - int base = remap_atomic_base(nir_intrinsic_base(instr)); - - PValue uav_id = from_nir(instr->src[0], 0); - - PValue value = from_nir_with_fetch_constant(instr->src[1], 0); - - GDSInstr *ir = nullptr; - if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) { - PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0); - ir = new GDSInstr(op, dest, value, value2, uav_id, base); - } else { - ir = new GDSInstr(op, dest, value, uav_id, base); - } - - emit_instruction(ir); - return true; -} - -bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr) -{ - bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); - - ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic); - - if (DS_OP_INVALID == op) - return false; - - GPRVector dest = read_result ? 
make_dest(instr) : GPRVector(0, {7,7,7,7}); - - PValue uav_id = from_nir(instr->src[0], 0); - - auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr))); - - emit_instruction(ir); - return true; -} - -ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const -{ - switch (opcode) { - case nir_intrinsic_atomic_counter_add: - return DS_OP_ADD_RET; - case nir_intrinsic_atomic_counter_and: - return DS_OP_AND_RET; - case nir_intrinsic_atomic_counter_exchange: - return DS_OP_XCHG_RET; - case nir_intrinsic_atomic_counter_inc: - return DS_OP_INC_RET; - case nir_intrinsic_atomic_counter_max: - return DS_OP_MAX_UINT_RET; - case nir_intrinsic_atomic_counter_min: - return DS_OP_MIN_UINT_RET; - case nir_intrinsic_atomic_counter_or: - return DS_OP_OR_RET; - case nir_intrinsic_atomic_counter_read: - return DS_OP_READ_RET; - case nir_intrinsic_atomic_counter_xor: - return DS_OP_XOR_RET; - case nir_intrinsic_atomic_counter_post_dec: - return DS_OP_DEC_RET; - case nir_intrinsic_atomic_counter_comp_swap: - return DS_OP_CMP_XCHG_RET; - case nir_intrinsic_atomic_counter_pre_dec: - default: - return DS_OP_INVALID; - } -} - -ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const -{ - switch (opcode) { - case nir_intrinsic_atomic_counter_add: - return DS_OP_ADD; - case nir_intrinsic_atomic_counter_and: - return DS_OP_AND; - case nir_intrinsic_atomic_counter_inc: - return DS_OP_INC; - case nir_intrinsic_atomic_counter_max: - return DS_OP_MAX_UINT; - case nir_intrinsic_atomic_counter_min: - return DS_OP_MIN_UINT; - case nir_intrinsic_atomic_counter_or: - return DS_OP_OR; - case nir_intrinsic_atomic_counter_xor: - return DS_OP_XOR; - case nir_intrinsic_atomic_counter_post_dec: - return DS_OP_DEC; - case nir_intrinsic_atomic_counter_comp_swap: - return DS_OP_CMP_XCHG_RET; - case nir_intrinsic_atomic_counter_exchange: - return DS_OP_XCHG_RET; - case nir_intrinsic_atomic_counter_pre_dec: - default: - return DS_OP_INVALID; - } 
-} - -RatInstruction::ERatOp -EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const -{ - switch (opcode) { - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_image_atomic_add: - return RatInstruction::ADD_RTN; - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_image_atomic_and: - return RatInstruction::AND_RTN; - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_image_atomic_exchange: - return RatInstruction::XCHG_RTN; - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_image_atomic_or: - return RatInstruction::OR_RTN; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_image_atomic_imin: - return RatInstruction::MIN_INT_RTN; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_image_atomic_imax: - return RatInstruction::MAX_INT_RTN; - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_image_atomic_umin: - return RatInstruction::MIN_UINT_RTN; - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_image_atomic_umax: - return RatInstruction::MAX_UINT_RTN; - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_image_atomic_xor: - return RatInstruction::XOR_RTN; - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_image_atomic_comp_swap: - if (util_format_is_float(format)) - return RatInstruction::CMPXCHG_FLT_RTN; - else - return RatInstruction::CMPXCHG_INT_RTN; - case nir_intrinsic_image_load: - return RatInstruction::NOP_RTN; - default: - unreachable("Unsupported RAT instruction"); - } -} - -RatInstruction::ERatOp -EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const -{ - switch (opcode) { - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_image_atomic_add: - return RatInstruction::ADD; - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_image_atomic_and: - return RatInstruction::AND; - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_image_atomic_or: - return RatInstruction::OR; - case 
nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_image_atomic_imin: - return RatInstruction::MIN_INT; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_image_atomic_imax: - return RatInstruction::MAX_INT; - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_image_atomic_umin: - return RatInstruction::MIN_UINT; - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_image_atomic_umax: - return RatInstruction::MAX_UINT; - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_image_atomic_xor: - return RatInstruction::XOR; - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_image_atomic_comp_swap: - if (util_format_is_float(format)) - return RatInstruction::CMPXCHG_FLT; - else - return RatInstruction::CMPXCHG_INT; - default: - unreachable("Unsupported WO RAT instruction"); - } -} - -bool EmitSSBOInstruction::load_atomic_inc_limits() -{ - m_atomic_update = get_temp_register(); - m_atomic_update->set_keep_alive(); - emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1), - {alu_write, alu_last_instr})); - return true; -} - -bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr) -{ - bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); - PValue uav_id = from_nir(instr->src[0], 0); - GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7}); - auto ir = new GDSInstr(read_result ? 
DS_OP_ADD_RET : DS_OP_ADD, dest, - m_atomic_update, uav_id, - remap_atomic_base(nir_intrinsic_base(instr))); - emit_instruction(ir); - return true; -} - -bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr) -{ - GPRVector dest = make_dest(instr); - - PValue uav_id = from_nir(instr->src[0], 0); - - auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id, - remap_atomic_base(nir_intrinsic_base(instr))); - emit_instruction(ir); - - emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write)); - - return true; -} - -bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr) -{ - GPRVector dest = make_dest(instr); - - /** src0 not used, should be some offset */ - auto addr = from_nir(instr->src[1], 0); - PValue addr_temp = create_register_from_nir_src(instr->src[1], 1); - - /** Should be lowered in nir */ - emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))}, - {alu_write, alu_last_instr})); - - const EVTXDataFormat formats[4] = { - fmt_32, - fmt_32_32, - fmt_32_32_32, - fmt_32_32_32_32 - }; - - const std::array dest_swt[4] = { - {0,7,7,7}, - {0,1,7,7}, - {0,1,2,7}, - {0,1,2,3} - }; - - /* TODO fix resource index */ - auto ir = new FetchInstruction(dest, addr_temp, - R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset - , from_nir(instr->src[0], 0), - formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int); - ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]); - ir->set_flag(vtx_use_tc); - - emit_instruction(ir); - return true; -} - -bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr) -{ - - GPRVector::Swizzle swz = {7,7,7,7}; - for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) - swz[i] = i; - - auto orig_addr = from_nir(instr->src[2], 0); - - GPRVector addr_vec = get_temp_vec4({0,1,2,7}); - - auto temp2 = get_temp_vec4(); - - auto rat_id = from_nir(instr->src[1], 
0); - - emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr, - PValue(new LiteralValue(2)), write)); - emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write)); - emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write)); - - - auto values = vec_from_nir_with_fetch_constant(instr->src[0], - (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true); - - auto cf_op = cf_mem_rat; - //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; - auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, - values, addr_vec, m_ssbo_image_offset, rat_id, 1, - 1, 0, false); - emit_instruction(store); - m_store_ops.push_back(store); - - for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) { - emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN ? last_write : write)); - emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0), - {addr_vec.reg_i(0), Value::one_i}, last_write)); - store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, - temp2, addr_vec, m_ssbo_image_offset, rat_id, 1, - 1, 0, false); - emit_instruction(store); - if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT)) - m_store_ops.push_back(store); - } - - return true; -} - -bool -EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin) -{ - int imageid = 0; - PValue image_offset; - - if (nir_src_is_const(intrin->src[0])) - imageid = nir_src_as_int(intrin->src[0]); - else - image_offset = from_nir(intrin->src[0], 0); - - auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3}); - auto undef = from_nir(intrin->src[2], 0); - auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3}); - auto unknown = from_nir(intrin->src[4], 0); - - if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && - 
nir_intrinsic_image_array(intrin)) { - emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); - emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); - } - - auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; - auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid, - image_offset, 1, 0xf, 0, false); - - //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT)) - m_store_ops.push_back(store); - - emit_instruction(store); - return true; -} - -bool -EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin) -{ - int imageid = 0; - PValue image_offset; - - if (nir_src_is_const(intrin->src[0])) - imageid = nir_src_as_int(intrin->src[0]); - else - image_offset = from_nir(intrin->src[0], 0); - - bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses); - auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) : - get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT); - - auto coord_orig = from_nir(intrin->src[1], 0, 0); - auto coord = get_temp_register(0); - - emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write)); - - if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), - from_nir(intrin->src[3], 0), {alu_write})); - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 
2 : 3), - from_nir(intrin->src[2], 0), {alu_last_instr, alu_write})); - } else { - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), - from_nir(intrin->src[2], 0), {alu_write})); - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write)); - } - - - GPRVector out_vec({coord, coord, coord, coord}); - - auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset, - image_offset, 1, 0xf, 0, true); - emit_instruction(atomic); - - if (read_result) { - emit_instruction(new WaitAck(0)); - - GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components); - auto fetch = new FetchInstruction(vc_fetch, - no_index_offset, - fmt_32, - vtx_nf_int, - vtx_es_none, - m_rat_return_address.reg_i(1), - dest, - 0, - false, - 0xf, - R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, - 0, - bim_none, - false, - false, - 0, - 0, - 0, - image_offset, - {0,7,7,7}); - fetch->set_flag(vtx_srf_mode); - fetch->set_flag(vtx_use_tc); - fetch->set_flag(vtx_vpm); - emit_instruction(fetch); - } - - return true; - -} - -bool -EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin) -{ - int imageid = 0; - PValue image_offset; - - if (nir_src_is_const(intrin->src[0])) - imageid = nir_src_as_int(intrin->src[0]); - else - image_offset = from_nir(intrin->src[0], 0); - - bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses); - auto rat_op = read_retvalue ? 
get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)): - get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin)); - - GPRVector::Swizzle swz = {0,1,2,3}; - auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz); - - if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && - nir_intrinsic_image_array(intrin)) { - emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); - emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); - } - - if (intrin->intrinsic != nir_intrinsic_image_load) { - if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) { - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), - from_nir(intrin->src[4], 0), {alu_write})); - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3), - from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); - } else { - emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), - from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); - } - } - auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; - - auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid, - image_offset, 1, 0xf, 0, true); - emit_instruction(store); - return read_retvalue ? 
fetch_return_value(intrin) : true; -} - -bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin) -{ - emit_instruction(new WaitAck(0)); - - pipe_format format = nir_intrinsic_format(intrin); - unsigned fmt = fmt_32; - unsigned num_format = 0; - unsigned format_comp = 0; - unsigned endian = 0; - - int imageid = 0; - PValue image_offset; - - if (nir_src_is_const(intrin->src[0])) - imageid = nir_src_as_int(intrin->src[0]); - else - image_offset = from_nir(intrin->src[0], 0); - - r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian); - - GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); - - auto fetch = new FetchInstruction(vc_fetch, - no_index_offset, - (EVTXDataFormat)fmt, - (EVFetchNumFormat)num_format, - (EVFetchEndianSwap)endian, - m_rat_return_address.reg_i(1), - dest, - 0, - false, - 0x3, - R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, - 0, - bim_none, - false, - false, - 0, - 0, - 0, - image_offset, {0,1,2,3}); - fetch->set_flag(vtx_srf_mode); - fetch->set_flag(vtx_use_tc); - fetch->set_flag(vtx_vpm); - if (format_comp) - fetch->set_flag(vtx_format_comp_signed); - - emit_instruction(fetch); - return true; -} - -bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin) -{ - GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); - GPRVector src{0,{4,4,4,4}}; - - assert(nir_src_as_uint(intrin->src[1]) == 0); - - auto const_offset = nir_src_as_const_value(intrin->src[0]); - auto dyn_offset = PValue(); - int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; - if (const_offset) - res_id += const_offset[0].u32; - else - dyn_offset = from_nir(intrin->src[0], 0); - - if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) { - emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)), - res_id, - bim_none)); - return true; - } else { - emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src, - 0/* ?? 
*/, - res_id, dyn_offset)); - if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && - nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) { - /* Need to load the layers from a const buffer */ - - set_has_txs_cube_array_comp(); - - if (const_offset) { - unsigned lookup_resid = const_offset[0].u32; - emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2), - PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, - R600_BUFFER_INFO_CONST_BUFFER)), - EmitInstruction::last_write)); - } else { - /* If the adressing is indirect we have to get the z-value by using a binary search */ - GPRVector trgt; - GPRVector help; - - auto addr = help.reg_i(0); - auto comp = help.reg_i(1); - auto low_bit = help.reg_i(2); - auto high_bit = help.reg_i(3); - - emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0), - literal(2), EmitInstruction::write)); - emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0), - literal(3), EmitInstruction::last_write)); - - emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL, - R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none)); - - emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2), - EmitInstruction::write)); - emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3), - EmitInstruction::last_write)); - - emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write)); - } - } - } - return true; -} - -bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr) -{ - std::array dst_elms; - - - for (uint16_t i = 0; i < 4; ++i) { - dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? 
i : 7); - } - - GPRVector dst(dst_elms); - GPRVector src(0,{4,4,4,4}); - - auto const_offset = nir_src_as_const_value(intr->src[0]); - auto dyn_offset = PValue(); - int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; - if (const_offset) - res_id += const_offset[0].u32; - else - assert(0 && "dynamic buffer offset not supported in buffer_size"); - - emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), - res_id, bim_none)); - - return true; -} - -bool EmitSSBOInstruction::make_stores_ack_and_waitack() -{ - for (auto&& store: m_store_ops) - store->set_ack(); - - if (!m_store_ops.empty()) - emit_instruction(new WaitAck(0)); - - m_store_ops.clear(); - - return true; -} - -GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir) -{ - GPRVector::Values v; - int i; - for (i = 0; i < 4; ++i) - v[i] = from_nir(ir->dest, i); - return GPRVector(v); -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h deleted file mode 100644 index 4d5fa0f..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef SFN_EMITSSBOINSTRUCTION_H -#define SFN_EMITSSBOINSTRUCTION_H - -#include "sfn_emitinstruction.h" -#include "sfn_instruction_gds.h" -#include "sfn_value_gpr.h" - -namespace r600 { - -class EmitSSBOInstruction: public EmitInstruction { -public: - EmitSSBOInstruction(ShaderFromNirProcessor& processor); - - void set_ssbo_offset(int offset); - - void set_require_rat_return_address(); - bool load_rat_return_address(); - bool load_atomic_inc_limits(); - -private: - bool do_emit(nir_instr *instr); - - bool emit_atomic(const nir_intrinsic_instr* instr); - bool emit_unary_atomic(const nir_intrinsic_instr* instr); - bool emit_atomic_inc(const nir_intrinsic_instr* instr); - bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr); - - bool emit_load_ssbo(const nir_intrinsic_instr* instr); - bool emit_store_ssbo(const nir_intrinsic_instr* 
instr); - - bool emit_image_size(const nir_intrinsic_instr *intrin); - bool emit_image_load(const nir_intrinsic_instr *intrin); - bool emit_image_store(const nir_intrinsic_instr *intrin); - bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin); - bool emit_buffer_size(const nir_intrinsic_instr *intrin); - - bool fetch_return_value(const nir_intrinsic_instr *intrin); - - bool make_stores_ack_and_waitack(); - - ESDOp get_opcode(nir_intrinsic_op opcode) const; - ESDOp get_opcode_wo(const nir_intrinsic_op opcode) const; - - RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const; - RatInstruction::ERatOp get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const; - - - GPRVector make_dest(const nir_intrinsic_instr* instr); - - PGPRValue m_atomic_update; - - bool m_require_rat_return_address; - GPRVector m_rat_return_address; - int m_ssbo_image_offset; - std::vector m_store_ops; -}; - -} - -#endif // SFN_EMITSSBOINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp deleted file mode 100644 index 326cd15..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp +++ /dev/null @@ -1,671 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_emittexinstruction.h" -#include "sfn_shader_base.h" -#include "sfn_instruction_fetch.h" - -namespace r600 { - -EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor): - EmitInstruction (processor) -{ -} - -bool EmitTexInstruction::do_emit(nir_instr* instr) -{ - nir_tex_instr* ir = nir_instr_as_tex(instr); - - TexInputs src; - if (!get_inputs(*ir, src)) - return false; - - if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - switch (ir->op) { - case nir_texop_txf: - return emit_buf_txf(ir, src); - case nir_texop_txs: - return emit_tex_txs(ir, src, {0,1,2,3}); - default: - return false; - } - } else { - switch (ir->op) { - case nir_texop_tex: - return emit_tex_tex(ir, src); - case nir_texop_txf: - return emit_tex_txf(ir, src); - case nir_texop_txb: - return emit_tex_txb(ir, src); - case nir_texop_txl: - return emit_tex_txl(ir, src); - case nir_texop_txd: - return emit_tex_txd(ir, src); - case nir_texop_txs: - return emit_tex_txs(ir, src, {0,1,2,3}); - case nir_texop_lod: - return emit_tex_lod(ir, src); - case nir_texop_tg4: - return emit_tex_tg4(ir, src); - case nir_texop_txf_ms: - return emit_tex_txf_ms(ir, src); - case nir_texop_query_levels: - return emit_tex_txs(ir, src, {3,7,7,7}); - case nir_texop_texture_samples: - return emit_tex_texture_samples(ir, src, {3,7,7,7}); - default: - - return false; - } - } -} - -bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src) -{ - auto dst = make_dest(*instr); - - auto ir = new 
FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0, - instr->texture_index + R600_MAX_CONST_BUFFERS, - src.texture_offset, bim_none); - ir->set_flag(vtx_use_const_field); - emit_instruction(ir); - return true; -} - -bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src) -{ - - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - auto tex_op = TexInstruction::sample; - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect); - - if (instr->is_shadow) { - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, - {alu_last_instr, alu_write})); - tex_op = TexInstruction::sample_c; - } - - auto dst = make_dest(*instr); - auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - if (instr->is_array) - handle_array_index(*instr, src.coord, irt); - - set_rect_coordinate_flags(instr, irt); - set_offsets(irt, src.offset); - - emit_instruction(irt); - return true; -} - -bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src) -{ - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - auto tex_op = TexInstruction::sample_g; - auto dst = make_dest(*instr); - - GPRVector empty_dst(0,{7,7,7,7}); - - if (instr->is_shadow) { - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, - {alu_last_instr, alu_write})); - tex_op = TexInstruction::sample_c_g; - } - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx, - sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - irgh->set_dest_swizzle({7,7,7,7}); - - TexInstruction *irgv = new 
TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy, - sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - irgv->set_dest_swizzle({7,7,7,7}); - - TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - if (instr->is_array) - handle_array_index(*instr, src.coord, ir); - - set_rect_coordinate_flags(instr, ir); - set_offsets(ir, src.offset); - - emit_instruction(irgh); - emit_instruction(irgv); - emit_instruction(ir); - return true; -} - -bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src) -{ - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - auto dst = make_dest(*instr); - - if (*src.coord.reg_i(3) != *src.lod) { - if (src.coord.sel() != src.lod->sel()) - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr})); - else - src.coord.set_reg_i(3, src.lod); - } - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect); - - /* txf doesn't need rounding for the array index, but 1D has the array index - * in the z component */ - if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) - src.coord.set_reg_i(2, src.coord.reg_i(1)); - - auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, - sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - - - if (src.offset) { - assert(src.offset->is_ssa); - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { - ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), - {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - if (instr->is_array) - tex_ir->set_flag(TexInstruction::z_unnormalized); - - emit_instruction(tex_ir); - return true; -} - -bool 
EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src) -{ - auto tex_op = TexInstruction::get_tex_lod; - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - auto dst = make_dest(*instr); - auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - irt->set_dest_swizzle({1,0,7,7}); - emit_instruction(irt); - - return true; - -} - -bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src) -{ - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - auto tex_op = TexInstruction::sample_l; - if (instr->is_shadow) { - if (src.coord.sel() != src.comperator->sel()) - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write})); - else - src.coord.set_reg_i(2, src.comperator); - tex_op = TexInstruction::sample_c_l; - } - - if (src.coord.sel() != src.lod->sel()) - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write})); - else - src.coord.set_reg_i(3, src.lod); - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - auto dst = make_dest(*instr); - auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - - if (instr->is_array) - handle_array_index(*instr, src.coord, irt); - - set_rect_coordinate_flags(instr, irt); - set_offsets(irt, src.offset); - - emit_instruction(irt); - return true; -} - -bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src) -{ - auto tex_op = TexInstruction::sample_lb; - - std::array in_swizzle = {0,1,2,3}; - - if (instr->is_shadow) { - if (src.coord.sel() != src.comperator->sel()) - emit_instruction(new AluInstruction(op1_mov, 
src.coord.reg_i(2), src.comperator, {alu_write})); - else - src.coord.set_reg_i(2, src.comperator); - tex_op = TexInstruction::sample_c_lb; - } - - if (src.coord.sel() != src.bias->sel()) - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write})); - else - src.coord.set_reg_i(3, src.bias); - - GPRVector tex_src(src.coord, in_swizzle); - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - auto dst = make_dest(*instr); - auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - if (instr->is_array) - handle_array_index(*instr, tex_src, irt); - - set_rect_coordinate_flags(instr, irt); - set_offsets(irt, src.offset); - - emit_instruction(irt); - return true; -} - -bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src, - const std::array& dest_swz) -{ - std::array dst_elms; - std::array src_elms; - - for (uint16_t i = 0; i < 4; ++i) { - dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? 
i : 7); - } - - GPRVector dst(dst_elms); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), - instr->sampler_index + R600_MAX_CONST_BUFFERS, - bim_none)); - } else { - for (uint16_t i = 0; i < 4; ++i) - src_elms[i] = tex_src.lod; - GPRVector src(src_elms); - - auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src, - sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); - ir->set_dest_swizzle(dest_swz); - emit_instruction(ir); - - if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2), - sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER)); - - auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write}); - emit_instruction(alu); - set_has_txs_cube_array_comp(); - } - } - - return true; - -} - -bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, - const std::array &dest_swz) -{ - GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); - GPRVector help{0,{4,4,4,4}}; - - auto dyn_offset = PValue(); - int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index; - - auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help, - 0, res_id, src.sampler_offset); - ir->set_dest_swizzle(dest_swz); - emit_instruction(ir); - return true; -} - -bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src) -{ - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - TexInstruction *set_ofs = nullptr; - - auto tex_op = TexInstruction::gather4; - - if (instr->is_shadow) { - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, - {alu_last_instr, 
alu_write})); - tex_op = TexInstruction::gather4_c; - } - - auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - bool literal_offset = false; - if (src.offset) { - literal_offset = nir_src_as_const_value(*src.offset) != 0; - r600::sfn_log << SfnLog::tex << " really have offsets and they are " << - (literal_offset ? "literal" : "varying") << - "\n"; - - if (!literal_offset) { - GPRVector::Swizzle swizzle = {4,4,4,4}; - for (unsigned i = 0; i < instr->coord_components; ++i) - swizzle[i] = i; - - int noffsets = instr->coord_components; - if (instr->is_array) - --noffsets; - - auto ofs = vec_from_nir_with_fetch_constant(*src.offset, - ( 1 << noffsets) - 1, - swizzle); - GPRVector dummy(0, {7,7,7,7}); - tex_op = (tex_op == TexInstruction::gather4_c) ? - TexInstruction::gather4_c_o : TexInstruction::gather4_o; - - set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy, - ofs, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - set_ofs->set_dest_swizzle({7,7,7,7}); - } - } - - - /* pre CAYMAN needs swizzle */ - auto dst = make_dest(*instr); - auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - - if (get_chip_class() != CAYMAN) - irt->set_dest_swizzle({1,2,0,3}); - irt->set_gather_comp(instr->component); - - if (instr->is_array) - handle_array_index(*instr, src.coord, irt); - - if (literal_offset) { - r600::sfn_log << SfnLog::tex << "emit literal offsets\n"; - set_offsets(irt, src.offset); - } - - set_rect_coordinate_flags(instr, irt); - - if (set_ofs) - emit_instruction(set_ofs); - - emit_instruction(irt); - return true; -} - -bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src) -{ - assert(instr->src[0].src.is_ssa); - - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - auto sampler 
= get_sampler_id(instr->sampler_index, src.sampler_deref); - assert(!sampler.indirect && "Indirect sampler selection not yet supported"); - - PGPRValue sample_id_dest_reg = get_temp_register(); - GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7}); - sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg); - std::array dest_swz = {7,7,7,7}; - dest_swz[sample_id_dest_reg->chan()] = 0; - - emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), - src.ms_index, - {alu_write, alu_last_instr})); - - auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord, - sampler.id, - sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); - tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized); - tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized); - tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized); - tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized); - tex_sample_id_ir->set_inst_mode(1); - - tex_sample_id_ir->set_dest_swizzle(dest_swz); - - emit_instruction(tex_sample_id_ir); - - if (src.ms_index->type() != Value::literal || - static_cast(*src.ms_index).value() != 0) { - PValue help = get_temp_register(); - - emit_instruction(new AluInstruction(op2_lshl_int, help, - src.ms_index, literal(2), - {alu_write, alu_last_instr})); - - emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg, - {sample_id_dest_reg, help}, - {alu_write, alu_last_instr})); - } - - emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3), - {sample_id_dest_reg, PValue(new LiteralValue(15))}, - {alu_write, alu_last_instr})); - - auto dst = make_dest(*instr); - - /* txf doesn't need rounding for the array index, but 1D has the array index - * in the z component */ - if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) - src.coord.set_reg_i(2, src.coord.reg_i(1)); - - auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, - sampler.id, - sampler.id + 
R600_MAX_CONST_BUFFERS, src.sampler_offset); - - - if (src.offset) { - assert(src.offset->is_ssa); - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { - ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), - {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - emit_instruction(tex_ir); - return true; -} - -bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src) -{ - sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n"; - - unsigned grad_components = instr.coord_components; - if (instr.is_array && !instr.array_is_lowered_cube) - --grad_components; - - - src.offset = nullptr; - bool retval = true; - for (unsigned i = 0; i < instr.num_srcs; ++i) { - switch (instr.src[i].src_type) { - case nir_tex_src_bias: - src.bias = from_nir(instr.src[i], 0); - break; - - case nir_tex_src_coord: { - src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src, - (1 << instr.coord_components) - 1, - {0,1,2,3}); - } break; - case nir_tex_src_comparator: - src.comperator = from_nir(instr.src[i], 0); - break; - case nir_tex_src_ddx: { - sfn_log << SfnLog::tex << "Get DDX "; - src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src, - (1 << grad_components) - 1, - swizzle_from_comps(grad_components)); - sfn_log << SfnLog::tex << src.ddx << "\n"; - } break; - case nir_tex_src_ddy:{ - sfn_log << SfnLog::tex << "Get DDY "; - src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src, - (1 << grad_components) - 1, - swizzle_from_comps(grad_components)); - sfn_log << SfnLog::tex << src.ddy << "\n"; - } break; - case nir_tex_src_lod: - src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0); - break; - case nir_tex_src_offset: - sfn_log << SfnLog::tex << " -- Find offset\n"; - src.offset = &instr.src[i].src; - break; - case nir_tex_src_sampler_deref: - src.sampler_deref = 
get_deref_location(instr.src[i].src); - break; - case nir_tex_src_texture_deref: - src.texture_deref = get_deref_location(instr.src[i].src); - break; - case nir_tex_src_ms_index: - src.ms_index = from_nir(instr.src[i], 0); - break; - case nir_tex_src_texture_offset: - src.texture_offset = from_nir(instr.src[i], 0); - break; - case nir_tex_src_sampler_offset: - src.sampler_offset = from_nir(instr.src[i], 0); - break; - case nir_tex_src_plane: - case nir_tex_src_projector: - case nir_tex_src_min_lod: - default: - sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n"; - retval = false; - } - } - return retval; -} - -GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr) -{ - int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : - instr.dest.reg.reg->num_components; - std::array dst_elms; - for (uint16_t i = 0; i < 4; ++i) - dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7); - return GPRVector(dst_elms); -} - - -GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr, - const std::array& swizzle) -{ - int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : - instr.dest.reg.reg->num_components; - std::array dst_elms; - for (uint16_t i = 0; i < 4; ++i) { - int k = swizzle[i]; - dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? 
k : 7); - } - return GPRVector(dst_elms); -} - -void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr, - TexInstruction* ir) const -{ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - ir->set_flag(TexInstruction::x_unnormalized); - ir->set_flag(TexInstruction::y_unnormalized); - } -} - -void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset) -{ - if (!offset) - return; - - assert(offset->is_ssa); - auto literal = nir_src_as_const_value(*offset); - assert(literal); - - for (int i = 0; i < offset->ssa->num_components; ++i) { - ir->set_offset(i, literal[i].i32); - } -} - -void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir) -{ - int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2; - emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx), - {alu_last_instr, alu_write})); - ir->set_flag(TexInstruction::z_unnormalized); -} - -EmitTexInstruction::SamplerId -EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref) -{ - EmitTexInstruction::SamplerId result = {sampler_id, false}; - - if (deref) { - assert(glsl_type_is_sampler(deref->type)); - result.id = deref->data.binding; - } - return result; -} - -EmitTexInstruction::TexInputs::TexInputs(): - sampler_deref(nullptr), - texture_deref(nullptr), - offset(nullptr) -{ -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h deleted file mode 100644 index e11ebda..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the 
rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SFN_EMITTEXINSTRUCTION_H -#define SFN_EMITTEXINSTRUCTION_H - -#include "sfn_emitinstruction.h" -#include "sfn_instruction_tex.h" - -namespace r600 { - -class EmitTexInstruction : public EmitInstruction -{ -public: - EmitTexInstruction(ShaderFromNirProcessor& processor); - -private: - struct TexInputs { - TexInputs(); - const nir_variable *sampler_deref; - const nir_variable *texture_deref; - GPRVector coord; - PValue bias; - PValue comperator; - PValue lod; - GPRVector ddx; - GPRVector ddy; - nir_src *offset; - PValue gather_comp; - PValue ms_index; - PValue sampler_offset; - PValue texture_offset; - }; - - bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src); - - bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src); - bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src); - bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src); - bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src); - bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src, - const std::array &dest_swz); - bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, - const std::array 
&dest_swz); - bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src); - bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src); - bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src); - bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src); - - bool get_inputs(const nir_tex_instr& instr, TexInputs &src); - - void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const; - - bool do_emit(nir_instr* instr) override; - - GPRVector make_dest(nir_tex_instr& instr); - GPRVector make_dest(nir_tex_instr &instr, const std::array &swizzle); - - void set_offsets(TexInstruction* ir, nir_src *offset); - void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir); - - struct SamplerId { - int id; - bool indirect; - }; - - SamplerId get_sampler_id(int sampler_id, const nir_variable *deref); - -}; - -} - -#endif // SFN_EMITTEXINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.cpp b/src/gallium/drivers/r600/sfn/sfn_instr.cpp new file mode 100644 index 0000000..d81e329 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp @@ -0,0 +1,522 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instr_alugroup.h" +#include "sfn_instr_export.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_lds.h" +#include "sfn_instr_tex.h" +#include "sfn_instr_controlflow.h" + +#include +#include +#include + +namespace r600 { + +using std::string; +using std::vector; + +Instr::Instr(): + m_use_count(0), + m_block_id(std::numeric_limits::max()), + m_index(std::numeric_limits::max()) +{ +} + +Instr::~Instr() +{ + +} + +void Instr::print(std::ostream& os) const +{ + do_print(os); +} + +bool Instr::ready() const +{ + for (auto& i : m_required_instr) + if (!i->ready()) + return false; + return do_ready(); +} + +int int_from_string_with_prefix(const std::string& str, const std::string& prefix) +{ + if (str.substr(0, prefix.length()) != prefix) { + std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n"; + assert(0); + } + + std::stringstream help(str.substr(prefix.length())); + int retval; + help >> retval; + return retval; +} + +int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle &swz, bool& is_ssa) +{ + assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S'); + int sel = 0; + + auto istr = str.begin() + 1; + + if (str[0] == '_') { + while (istr != str.end() && *istr == '_') + ++istr; + sel = std::numeric_limits::max(); + } else { + while (istr != str.end() && isdigit(*istr)) { + sel *= 10; + sel += *istr - '0'; + ++istr; + } + } + + assert(*istr == '.'); + istr++; + + int i = 0; + while (istr != 
str.end()) { + switch (*istr) { + case 'x': swz[i] = 0; break; + case 'y': swz[i] = 1; break; + case 'z': swz[i] = 2; break; + case 'w': swz[i] = 3; break; + case '0': swz[i] = 4; break; + case '1': swz[i] = 5; break; + case '_': swz[i] = 7; break; + default: + unreachable("Unknown swizzle character"); + } + ++istr; + ++i; + } + + is_ssa = str[0] == 'S'; + + return sel; +} + +bool Instr::is_last() const +{ + return true; +} + +bool Instr::set_dead() +{ + if (m_instr_flags.test(always_keep)) + return false; + bool is_dead = propagate_death(); + m_instr_flags.set(dead); + return is_dead; +} + +bool Instr::propagate_death() +{ + return true; +} + +bool Instr::replace_source(PRegister old_src, PVirtualValue new_src) +{ + (void)old_src; + (void)new_src; + return false; +} + +void Instr::add_required_instr(Instr *instr) +{ + assert(instr); + m_required_instr.push_back(instr); + instr->m_dependend_instr.push_back(this); +} + +void Instr::replace_required_instr(Instr *old_instr, Instr *new_instr) +{ + + for (auto i = m_required_instr.begin(); i != m_required_instr.end(); ++i) { + if (*i == old_instr) + *i = new_instr; + } +} + +bool Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr) +{ + (void)new_dest; + (void)move_instr; + return false; +} + +void Instr::set_blockid(int id, int index) +{ + m_block_id = id; + m_index = index; + forward_set_blockid(id, index); +} + + +void Instr::forward_set_blockid(int id, int index) +{ + (void)id; + (void)index; +} + +InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest, + const RegisterVec4::Swizzle& dest_swizzle): + m_dest(dest), + m_dest_swizzle(dest_swizzle) +{ + for (int i = 0; i < 4; ++i) { + if (m_dest_swizzle[i] < 6) + m_dest[i]->add_parent(this); + } +} + +void InstrWithVectorResult::print_dest(std::ostream& os) const +{ + os << (m_dest[0]->is_ssa() ? 
'S' : 'R' ) << m_dest.sel(); + os << "."; + for (int i = 0; i < 4; ++i) + os << VirtualValue::chanchar[m_dest_swizzle[i]]; +} + +bool InstrWithVectorResult::comp_dest(const RegisterVec4& dest, + const RegisterVec4::Swizzle& dest_swizzle) const +{ + for(int i = 0; i < 4; ++i) { + if (!m_dest[i]->equal_to(*dest[i])) { + return false; + } + if (m_dest_swizzle[i] != dest_swizzle[i]) + return false; + } + return true; +} + +void Block::do_print(std::ostream& os) const +{ + for (int j = 0; j < 2 * m_nesting_depth; ++j) + os << ' '; + os << "BLOCK START\n"; + for (auto& i : m_instructions) { + for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j) + os << ' '; + os << *i << "\n"; + } + for (int j = 0; j < 2 * m_nesting_depth; ++j) + os << ' '; + os << "BLOCK END\n"; +} + +bool Block::is_equal_to(const Block& lhs) const +{ + if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth) + return false; + + if (m_instructions.size() != lhs.m_instructions.size()) + return false; + + return std::inner_product(m_instructions.begin(), m_instructions.end(), lhs.m_instructions.begin(), + true, + [] (bool l, bool r) { return l && r;}, + [](PInst l, PInst r) { return l->equal_to(*r);}); +} + +inline bool operator != (const Block& lhs, const Block& rhs) +{ + return !lhs.is_equal_to(rhs); +} + +void Block::erase(iterator node) +{ + m_instructions.erase(node); +} + +void Block::set_type(Type t) +{ + m_blocK_type = t; + switch (t) { + case vtx: + case gds: + case tex: m_remaining_slots = 8; break; /* TODO: 16 for >= EVERGREEN */ + default: + m_remaining_slots = 0xffff; + } +} + +Block::Block(int nesting_depth, int id): + m_nesting_depth(nesting_depth), + m_id(id), + m_next_index(0) +{ + assert(!has_instr_flag(force_cf)); +} + +void Block::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void Block::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +void Block::push_back(PInst instr) +{ + instr->set_blockid(m_id, m_next_index++); + 
if (m_remaining_slots != 0xffff) { + uint32_t new_slots = instr->slots(); + m_remaining_slots -= new_slots; + } + if (m_lds_group_start) + m_lds_group_requirement += instr->slots(); + + m_instructions.push_back(instr); +} + +bool Block::try_reserve_kcache(const AluGroup& group) +{ + auto kcache_constants = group.get_kconsts(); + for (auto& kc : kcache_constants) { + auto u = kc->as_uniform(); + assert(u); + if (!try_reserve_kcache(*u)) + return false; + } + return true; +} + +bool Block::try_reserve_kcache(const UniformValue& u) +{ + const int kcache_banks = 4; // TODO: handle pre-evergreen + + int bank = u.kcache_bank(); + int sel = (u.sel() - 512); + int line = sel >> 4; + + bool found = false; + + for (int i = 0; i < kcache_banks && !found; ++i) { + if (m_kcache[i].mode) { + if (m_kcache[i].bank < bank) + continue; + + if ((m_kcache[i].bank == bank && + m_kcache[i].addr > line + 1) || + m_kcache[i].bank > bank) { + if (m_kcache[kcache_banks - 1].mode) + return false; + + memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine)); + m_kcache[i].mode = KCacheLine::lock_1; + m_kcache[i].bank = bank; + m_kcache[i].addr = line; + return true; + } + + int d = line - m_kcache[i].addr; + + if (d == -1) { + m_kcache[i].addr--; + if (m_kcache[i].mode == KCacheLine::lock_2) { + /* we are prepending the line to the current set, + * discarding the existing second line, + * so we'll have to insert line+2 after it */ + line += 2; + continue; + } else if (m_kcache[i].mode == KCacheLine::lock_1) { + m_kcache[i].mode = KCacheLine::lock_2; + return true; + } else { + /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ + return false; + } + } else if (d == 1) { + m_kcache[i].mode = KCacheLine::lock_2; + return true; + } else if (d == 0) + return true; + } else { /* free kcache set - use it */ + m_kcache[i].mode = KCacheLine::lock_1; + m_kcache[i].bank = bank; + m_kcache[i].addr = line; + return true; + } + } + return false; +} + +void 
Block::lds_group_start(AluInstr *alu) +{ + assert(!m_lds_group_start); + m_lds_group_start = alu; + m_lds_group_requirement = 0; +} + +void Block::lds_group_end() +{ + assert(m_lds_group_start); + m_lds_group_start->set_required_slots(m_lds_group_requirement); + m_lds_group_start = 0; +} + +InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig): + m_dest(orig.m_dest), + m_dest_swizzle(orig.m_dest_swizzle) +{ +} + +class InstrComparer : public ConstInstrVisitor { +public: + InstrComparer() = default; + bool result {false}; + +#define DECLARE_MEMBER(TYPE) \ + InstrComparer(const TYPE *instr) \ + { \ + this_ ## TYPE = instr; \ + } \ + \ + void visit(const TYPE& instr) \ + { \ + result = false; \ + if (!this_ ## TYPE) \ + return; \ + result = this_ ## TYPE->is_equal_to(instr); \ + } \ + \ + const TYPE *this_ ## TYPE{nullptr}; + + DECLARE_MEMBER(AluInstr); + DECLARE_MEMBER(AluGroup); + DECLARE_MEMBER(TexInstr); + DECLARE_MEMBER(ExportInstr); + DECLARE_MEMBER(FetchInstr); + DECLARE_MEMBER(Block); + DECLARE_MEMBER(ControlFlowInstr); + DECLARE_MEMBER(IfInstr); + DECLARE_MEMBER(WriteScratchInstr); + DECLARE_MEMBER(StreamOutInstr); + DECLARE_MEMBER(MemRingOutInstr); + DECLARE_MEMBER(EmitVertexInstr); + DECLARE_MEMBER(GDSInstr); + DECLARE_MEMBER(WriteTFInstr); + DECLARE_MEMBER(LDSAtomicInstr); + DECLARE_MEMBER(LDSReadInstr); + DECLARE_MEMBER(RatInstr); +}; + +class InstrCompareForward: public ConstInstrVisitor { +public: + + void visit(const AluInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const AluGroup& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const TexInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const ExportInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const FetchInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const Block& instr) override { + m_comparer = 
InstrComparer(&instr); + } + + void visit(const ControlFlowInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const IfInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const WriteScratchInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const StreamOutInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const MemRingOutInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const EmitVertexInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const GDSInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const WriteTFInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const LDSAtomicInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const LDSReadInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + void visit(const RatInstr& instr) override { + m_comparer = InstrComparer(&instr); + } + + InstrComparer m_comparer; +}; + + +bool Instr::equal_to(const Instr& lhs) const +{ + InstrCompareForward cmp; + accept(cmp); + lhs.accept(cmp.m_comparer); + + return cmp.m_comparer.result; +} + + + + +} // ns r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h new file mode 100644 index 0000000..c70427e --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -0,0 +1,314 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * 
the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include "sfn_virtualvalues.h" +#include "sfn_alu_defines.h" +#include "sfn_defines.h" +#include +#include +#include + +namespace r600 { + +class ConstInstrVisitor; + +class InstrVisitor; +class AluInstr; +class AluGroup; +class TexInstr; +class ExportInstr; +class FetchInstr; +class ControlFlowInstr; +class IfInstr; +class WriteScratchInstr; +class StreamOutInstr; +class MemRingOutInstr; +class EmitVertexInstr; +class GDSInstr; +class WriteTFInstr; +class LDSAtomicInstr; +class LDSReadInstr; +class RatInstr; + + +int int_from_string_with_prefix(const std::string& str, const std::string& prefix); +int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa); + +class Instr : public Allocate { +public: + + enum Flags { + always_keep, + dead, + scheduled, + vpm, + force_cf, + ack_rat_return_write, + nflags + }; + + Instr(); + + Instr(const Instr& orig) = default; + + virtual ~Instr(); + + using Pointer = R600_POINTER_TYPE(Instr); + + void print(std::ostream& os) const; + bool equal_to(const Instr& lhs) const; + + virtual void accept(ConstInstrVisitor& visitor) const = 0; + virtual void accept(InstrVisitor& visitor) = 0; + virtual bool end_group() const { return true;} + + virtual 
bool is_last() const; + + void set_always_keep() {m_instr_flags.set(always_keep);} + bool set_dead(); + virtual void set_scheduled() { m_instr_flags.set(scheduled); forward_set_scheduled();} + void add_use() {++m_use_count;} + void dec_use() {assert(m_use_count > 0); --m_use_count;} + bool is_dead() const {return m_instr_flags.test(dead);} + bool is_scheduled() const {return m_instr_flags.test(scheduled);} + bool keep() const {return m_instr_flags.test(always_keep);} + bool has_uses() const {return m_use_count > 0;} + + bool has_instr_flag(Flags f) const {return m_instr_flags.test(f);} + void set_instr_flag(Flags f) { m_instr_flags.set(f);} + + virtual bool replace_source(PRegister old_src, PVirtualValue new_src); + virtual bool replace_dest(PRegister new_dest, AluInstr *move_instr); + + virtual int nesting_corr() const { return 0;} + + virtual bool end_block() const { return false;} + virtual int nesting_offset() const { return 0;} + + void set_blockid(int id, int index); + int block_id() const {return m_block_id;} + int index() const { return m_index;} + + void add_required_instr(Instr *instr); + void replace_required_instr(Instr *old_instr, Instr *new_instr); + + bool ready() const; + + virtual uint32_t slots() const {return 0;}; + + using InstrList = std::list>; + + const InstrList& dependend_instr() { return m_dependend_instr;} + +protected: + + const InstrList& required_instr() const {return m_required_instr; } + +private: + virtual void forward_set_blockid(int id, int index); + + virtual bool do_ready() const = 0; + + virtual void do_print(std::ostream& os) const = 0; + virtual bool propagate_death(); + virtual void forward_set_scheduled() {} + + InstrList m_required_instr; + InstrList m_dependend_instr; + + int m_use_count; + int m_block_id; + int m_index; + std::bitset m_instr_flags{0}; + +}; +using PInst = Instr::Pointer; + +class Block : public Instr { +public: + + enum Type { + cf, + alu, + tex, + vtx, + gds, + unknown + }; + + using Instructions = 
std::list>; + using Pointer = R600_POINTER_TYPE(Block); + using iterator = Instructions::iterator; + using reverse_iterator = Instructions::reverse_iterator; + using const_iterator = Instructions::const_iterator; + + Block(int nesting_depth, int id); + Block(const Block& orig) = delete; + + void push_back(PInst instr); + iterator begin() { return m_instructions.begin(); } + iterator end() { return m_instructions.end(); } + reverse_iterator rbegin() { return m_instructions.rbegin(); } + reverse_iterator rend() { return m_instructions.rend(); } + + const_iterator begin() const { return m_instructions.begin();} + const_iterator end() const { return m_instructions.end();} + + bool empty() const { return m_instructions.empty();} + + void erase(iterator node); + + bool is_equal_to(const Block& lhs) const; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + int nesting_depth() const { return m_nesting_depth;} + + int id() const {return m_id;} + + auto type() const {return m_blocK_type; } + void set_type(Type t); + uint32_t remaining_slots() const { return m_remaining_slots;} + + bool try_reserve_kcache(const AluGroup& group); + + auto last_lds_instr() {return m_last_lds_instr;} + void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;} + + void lds_group_start(AluInstr *alu); + void lds_group_end(); + bool lds_group_active() { return m_lds_group_start != nullptr;} + + size_t size() const { return m_instructions.size();} + +private: + bool try_reserve_kcache(const UniformValue& u); + + bool do_ready() const override {return true;}; + void do_print(std::ostream& os) const override; + Instructions m_instructions; + int m_nesting_depth; + int m_id; + int m_next_index; + + Type m_blocK_type{unknown}; + uint32_t m_remaining_slots{0xffff}; + + std::array m_kcache; + + Instr *m_last_lds_instr{nullptr}; + + int m_lds_group_requirement{0}; + AluInstr *m_lds_group_start{nullptr}; +}; + +class InstrWithVectorResult : 
public Instr { +public: + InstrWithVectorResult(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle); + + void set_dest_swizzle(const RegisterVec4::Swizzle& swz) {m_dest_swizzle = swz;} + int dest_swizzle(int i) const { return m_dest_swizzle[i];} + const RegisterVec4::Swizzle& all_dest_swizzle() const { return m_dest_swizzle;} + const RegisterVec4& dst() const {return m_dest;} + +protected: + InstrWithVectorResult(const InstrWithVectorResult& orig); + + void print_dest(std::ostream& os) const; + bool comp_dest(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle) const; + +private: + RegisterVec4 m_dest; + RegisterVec4::Swizzle m_dest_swizzle; +}; + +inline bool operator == (const Instr& lhs, const Instr& rhs) { + return lhs.equal_to(rhs); +} + +inline bool operator != (const Instr& lhs, const Instr& rhs) { + return !(lhs == rhs); +} + +inline std::ostream& operator << (std::ostream& os, const Instr& instr) +{ + instr.print(os); + return os; +} + +template >> +std::ostream& operator<<(std::ostream& os, const T& instr) { + instr.print(os); + return os; +} + +class ConstInstrVisitor { +public: + virtual void visit(const AluInstr& instr) = 0; + virtual void visit(const AluGroup& instr) = 0; + virtual void visit(const TexInstr& instr) = 0; + virtual void visit(const ExportInstr& instr) = 0; + virtual void visit(const FetchInstr& instr) = 0; + virtual void visit(const Block& instr) = 0; + virtual void visit(const ControlFlowInstr& instr) = 0; + virtual void visit(const IfInstr& instr) = 0; + virtual void visit(const WriteScratchInstr& instr) = 0; + virtual void visit(const StreamOutInstr& instr) = 0; + virtual void visit(const MemRingOutInstr& instr) = 0; + virtual void visit(const EmitVertexInstr& instr) = 0; + virtual void visit(const GDSInstr& instr) = 0; + virtual void visit(const WriteTFInstr& instr) = 0; + virtual void visit(const LDSAtomicInstr& instr) = 0; + virtual void visit(const LDSReadInstr& instr) = 0; + virtual void 
visit(const RatInstr& instr) = 0; +}; + +class InstrVisitor { +public: + virtual void visit(AluInstr *instr) = 0; + virtual void visit(AluGroup *instr) = 0; + virtual void visit(TexInstr *instr) = 0; + virtual void visit(ExportInstr *instr) = 0; + virtual void visit(FetchInstr *instr) = 0; + virtual void visit(Block *instr) = 0; + virtual void visit(ControlFlowInstr *instr) = 0; + virtual void visit(IfInstr *instr) = 0; + virtual void visit(WriteScratchInstr *instr) = 0; + virtual void visit(StreamOutInstr *instr) = 0; + virtual void visit(MemRingOutInstr *instr) = 0; + virtual void visit(EmitVertexInstr *instr) = 0; + virtual void visit(GDSInstr *instr) = 0; + virtual void visit(WriteTFInstr *instr) = 0; + virtual void visit(LDSAtomicInstr *instr) = 0; + virtual void visit(LDSReadInstr *instr) = 0; + virtual void visit(RatInstr *instr) = 0; +}; + + +} // ns r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp new file mode 100644 index 0000000..006a34f --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -0,0 +1,2449 @@ +#include "sfn_instr_alu.h" +#include "sfn_instr_alugroup.h" +#include "sfn_instr_tex.h" +#include "sfn_shader.h" +#include "sfn_debug.h" + +#include +#include + + +namespace r600 { + +using std::string; +using std::istream; +using std::vector; + +AluInstr::AluInstr(EAluOp opcode, PRegister dest, + SrcValues src, + const std::set& flags, int slots): + m_opcode(opcode), + m_dest(dest), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu), + m_alu_slots(slots) +{ + m_src.swap(src); + + if (m_src.size() == 3) + m_alu_flags.set(alu_op3); + + for(auto f : flags) + m_alu_flags.set(f); + + ASSERT_OR_THROW(m_src.size() == static_cast(alu_ops.at(opcode).nsrc * m_alu_slots), + "Unexpected number of source values"); + + if (m_alu_flags.test(alu_write)) + ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given"); + + update_uses(); + +} + 
+AluInstr::AluInstr(EAluOp opcode): + AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1) +{ +} + +AluInstr::AluInstr(EAluOp opcode, int chan): + AluInstr(opcode, nullptr, SrcValues(), {}, 1) +{ + m_fallback_chan = chan; +} + +AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, + const std::set& m_flags): + AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1) +{ + +} + +AluInstr::AluInstr(EAluOp opcode, PRegister dest, + PVirtualValue src0, PVirtualValue src1, + const std::set& m_flags): + AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1) +{ + +} + +AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1, + PVirtualValue src2, + const std::set& m_flags): + AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1) +{ + +} + +AluInstr::AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address): + m_lds_opcode(op) +{ + set_alu_flag(alu_is_lds); + + m_src.push_back(address); + if (src0) { + m_src.push_back(src0); + if (src1) + m_src.push_back(src1); + } + update_uses(); +} + +AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set& flags): + m_lds_opcode(op), + m_src(src) +{ + for(auto f : flags) + set_alu_flag(f); + + set_alu_flag(alu_is_lds); + update_uses(); +} + +void AluInstr::update_uses() +{ + for (auto& s : m_src) { + auto r = s->as_register(); + if (r) { + r->add_use(this); + // move this to add_use + if (r->pin() == pin_array) { + auto array_elm = static_cast(r); + auto addr = array_elm->addr(); + if (addr && addr->as_register()) + addr->as_register()->add_use(this); + } + } + auto u = s->as_uniform(); + if (u && u->buf_addr() && u->buf_addr()->as_register()) + u->buf_addr()->as_register()->add_use(this); + } + + if (m_dest && has_alu_flag(alu_write)) { + m_dest->add_parent(this); + + if (m_dest->pin() == pin_array) { + // move this to add_parent + auto array_elm = static_cast(m_dest); + auto addr = array_elm->addr(); + if (addr && 
addr->as_register()) + addr->as_register()->add_use(this); + } + } +} + +void AluInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void AluInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +const std::map AluInstr::cf_map = { + {cf_alu_break, "BREAK"}, + {cf_alu_continue, "CONT"}, + {cf_alu_else_after, "ELSE_AFTER"}, + {cf_alu_extended, "EXTENDED"}, + {cf_alu_pop_after, "POP_AFTER"}, + {cf_alu_pop2_after, "POP2_AFTER"}, + {cf_alu_push_before, "PUSH_BEFORE"} +}; + +const std::map AluInstr::bank_swizzle_map = { + {alu_vec_012, "VEC_012"}, + {alu_vec_021, "VEC_021"}, + {alu_vec_102, "VEC_102"}, + {alu_vec_120, "VEC_120"}, + {alu_vec_201, "VEC_201"}, + {alu_vec_210, "VEC_210"} +}; + +const AluModifiers AluInstr::src_abs_flags[2] = +{alu_src0_abs, alu_src1_abs}; +const AluModifiers AluInstr::src_neg_flags[3] = +{alu_src0_neg, alu_src1_neg, alu_src2_neg}; +const AluModifiers AluInstr::src_rel_flags[3] = +{alu_src0_rel, alu_src1_rel, alu_src2_rel}; + +struct ValuePrintFlags { + ValuePrintFlags(int im, int f):index_mode(im), + flags(f) + { + } + int index_mode = 0; + int flags = 0; + static const int is_rel = 1; + static const int has_abs = 2; + static const int has_neg = 4; + static const int literal_is_float = 8; + static const int index_ar = 16; + static const int index_loopidx = 32; +}; + +void AluInstr::do_print(std::ostream& os) const +{ + const char swzchar[] = "xyzw01?_"; + + unsigned i = 0; + + os << "ALU "; + + if (has_alu_flag(alu_is_lds)) { + os << "LDS " << lds_ops.at(m_lds_opcode).name; + os << " __.x : "; + } else { + + os << alu_ops.at(m_opcode).name; + if (has_alu_flag(alu_dst_clamp)) + os << " CLAMP"; + + if (m_dest) { + if (has_alu_flag(alu_write)) + os << " " << *m_dest; + else + os << " __" << "." << swzchar[m_dest->chan()]; + if (!has_alu_flag(alu_write) && m_dest->pin() != pin_none) + os << "@" << m_dest->pin(); + os << " : "; + } else { + os << "__." 
<< swzchar[dest_chan()] << " : "; + } + } + + const int n_source_per_slot = has_alu_flag(alu_is_lds) ? + m_src.size() : alu_ops.at(m_opcode).nsrc; + + for (int s = 0; s < m_alu_slots; ++s) { + + if (s > 0) + os << " +"; + + for (int k = 0; k < n_source_per_slot; ++k) { + int pflags = 0; + if (i) + os << ' '; + if (has_alu_flag(src_neg_flags[k])) pflags |= ValuePrintFlags::has_neg; + if (has_alu_flag(src_rel_flags[k])) pflags |= ValuePrintFlags::is_rel; + if (i < 2) + if (has_alu_flag(src_abs_flags[k])) pflags |= ValuePrintFlags::has_abs; + + if (pflags & ValuePrintFlags::has_neg) os << '-'; + if (pflags & ValuePrintFlags::has_abs) os << '|'; + os << *m_src[i]; + if (pflags & ValuePrintFlags::has_abs) os << '|'; + ++i; + } + } + + os << " {"; + if (has_alu_flag(alu_write)) os << 'W'; + if (has_alu_flag(alu_last_instr)) os << 'L'; + if (has_alu_flag(alu_update_exec)) os << 'E'; + if (has_alu_flag(alu_update_pred)) os << 'P'; + os << "}"; + + auto bs_name = bank_swizzle_map.find(m_bank_swizzle); + if (bs_name != bank_swizzle_map.end()) + os << ' ' << bs_name->second; + + auto cf_name = cf_map.find(m_cf_type); + if (cf_name != cf_map.end()) + os << ' ' << cf_name->second; +} + +bool AluInstr::can_propagate_src() const +{ + /* We can use the source in the next instruction */ + if (!can_copy_propagate()) + return false; + + auto src_reg = m_src[0]->as_register(); + if (!src_reg) + return true; + + assert(m_dest); + + + + if (!m_dest->is_ssa()) { + return false; + } + + if (m_dest->pin() == pin_fully) + return m_dest->equal_to(*src_reg); + + if (m_dest->pin() == pin_chan) + return src_reg->pin() == pin_none || + (src_reg->pin() == pin_chan && + src_reg->chan() == m_dest->chan()); + + return m_dest->pin() == pin_none || m_dest->pin() == pin_free; +} + +bool AluInstr::can_propagate_dest() const +{ + if (!can_copy_propagate()){ + return false; + } + + auto src_reg = m_src[0]->as_register(); + if (!src_reg) { + return false; + } + + assert(m_dest); + + if (src_reg->pin() == 
pin_fully || + src_reg->pin() == pin_group) { + return false; + } + + if (!src_reg->is_ssa()) + return false; + + if (src_reg->pin() == pin_chan) + return m_dest->pin() == pin_none || + m_dest->pin() == pin_free || + ((m_dest->pin() == pin_chan || + m_dest->pin() == pin_group) && + src_reg->chan() == m_dest->chan()); + + return (src_reg->pin() == pin_none || + src_reg->pin() == pin_free); +} + +bool AluInstr::can_copy_propagate() const +{ + if (m_opcode != op1_mov) + return false; + + if (has_alu_flag(alu_src0_abs) || + has_alu_flag(alu_src0_neg) || + has_alu_flag(alu_dst_clamp)) + return false; + + return has_alu_flag(alu_write); +} + +bool AluInstr::replace_source(PRegister old_src, PVirtualValue new_src) +{ + bool process = false; + + if (!check_readport_validation(old_src, new_src)) + return false; + + /* If the old source is an array element, we assume that there + * might have been an (untracked) indirect access, so don't replace + * this source */ + if (old_src->pin() == pin_array) + return false; + + if (new_src->get_addr()) { + for (auto& s : m_src) { + auto addr = s->get_addr(); + /* can't have two differen't indirect addresses in the same instr */ + if (addr && !addr->equal_to(*new_src->get_addr())) + return false; + } + } + + if (m_dest) { + /* We don't allow src and dst with rel and different indirect register addresses */ + if (m_dest->pin() == pin_array && new_src->pin() == pin_array) { + auto dav = static_cast(m_dest)->addr(); + auto sav = static_cast(new_src)->addr(); + if (dav && sav && dav->as_register() && !dav->equal_to(*sav)) + return false; + } + } + + /* Check the readports */ + if (m_alu_slots * alu_ops.at(m_opcode).nsrc > 2 || m_parent_group) { + AluReadportReservation read_port_check = + !m_parent_group ? 
+ AluReadportReservation() : + m_parent_group->readport_reserer(); + + int nsrc = alu_ops.at(m_opcode).nsrc; + PVirtualValue src[3]; + + for (int s = 0; s < m_alu_slots; ++s) { + for (int i = 0; i < nsrc; ++i) { + auto old_s = m_src[i + nsrc * s]; + src[i] = old_s->equal_to(*old_src) ? new_src : old_s; + } + AluBankSwizzle bs = alu_vec_012; + while (bs != alu_vec_unknown) { + if (read_port_check.schedule_vec_src(src, nsrc, bs)) { + break; + } + ++bs; + } + if (bs == alu_vec_unknown) + return false; + } + if (m_parent_group) + m_parent_group->set_readport_reserer(read_port_check); + } + + for (unsigned i = 0; i < m_src.size(); ++i) { + if (old_src->equal_to(*m_src[i])) { + m_src[i] = new_src; + process = true; + } + } + if (process) { + auto r = new_src->as_register(); + if (r) + r->add_use(this); + old_src->del_use(this); + } + return process; +} + +void AluInstr::set_sources(SrcValues src) +{ + for (auto& s : m_src) { + auto r = s->as_register(); + if (r) + r->del_use(this); + } + m_src.swap(src); + for (auto& s : m_src) { + auto r = s->as_register(); + if (r) + r->add_use(this); + } +} + +bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr) +{ + if (m_dest->equal_to(*new_dest)) + return false; + + if (m_dest->uses().size() > 1) + return false; + + if (new_dest->pin() == pin_array) + return false; + + /* Currently we bail out when an array write should be moved, because + * decalring an array write is currently not well defined. 
The + * Whole "backwards" copy propagation shoul dprobably be replaced by some + * forward peep holew optimization */ + /* + if (new_dest->pin() == pin_array) { + auto dav = static_cast(new_dest)->addr(); + for (auto s: m_src) { + if (s->pin() == pin_array) { + auto sav = static_cast(s)->addr(); + if (dav && sav && dav->as_register() && !dav->equal_to(*sav)) + return false; + } + } + } + */ + + if (m_dest->pin() == pin_chan && + new_dest->chan() != m_dest->chan()) + return false; + + + if (m_dest->pin() == pin_chan) { + if (new_dest->pin() == pin_group) + new_dest->set_pin(pin_chgr); + else + new_dest->set_pin(pin_chan); + } + + m_dest = new_dest; + if (!move_instr->has_alu_flag(alu_last_instr)) + reset_alu_flag(alu_last_instr); + + if (has_alu_flag(alu_is_cayman_trans)) { + /* Copy propagation puts an instruction into the w channel, but we + * don't have the slots for a w channel */ + if (m_dest->chan() == 3 && m_alu_slots < 4) { + m_alu_slots = 4; + assert(m_src.size() == 3); + m_src.push_back(m_src[0]); + } + } + + return true; +} + +void AluInstr::pin_sources_to_chan() +{ + for (auto s: m_src) { + auto r = s->as_register(); + if (r) { + if (r->pin() == pin_free) + r->set_pin(pin_chan); + else if (r->pin() == pin_group) + r->set_pin(pin_chgr); + } + } +} + +bool AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const +{ + bool success = true; + AluReadportReservation rpr_sum; + + if (m_src.size() < 3) + return true; + + unsigned nsrc = alu_ops.at(m_opcode).nsrc; + assert(nsrc * m_alu_slots == m_src.size()); + + + for (int s = 0; s < m_alu_slots && success; ++s) { + for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) { + auto ireg = m_src.begin() + s * nsrc; + + AluReadportReservation rpr = rpr_sum; + PVirtualValue s[3]; + + for (unsigned i = 0; i < nsrc; ++i, ++ireg) + s[i] = old_src->equal_to(**ireg) ? 
new_src : *ireg; + + if (rpr.schedule_vec_src(s, nsrc, i)) { + rpr_sum = rpr; + break; + } else { + success = false; + } + } + } + return success; +} + +void AluInstr::add_extra_dependency(PVirtualValue value) +{ + auto reg = value->as_register(); + if (reg) + m_extra_dependencies.insert(reg); +} + + +bool AluInstr::is_equal_to(const AluInstr& lhs) const +{ + if (lhs.m_opcode != m_opcode || + lhs.m_bank_swizzle != m_bank_swizzle || + lhs.m_cf_type != m_cf_type || + lhs.m_alu_flags != m_alu_flags) { + return false; + } + + if (m_dest) { + if (!lhs.m_dest) { + return false; + } else { + if (has_alu_flag(alu_write)) { + if (!m_dest->equal_to(*lhs.m_dest)) + return false; + } else { + if (m_dest->chan() != lhs.m_dest->chan()) + return false; + } + } + } else { + if (lhs.m_dest) + return false; + } + + if (m_src.size() != lhs.m_src.size()) + return false; + + for (unsigned i = 0; i < m_src.size(); ++i) { + if (!m_src[i]->equal_to(*lhs.m_src[i])) + return false; + } + + return true; +} + +class ResolveIndirectArrayAddr: public ConstRegisterVisitor { +public: + void visit(const Register& value){(void) value;} + void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");} + void visit(const LocalArrayValue& value); + void visit(const UniformValue& value); + void visit(const LiteralConstant& value) {(void)value;} + void visit(const InlineConstant& value) {(void)value;} + + PRegister addr{nullptr}; + bool is_index{false}; +}; + +void ResolveIndirectArrayAddr::visit(const LocalArrayValue& value) +{ + auto a = value.addr(); + if (a) + addr = a->as_register(); +} + +void ResolveIndirectArrayAddr::visit(const UniformValue& value) +{ + auto a = value.buf_addr(); + if (a) { + addr = a->as_register(); + is_index = true; + } +} + +std::pair AluInstr::indirect_addr() const +{ + ResolveIndirectArrayAddr visitor; + + if (m_dest) { + m_dest->accept(visitor); + if (visitor.addr) + return {visitor.addr, false}; + } + + for (auto s: m_src) { + 
s->accept(visitor); + if (visitor.addr) { + return {visitor.addr, visitor.is_index}; + } + } + return {nullptr, false}; +} + +AluGroup *AluInstr::split(ValueFactory& vf) +{ + if (m_alu_slots == 1) + return nullptr; + + sfn_log << SfnLog::instr << "Split " << *this << "\n"; + + auto group = new AluGroup(); + + m_dest->del_parent(this); + + for (int s = 0; s < m_alu_slots; ++s) { + + PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s); + if (dst->pin() != pin_chgr) { + auto pin = pin_chan; + if (dst->pin() == pin_group && m_dest->chan() == s) + pin = pin_chgr; + dst->set_pin(pin); + } + + SrcValues src; + for (int i = 0; i < alu_ops.at(m_opcode).nsrc; ++i) { + auto old_src = m_src[s * alu_ops.at(m_opcode).nsrc + i]; + // Make it easy for the scheduler and pin the register to the + // channel, otherwise scheduler would have to check whether a + // channel switch is possible + auto r = old_src->as_register(); + if (r) { + if (r->pin() == pin_free || r->pin() == pin_none) + r->set_pin(pin_chan); + else if (r->pin() == pin_group) + r->set_pin(pin_chgr); + } + src.push_back(old_src); + } + + auto instr = new AluInstr(m_opcode, dst, src, {}, 1); + instr->set_blockid(block_id(), index()); + + if (s == 0 || !m_alu_flags.test(alu_64bit_op)) { + if (has_alu_flag(alu_src0_neg)) + instr->set_alu_flag(alu_src0_neg); + if (has_alu_flag(alu_src1_neg)) + instr->set_alu_flag(alu_src1_neg); + if (has_alu_flag(alu_src2_neg)) + instr->set_alu_flag(alu_src2_neg); + if (has_alu_flag(alu_src0_abs)) + instr->set_alu_flag(alu_src0_abs); + if (has_alu_flag(alu_src1_abs)) + instr->set_alu_flag(alu_src1_abs); + } + if (has_alu_flag(alu_dst_clamp)) + instr->set_alu_flag(alu_dst_clamp); + + if (s == m_dest->chan()) + instr->set_alu_flag(alu_write); + + m_dest->add_parent(instr); + sfn_log << SfnLog::instr << " " << *instr << "\n"; + + if (!group->add_instruction(instr)) { + std::cerr << "Unable to schedule '" << *instr << "' into\n" + << *group << "\n"; + + unreachable("Invalid group 
instruction"); + } + } + group->set_blockid(block_id(), index()); + + for (auto s : m_src) { + auto r = s->as_register(); + if (r) { + r->del_use(this); + } + } + + return group; +} + +/* Alu instructions that have SSA dest registers increase the regietsr pressure + * Alu instructions that read from SSA registers may decresase the register pressure + * hency evaluate a priorityx values based on register pressure change */ +int AluInstr::register_priority() const +{ + int priority = 0; + if (!has_alu_flag(alu_no_schedule_bias)) { + + if (m_dest && m_dest->is_ssa() && has_alu_flag(alu_write)) { + if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr) + priority--; + } + + for (const auto s : m_src) { + auto r = s->as_register(); + if (r && r->is_ssa()) { + int pending = 0; + for (auto b : r->uses()) { + if (!b->is_scheduled()) + ++pending; + } + if (pending == 1) + ++priority; + } + } + } + return priority; +} + +bool AluInstr::propagate_death() +{ + if (!m_dest) + return true; + + if (m_dest->pin() == pin_group || + m_dest->pin() == pin_chan) { + switch (m_opcode) { + case op2_interp_x: + case op2_interp_xy: + case op2_interp_z: + case op2_interp_zw: + reset_alu_flag(alu_write); + return false; + default: + ; + } + } + + if (m_dest->pin() == pin_array) + return false; + + /* We assume that nir does a good job in eliminating all ALU results that + * are not needed, and we don't let copy propagation doesn't make the + * instruction obsolte, so just keep all */ + if (has_alu_flag(alu_is_cayman_trans)) + return false; + + for (auto& src: m_src) { + auto reg = src->as_register(); + if (reg) + reg->del_use(this); + } + return true; +} + +bool AluInstr::has_lds_access() const +{ + if (has_alu_flag(alu_is_lds)) + return true; + + for (auto& s : m_src) + if (s->as_inline_const() && + (s->as_inline_const()->sel() == ALU_SRC_LDS_OQ_A_POP)) + return true; + + return false; +} + +struct OpDescr { + union { + EAluOp alu_opcode; + ESDOp lds_opcode; + }; + int nsrc; +}; + 
+static std::map s_alu_map_by_name; +static std::map s_lds_map_by_name; + +Instr::Pointer AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group) +{ + vector tokens; + + while (is.good() && !is.eof()) { + string t; + is >> t; + if (t.length() > 0) { + tokens.push_back(t); + } + } + + std::set flags; + auto t = tokens.begin(); + + bool is_lds = false; + + if (*t == "LDS") { + is_lds = true; + t++; + } + + string opstr = *t++; + string deststr = *t++; + + if (deststr == "CLAMP") { + flags.insert(alu_dst_clamp); + deststr = *t++; + } + + assert(*t == ":"); + OpDescr op_descr = {{op_invalid}, -1}; + + if (is_lds) { + auto op = s_lds_map_by_name.find(opstr); + if (op == s_lds_map_by_name.end()) { + for(auto [opcode, opdescr] : lds_ops ) { + if (opstr == opdescr.name) { + op_descr.lds_opcode = opcode; + op_descr.nsrc = opdescr.nsrc; + s_alu_map_by_name[opstr] = op_descr; + break; + } + } + + if (op_descr.nsrc == -1) { + std::cerr << "'" << opstr << "'"; + unreachable("Unknown opcode"); + return nullptr; + } + } else { + op_descr = op->second; + } + } else { + auto op = s_alu_map_by_name.find(opstr); + if (op == s_alu_map_by_name.end()) { + for(auto [opcode, opdescr] : alu_ops ) { + if (opstr == opdescr.name) { + op_descr = {{opcode}, opdescr.nsrc}; + s_alu_map_by_name[opstr] = op_descr; + break; + } + } + + if (op_descr.nsrc == -1) { + std::cerr << "'" << opstr << "'"; + unreachable("Unknown opcode"); + return nullptr; + } + } else { + op_descr = op->second; + } + } + + int slots = 0; + + SrcValues sources; + do { + ++t; + for (int i = 0; i < op_descr.nsrc; ++i) { + string srcstr = *t++; + + if (srcstr[0] == '-') { + if (!slots) + flags.insert(AluInstr::src_neg_flags[i]); + else + assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end()); + srcstr = srcstr.substr(1); + } + + if (srcstr[0] == '|') { + assert(srcstr[srcstr.length() - 1] == '|'); + if (!slots) + flags.insert(AluInstr::src_abs_flags[i]); + else + 
assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end()); + srcstr = srcstr.substr(1, srcstr.length() - 2); + } + + auto src = value_factory.src_from_string(srcstr); + if (!src) { + std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n"; + assert(src); + } + sources.push_back(src); + } + ++slots; + } while (t != tokens.end() && *t == "+"); + + AluBankSwizzle bank_swizzle = alu_vec_unknown; + ECFAluOpCode cf = cf_alu; + + while (t != tokens.end()) { + + switch ((*t)[0]) { + case '{': { + auto iflag = t->begin() + 1; + while (iflag != t->end()) { + if (*iflag == '}') + break; + + switch (*iflag) { + case 'L': flags.insert(alu_last_instr); break; + case 'W': flags.insert(alu_write); break; + case 'E': flags.insert(alu_update_exec); break; + case 'P': flags.insert(alu_update_pred); break; + } + ++iflag; + } + } + break; + + case 'V': { + string bs = *t; + if (bs == "VEC_012") + bank_swizzle = alu_vec_012; + else if (bs == "VEC_021") + bank_swizzle = alu_vec_021; + else if (bs == "VEC_102") + bank_swizzle = alu_vec_102; + else if (bs == "VEC_120") + bank_swizzle = alu_vec_120; + else if (bs == "VEC_201") + bank_swizzle = alu_vec_201; + else if (bs == "VEC_210") + bank_swizzle = alu_vec_210; + else { + std::cerr << "'" << bs << "': "; + unreachable("Unknowe bankswizzle given"); + } + } + break; + + default: { + string cf_str = *t; + if (cf_str == "PUSH_BEFORE") + cf = cf_alu_push_before; + else if (cf_str == "POP_AFTER") + cf = cf_alu_pop_after; + else if (cf_str == "POP2_AFTER") + cf = cf_alu_pop2_after; + else if (cf_str == "EXTENDED") + cf = cf_alu_extended; + else if (cf_str == "BREAK") + cf = cf_alu_break; + else if (cf_str == "CONT") + cf = cf_alu_continue; + else if (cf_str == "ELSE_AFTER") + cf = cf_alu_else_after; + else { + std::cerr << " '" << cf_str << "' "; + unreachable("Unknown tocken in ALU instruction"); + } + } + } + ++t; + } + + PRegister dest = nullptr; + // construct instruction + if (deststr != "(null)") + dest = 
value_factory.dest_from_string(deststr); + + AluInstr *retval = nullptr; + if (is_lds) + retval = new AluInstr(op_descr.lds_opcode, sources, flags); + else + retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots); + + retval->set_bank_swizzle(bank_swizzle); + retval->set_cf_type(cf); + if (group) { + group->add_instruction(retval); + retval= nullptr; + } + return retval; +} + +bool AluInstr::do_ready() const +{ + /* Alu instructions are shuffled by the scheduler, so + * we have to make sure that required ops are already + * scheduled before marking this one ready */ + for (auto i: required_instr()) { + if (!i->is_scheduled()) + return false; + } + + for (auto s : m_src) { + auto r = s->as_register(); + if (r) { + if (!r->ready(block_id(), index())) + return false; + } + auto u = s->as_uniform(); + if (u && u->buf_addr() && u->buf_addr()->as_register()) { + if (!u->buf_addr()->as_register()->ready(block_id(), index())) + return false; + } + } + + if (m_dest && !m_dest->is_ssa()) { + if (m_dest->pin() == pin_array) { + auto av = static_cast(m_dest); + auto addr = av->addr(); + /* For true indiect dest access we have to make sure that all + * instructions that write the value before are schedukled */ + if (addr && (!addr->ready(block_id(), index()) || + !m_dest->ready(block_id(), index() - 1))) + return false; + } + + /* If a register is updates, we have to make sure that uses before that + * update are scheduled, otherwise we may use the updated value when we + * shouldn't */ + for (auto u : m_dest->uses()) { + if (u->block_id() <= block_id() && u->index() < index() && + !u->is_scheduled()) { + return false; + } + } + } + + for (auto& r : m_extra_dependencies) { + if (!r->ready(block_id(), index())) + return false; + } + + return true; +} + +void AluInstrVisitor::visit(AluGroup *instr) +{ + for (auto& i : *instr) { + if (i) + i->accept(*this); + } +} + +void AluInstrVisitor::visit(Block *instr) +{ + for (auto& i : *instr) + i->accept(*this); +} + 
+void AluInstrVisitor::visit(IfInstr *instr) +{ + instr->predicate()->accept(*this); +} + +static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader); + +static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + const AluOpFlags& flags = 0); +static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan); +static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order); +static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order); +static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader); +static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader); + +static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + AluInstr::Op2Options opts = AluInstr::op2_opt_none); +static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + AluInstr::Op2Options opts = AluInstr::op2_opt_none); +static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + const std::array& src_shuffle = {0,1,2}); +static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader); +static bool emit_any_all_icomp(const nir_alu_instr& alu, 
EAluOp opcode, int nc, bool all, Shader& shader); + +static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); + +static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader); +static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader); +static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader); +static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader); +static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader); +static bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader); + +static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader); +static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader); + +static bool emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader); +static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader); + +static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); + +static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); +static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); + +static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); + +static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader); + +static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader); + +static bool check_64_bit_op_src(nir_src *src, void *state) +{ + if (nir_src_bit_size(*src) == 64) { + *(bool*)state = true; + return false; + } + return true; +} + +static bool check_64_bit_op_dest(nir_dest *dest, void *state) +{ + if (nir_dest_bit_size(*dest) == 64) { + *(bool*)state = 
true; + return false; + } + return true; +} + +bool AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) +{ + bool is_64bit_op = false; + nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op); + if (!is_64bit_op) + nir_foreach_dest(&alu->instr, check_64_bit_op_dest, &is_64bit_op); + + + if (is_64bit_op) { + switch (alu->op) { + case nir_op_pack_64_2x32: + case nir_op_unpack_64_2x32: + case nir_op_pack_64_2x32_split: + case nir_op_pack_half_2x16_split: + case nir_op_unpack_64_2x32_split_x: + case nir_op_unpack_64_2x32_split_y: break; + case nir_op_mov: return emit_alu_mov_64bit(*alu, shader); + case nir_op_fneg: return emit_alu_neg(*alu, shader); + case nir_op_ffract: return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true); + case nir_op_feq32: return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false); + case nir_op_fge32: return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false); + case nir_op_flt32: return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true); + case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false); + case nir_op_ffma: return emit_alu_fma_64bit(*alu, op3_fma_64, shader); + + case nir_op_fadd: return emit_alu_op2_64bit(*alu, op2_add_64, shader, false); + case nir_op_fmul: return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false); + case nir_op_fmax: return emit_alu_op2_64bit(*alu, op2_max_64, shader, false); + case nir_op_fmin: return emit_alu_op2_64bit(*alu, op2_min_64, shader, false); + case nir_op_b2f64: return emit_alu_b2f64(*alu, shader); + case nir_op_f2f64: return emit_alu_f2f64(*alu, shader); + case nir_op_i2f64: return emit_alu_i2f64(*alu, op1_int_to_flt, shader); + case nir_op_u2f64: return emit_alu_i2f64(*alu, op1_uint_to_flt, shader); + case nir_op_f2f32: return emit_alu_f2f32(*alu, shader); + case nir_op_fabs: return emit_alu_abs64(*alu, shader); + case nir_op_fsqrt: return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader); + case nir_op_frcp: return 
emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader); + case nir_op_frsq: return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader); + case nir_op_vec2: return emit_alu_vec2_64(*alu, shader); + default: + return false; + ; + } + } + + + if (shader.chip_class() == ISA_CC_CAYMAN) { + switch (alu->op) { + case nir_op_fcos_r600: return emit_alu_trans_op1_cayman(*alu, op1_cos, shader); + case nir_op_fexp2: return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader); + case nir_op_flog2: return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader); + case nir_op_frcp: return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader); + case nir_op_frsq: return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader); + case nir_op_fsqrt: return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader); + case nir_op_fsin_r600: return emit_alu_trans_op1_cayman(*alu, op1_sin, shader); + case nir_op_i2f32: return emit_alu_op1(*alu, op1_int_to_flt, shader); + case nir_op_u2f32: return emit_alu_op1(*alu, op1_uint_to_flt, shader); + case nir_op_imul: return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader); + case nir_op_imul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader); + case nir_op_umul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader); + case nir_op_f2u32: return emit_alu_op1(*alu, op1_flt_to_uint, shader); + case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader); + default: + ; + } + } else { + switch (alu->op) { + case nir_op_fcos_r600: return emit_alu_trans_op1_eg(*alu, op1_cos, shader); + case nir_op_fexp2: return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader); + case nir_op_flog2: return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader); + case nir_op_frcp: return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader); + case nir_op_frsq: return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader); + case nir_op_fsin_r600: return emit_alu_trans_op1_eg(*alu, op1_sin, shader); + case 
nir_op_fsqrt: return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader); + case nir_op_i2f32: return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader); + case nir_op_u2f32: return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader); + case nir_op_imul: return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader); + case nir_op_imul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader); + case nir_op_umul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader); + case nir_op_f2i32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader); + case nir_op_f2u32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader); + default: + ; + } + } + + switch (alu->op) { + case nir_op_b2b1: return emit_alu_op1(*alu, op1_mov, shader); + case nir_op_b2b32: return emit_alu_op1(*alu, op1_mov, shader); + case nir_op_b2f32: return emit_alu_b2x(*alu, ALU_SRC_1, shader); + case nir_op_b2i32: return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader); + + case nir_op_bfm: return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none); + case nir_op_bit_count: return emit_alu_op1(*alu, op1_bcnt_int, shader); + + case nir_op_bitfield_reverse: return emit_alu_op1(*alu, op1_bfrev_int, shader); + case nir_op_bitfield_select: return emit_alu_op3(*alu, op3_bfi_int, shader); + + case nir_op_b32all_fequal2: return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader); + case nir_op_b32all_fequal3: return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader); + case nir_op_b32all_fequal4: return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader); + case nir_op_b32all_iequal2: return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader); + case nir_op_b32all_iequal3: return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader); + case nir_op_b32all_iequal4: return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader); + case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader); + case nir_op_b32any_fnequal3: return emit_any_all_fcomp(*alu, 
op2_setne, 3, false, shader); + case nir_op_b32any_fnequal4: return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader); + case nir_op_b32any_inequal2: return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader); + case nir_op_b32any_inequal3: return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader); + case nir_op_b32any_inequal4: return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader); + case nir_op_b32csel: return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1}); + + case nir_op_f2b32: return emit_alu_comb_with_zero(*alu, op2_setne_dx10, shader); + case nir_op_fabs: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_abs}); + case nir_op_fadd: return emit_alu_op2(*alu, op2_add, shader); + case nir_op_fceil: return emit_alu_op1(*alu, op1_ceil, shader); + case nir_op_fcsel: return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1}); + case nir_op_fcsel_ge: return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2}); + case nir_op_fcsel_gt: return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2}); + + case nir_op_fdot2: return emit_dot(*alu, 2, shader); + case nir_op_fdot3: return emit_dot(*alu, 3, shader); + case nir_op_fdot4: return emit_dot(*alu, 4, shader); + + case nir_op_feq32: + case nir_op_feq: return emit_alu_op2(*alu, op2_sete_dx10, shader); + case nir_op_ffloor: return emit_alu_op1(*alu, op1_floor, shader); + case nir_op_ffract: return emit_alu_op1(*alu, op1_fract, shader); + case nir_op_fge32: return emit_alu_op2(*alu, op2_setge_dx10, shader); + case nir_op_fge: return emit_alu_op2(*alu, op2_setge_dx10, shader); + case nir_op_find_lsb: return emit_alu_op1(*alu, op1_ffbl_int, shader); + + case nir_op_flt32: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse); + case nir_op_flt: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse); + case nir_op_fmax: return emit_alu_op2(*alu, op2_max_dx10, shader); + case nir_op_fmin: return emit_alu_op2(*alu, op2_min_dx10, shader); + case nir_op_fmul: return 
emit_alu_op2(*alu, op2_mul_ieee, shader); + case nir_op_fneg: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_neg}); + case nir_op_fneu32: return emit_alu_op2(*alu, op2_setne_dx10, shader); + case nir_op_fneu: return emit_alu_op2(*alu, op2_setne_dx10, shader); + + case nir_op_fround_even: return emit_alu_op1(*alu, op1_rndne, shader); + case nir_op_fsat: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp}); + case nir_op_fsub: return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1); + case nir_op_ftrunc: return emit_alu_op1(*alu, op1_trunc, shader); + case nir_op_i2b1: + case nir_op_i2b32: return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader); + case nir_op_iadd: return emit_alu_op2_int(*alu, op2_add_int, shader); + case nir_op_iand: return emit_alu_op2_int(*alu, op2_and_int, shader); + case nir_op_ibfe: return emit_alu_op3(*alu, op3_bfe_int, shader); + case nir_op_i32csel_ge: return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2}); + case nir_op_i32csel_gt: return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2}); + case nir_op_ieq32: return emit_alu_op2_int(*alu, op2_sete_int, shader); + case nir_op_ieq: return emit_alu_op2_int(*alu, op2_sete_int, shader); + case nir_op_ifind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_int, shader); + case nir_op_ige32: return emit_alu_op2_int(*alu, op2_setge_int, shader); + case nir_op_ige: return emit_alu_op2_int(*alu, op2_setge_int, shader); + case nir_op_ilt32: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse); + case nir_op_ilt: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse); + case nir_op_imax: return emit_alu_op2_int(*alu, op2_max_int, shader); + case nir_op_imin: return emit_alu_op2_int(*alu, op2_min_int, shader); + case nir_op_ine32: return emit_alu_op2_int(*alu, op2_setne_int, shader); + case nir_op_ine: return emit_alu_op2_int(*alu, op2_setne_int, shader); + case nir_op_ineg: return emit_alu_comb_with_zero(*alu, op2_sub_int, shader); + case 
nir_op_inot: return emit_alu_op1(*alu, op1_not_int, shader); + case nir_op_ior: return emit_alu_op2_int(*alu, op2_or_int, shader); + case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader); + case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader); + case nir_op_isub: return emit_alu_op2_int(*alu, op2_sub_int, shader); + case nir_op_ixor: return emit_alu_op2_int(*alu, op2_xor_int, shader); + case nir_op_pack_64_2x32: return emit_pack_64_2x32(*alu, shader); + case nir_op_unpack_64_2x32: return emit_unpack_64_2x32(*alu, shader); + case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(*alu, shader); + case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(*alu, shader); + case nir_op_slt: return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse); + case nir_op_sge: return emit_alu_op2(*alu, op2_setge, shader); + case nir_op_ubfe: return emit_alu_op3(*alu, op3_bfe_uint, shader); + case nir_op_ufind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_uint, shader); + case nir_op_uge32: return emit_alu_op2_int(*alu, op2_setge_uint, shader); + case nir_op_uge: return emit_alu_op2_int(*alu, op2_setge_uint, shader); + case nir_op_ult32: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse); + case nir_op_ult: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse); + case nir_op_umad24: return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2}); + case nir_op_umax: return emit_alu_op2_int(*alu, op2_max_uint, shader); + case nir_op_umin: return emit_alu_op2_int(*alu, op2_min_uint, shader); + case nir_op_umul24: return emit_alu_op2(*alu, op2_mul_uint24, shader); + case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader); + case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(*alu, 0, shader); + case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(*alu, 1, shader); + case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(*alu, shader); 
+ case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(*alu, shader); + + + case nir_op_ffma: return emit_alu_op3(*alu, op3_muladd_ieee, shader); + case nir_op_mov: return emit_alu_op1(*alu, op1_mov, shader); + case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader); + case nir_op_vec2: return emit_create_vec(*alu, 2, shader); + case nir_op_vec3: return emit_create_vec(*alu, 3, shader); + case nir_op_vec4: return emit_create_vec(*alu, 4, shader); + + case nir_op_fddx: + case nir_op_fddx_coarse: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader); + case nir_op_fddx_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader); + case nir_op_fddy: + case nir_op_fddy_coarse: return emit_tex_fdd(*alu,TexInstr::get_gradient_v, false, shader); + case nir_op_fddy_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_v, true, shader); + case nir_op_cube_r600: return emit_alu_cube(*alu, shader); + default: + fprintf(stderr, "Unknown instruction '"); + nir_print_instr(&alu->instr, stderr); + fprintf(stderr, "'\n"); + assert(0); + return false; + } +} + +static Pin pin_for_components(const nir_alu_instr& alu) +{ + return (alu.dest.dest.is_ssa && + (nir_dest_num_components(alu.dest.dest) == 1)) ? 
pin_free : pin_none; + +} + +static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan) +{ + auto& value_factory = shader.value_factory(); + + auto group = new AluGroup(); + + AluInstr *ir = nullptr; + + int swz[2] = {0,1}; + if (switch_chan) { + swz[0] = 1; + swz[1] = 0; + } + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + for (unsigned c = 0; c < 2 ; ++c) { + ir = new AluInstr(opcode, + value_factory.dest(alu.dest, 2 * i + c, pin_chan), + value_factory.src64(alu.src[0], i, swz[c]), + {alu_write}); + group->add_instruction(ir); + } + if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + AluInstr *ir = nullptr; + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + for (unsigned c = 0; c < 2 ; ++c) { + ir = new AluInstr(op1_mov, + value_factory.dest(alu.dest, 2 * i + c, pin_free), + value_factory.src64(alu.src[0], i, c), + {alu_write}); + shader.emit_instruction(ir); + } + if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + auto group = new AluGroup(); + + AluInstr *ir = nullptr; + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + for (unsigned c = 0; c < 2 ; ++c) { + ir = new AluInstr(op1_mov, + value_factory.dest(alu.dest, 2 * i + c, pin_chan), + value_factory.src64(alu.src[0], i, c), + {alu_write}); + group->add_instruction(ir); + } + ir->set_alu_flag(alu_src0_neg); + } + if (ir) + 
ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + assert(nir_dest_num_components(alu.dest.dest) == 1); + + shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_chan), + value_factory.src64(alu.src[0], 0, 0), + AluInstr::write)); + + auto ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, 1, pin_chan), + value_factory.src64(alu.src[0], 0, 1), + AluInstr::last_write); + ir->set_alu_flag(alu_src0_abs); + shader.emit_instruction(ir); + return true; +} + +static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_src) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + int order[2] = {0, 1}; + if (switch_src) { + order[0] = 1; + order[1] = 0; + } + + int num_emit0 = opcode == op2_mul_64 ? 3 : 1; + + assert(num_emit0 == 1 || nir_dest_num_components(alu.dest.dest) == 1); + + + for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) { + int i = 0; + for (; i < num_emit0; ++i) { + auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) : + value_factory.dummy_dest(i); + + ir = new AluInstr(opcode, + dest, + value_factory.src64(alu.src[order[0]], k, 1), + value_factory.src64(alu.src[order[1]], k, 1), + i < 2 ? AluInstr::write : AluInstr::empty); + + if (alu.src[0].abs) ir->set_alu_flag(switch_src ? alu_src1_abs : alu_src0_abs); + if (alu.src[1].abs) ir->set_alu_flag(switch_src ? alu_src0_abs : alu_src1_abs); + if (alu.src[0].negate) ir->set_alu_flag(switch_src ? alu_src1_neg : alu_src0_neg); + if (alu.src[1].negate) ir->set_alu_flag(switch_src ? alu_src0_neg : alu_src1_neg); + if (alu.dest.saturate && i == 0) { + ir->set_alu_flag(alu_dst_clamp); + } + + group->add_instruction(ir); + } + + auto dest = i == 1 ? 
value_factory.dest(alu.dest, i, pin_chan) : + value_factory.dummy_dest(i); + + ir = new AluInstr(opcode, + dest, + value_factory.src64(alu.src[order[0]], k, 0), + value_factory.src64(alu.src[order[1]], k, 0), + i == 1 ? AluInstr::write : AluInstr::empty); + group->add_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode, + Shader& shader, bool switch_order) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + int order[2] = {0, 1}; + if (switch_order) { + order[0] = 1; + order[1] = 0; + } + + AluInstr::SrcValues src(4); + + for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) { + auto dest = value_factory.dest(alu.dest, 2 * k, pin_chan); + src[0] = value_factory.src64(alu.src[order[0]], k, 1); + src[1] = value_factory.src64(alu.src[order[1]], k, 1); + src[2] = value_factory.src64(alu.src[order[0]], k, 0); + src[3] = value_factory.src64(alu.src[order[1]], k, 0); + + ir = new AluInstr(opcode, dest, src, AluInstr::write, 2); + + if (alu.src[0].abs) ir->set_alu_flag(switch_order ? alu_src1_abs : alu_src0_abs); + if (alu.src[1].abs) ir->set_alu_flag(switch_order ? alu_src0_abs : alu_src1_abs); + if (alu.src[0].negate) ir->set_alu_flag(switch_order ? alu_src1_neg : alu_src0_neg); + if (alu.src[1].negate) ir->set_alu_flag(switch_order ? alu_src0_neg : alu_src1_neg); + ir->set_alu_flag(alu_64bit_op); + + shader.emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + return true; +} + +static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 3; ++i) { + ir = new AluInstr(opcode, + i < 2 ? 
value_factory.dest(alu.dest, i, pin_chan) : + value_factory.dummy_dest(i), + value_factory.src64(alu.src[0], 0, 1), + value_factory.src64(alu.src[0], 0, 0), + i < 2 ? AluInstr::write : AluInstr::empty); + + if (alu.src[0].abs || opcode == op1_sqrt_64) ir->set_alu_flag(alu_src1_abs); + if (alu.src[0].negate) ir->set_alu_flag(alu_src1_neg); + + group->add_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; + +} + +static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + + int chan = i < 3 ? 1 : 0; + auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) : + value_factory.dummy_dest(i); + + ir = new AluInstr(opcode, + dest, + value_factory.src64(alu.src[0], 0, chan), + value_factory.src64(alu.src[1], 0, chan), + value_factory.src64(alu.src[2], 0, chan), + i < 2 ? 
AluInstr::write : AluInstr::empty); + + if (i < 3) { + if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); + if (alu.src[1].negate) ir->set_alu_flag(alu_src1_neg); + if (alu.src[2].negate) ir->set_alu_flag(alu_src2_neg); + } + + group->add_instruction(ir); + + } + if (ir) + ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + ir = new AluInstr(op2_and_int, + value_factory.dest(alu.dest, 2 * i, pin_group), + value_factory.src(alu.src[0], i), + value_factory.zero(), + {alu_write}); + group->add_instruction(ir); + + ir = new AluInstr(op2_and_int, + value_factory.dest(alu.dest, 2 * i + 1, pin_group), + value_factory.src(alu.src[0], i), + value_factory.literal(0x3ff00000), + {alu_write}); + group->add_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader) +{ + /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + + assert(nir_dest_num_components(alu.dest.dest) == 1); + + auto tmpx = value_factory.temp_register(); + shader.emit_instruction(new AluInstr(op2_and_int, tmpx, + value_factory.src(alu.src[0], 0), + value_factory.literal(0xffffff00), + AluInstr::write)); + auto tmpy = value_factory.temp_register(); + shader.emit_instruction(new AluInstr(op2_and_int, tmpy, + value_factory.src(alu.src[0], 0), + value_factory.literal(0xff), + AluInstr::last_write)); + + + auto tmpx2 = value_factory.temp_register(); + auto tmpy2 = value_factory.temp_register(); + shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, + 
AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, + AluInstr::last_write)); + + auto tmpx3 = value_factory.temp_register(0); + auto tmpy3 = value_factory.temp_register(1); + auto tmpz3 = value_factory.temp_register(2); + auto tmpw3 = value_factory.temp_register(3); + + + ir = new AluInstr(op1_flt32_to_flt64, + tmpx3, + tmpx2, AluInstr::write); + group->add_instruction(ir); + ir = new AluInstr(op1_flt32_to_flt64, + tmpy3, + value_factory.zero(), AluInstr::write); + group->add_instruction(ir); + ir = new AluInstr(op1_flt32_to_flt64, + tmpz3, + tmpy2, AluInstr::write); + group->add_instruction(ir); + ir = new AluInstr(op1_flt32_to_flt64, + tmpw3, + value_factory.zero(), AluInstr::last_write); + group->add_instruction(ir); + shader.emit_instruction(group); + + group = new AluGroup(); + + ir = new AluInstr(op2_add_64, + value_factory.dest(alu.dest, 0, pin_chan), + tmpy3, tmpw3, AluInstr::write); + group->add_instruction(ir); + ir = new AluInstr(op2_add_64, + value_factory.dest(alu.dest, 1, pin_chan), + tmpx3, tmpz3, AluInstr::write); + group->add_instruction(ir); + shader.emit_instruction(group); + + return true; +} + +static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + + assert(nir_dest_num_components(alu.dest.dest) == 1); + + ir = new AluInstr(op1_flt32_to_flt64, + value_factory.dest(alu.dest, 0, pin_chan), + value_factory.src(alu.src[0], 0), AluInstr::write); + group->add_instruction(ir); + ir = new AluInstr(op1_flt32_to_flt64, + value_factory.dest(alu.dest, 1, pin_chan), + value_factory.zero(), AluInstr::last_write); + group->add_instruction(ir); + shader.emit_instruction(group); + return true; +} + +static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto group = new AluGroup(); + AluInstr *ir = nullptr; + + ir = new 
AluInstr(op1v_flt64_to_flt32, + value_factory.dest(alu.dest, 0, pin_chan), + value_factory.src64(alu.src[0], 0, 1), {alu_write}); + group->add_instruction(ir); + ir = new AluInstr(op1v_flt64_to_flt32, + value_factory.dummy_dest(1), + value_factory.src64(alu.src[0], 0, 0), AluInstr::last); + group->add_instruction(ir); + shader.emit_instruction(group); + return true; + +} + +static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + auto pin = pin_for_components(alu); + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(op2_and_int, + value_factory.dest(alu.dest, i, pin), + value_factory.src(alu.src[0], i), + value_factory.inline_const(mask, 0), + {alu_write}); + if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); + if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, const AluOpFlags& flags) +{ + auto& value_factory = shader.value_factory(); + + AluInstr *ir = nullptr; + auto pin = pin_for_components(alu); + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, value_factory.dest(alu.dest, i, pin), + value_factory.src(alu.src[0], i), {alu_write}); + + if (flags.test(alu_src0_abs) || alu.src[0].abs) + ir->set_alu_flag(alu_src0_abs); + + if (alu.src[0].negate ^ flags.test(alu_src0_neg)) + ir->set_alu_flag(alu_src0_neg); + + if (flags.test(alu_dst_clamp) || alu.dest.saturate) + ir->set_alu_flag(alu_dst_clamp); + + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + 
AluInstr::Op2Options opts) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src *src0 = &alu.src[0]; + const nir_alu_src *src1 = &alu.src[1]; + + int idx0 = 0; + int idx1 = 1; + if (opts & AluInstr::op2_opt_reverse) { + std::swap(src0, src1); + std::swap(idx0, idx1); + } + + bool src1_negate = (opts & AluInstr::op2_opt_neg_src1) ^ src1->negate; + + auto pin = pin_for_components(alu); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, + value_factory.dest(alu.dest.dest, i, pin), + value_factory.src(*src0, i), + value_factory.src(*src1, i), {alu_write}); + + if (src0->negate) ir->set_alu_flag(alu_src0_neg); + if (src0->abs) ir->set_alu_flag(alu_src0_abs); + if (src1_negate) ir->set_alu_flag(alu_src1_neg); + if (src1->abs) ir->set_alu_flag(alu_src1_abs); + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +/* Two-source ALU op for integer opcodes: asserts that neither source carries an abs or negate modifier, then defers to emit_alu_op2(). */ +static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + AluInstr::Op2Options opts) +{ + assert(!alu.src[0].abs); + assert(!alu.src[0].negate); + assert(!alu.src[1].abs); + assert(!alu.src[1].negate); + + return emit_alu_op2(alu, opcode, shader, opts); +} + +/* Emit one three-source ALU instruction per channel enabled in the destination write mask. src_shuffle[n] is the index of the NIR source used as operand n of the emitted instruction. Source negate modifiers and destination saturate are forwarded, and the last instruction emitted is flagged alu_last_instr. Always returns true. */ +static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, + const std::array<int, 3>& src_shuffle) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src *src[3]; + src[0] = &alu.src[src_shuffle[0]]; + src[1] = &alu.src[src_shuffle[1]]; + src[2] = &alu.src[src_shuffle[2]]; + + auto pin = pin_for_components(alu); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin), + value_factory.src(*src[0], i), + value_factory.src(*src[1], i), +
value_factory.src(*src[2], i), + {alu_write}); + + if (src[0]->negate) ir->set_alu_flag(alu_src0_neg); + if (src[1]->negate) ir->set_alu_flag(alu_src1_neg); + if (src[2]->negate) ir->set_alu_flag(alu_src2_neg); + + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + ir->set_alu_flag(alu_write); + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +/* Two-component float any/all compare: each channel is compared into its own temporary register, then the two results are combined with op2_or_int when the compare opcode is op2_setne_dx10 and with op2_and_int otherwise. */ +static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + AluInstr *ir = nullptr; + auto& value_factory = shader.value_factory(); + + PRegister tmp[2]; + tmp[0] = value_factory.temp_register(); + tmp[1] = value_factory.temp_register(); + + for (unsigned i = 0; i < 2 ; ++i) { + ir = new AluInstr(opcode, tmp[i], + value_factory.src(alu.src[0], i), + value_factory.src(alu.src[1], i), {alu_write}); + if (alu.src[0].abs) + ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) + ir->set_alu_flag(alu_src0_neg); + + if (alu.src[1].abs) + ir->set_alu_flag(alu_src1_abs); + if (alu.src[1].negate) + ir->set_alu_flag(alu_src1_neg); + + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + + opcode = (opcode == op2_setne_dx10) ? op2_or_int: op2_and_int; + ir = new AluInstr(opcode, + value_factory.dest(alu.dest, 0, pin_free), + tmp[0], tmp[1], AluInstr::last_write); + shader.emit_instruction(ir); + return true; +} + +/* Float any/all compare over nc components: the per-channel compare results are written to a temporary vec4, reduced with op1_max4 (unused slots are padded with 1.0 for "all" and 0.0 for "any"), and the reduced value is then compared against 1.0 to produce the destination. */ +static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader) +{ + /* This should probably be lowered in nir */ + auto& value_factory = shader.value_factory(); + + AluInstr *ir = nullptr; + RegisterVec4 v = value_factory.temp_vec4(pin_group); + AluInstr::SrcValues s; + + for (int i = 0; i < nc ; ++i) { + s.push_back(v[i]); + } + + for (int i = nc; i < 4 ; ++i) + s.push_back(value_factory.inline_const(all ? 
ALU_SRC_1 : ALU_SRC_0, 0)); + + for (int i = 0; i < nc ; ++i) { + ir = new AluInstr(op, v[i], + value_factory.src(alu.src[0], i), + value_factory.src(alu.src[1], i), {alu_write}); + + if (alu.src[0].abs) + ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) + ir->set_alu_flag(alu_src0_neg); + + if (alu.src[1].abs) + ir->set_alu_flag(alu_src1_abs); + if (alu.src[1].negate) + ir->set_alu_flag(alu_src1_neg); + + shader.emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + auto max_val = value_factory.temp_register(); + + ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4); + + if (all) + ir->set_alu_flag(alu_src0_neg); + + shader.emit_instruction(ir); + + if (all) + op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10; + else + op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10; + + ir = new AluInstr(op, + value_factory.dest(alu.dest, 0, pin_free), + max_val, + value_factory.inline_const(ALU_SRC_1, 0), + AluInstr::last_write); + if (all) + ir->set_alu_flag(alu_src1_neg); + shader.emit_instruction(ir); + + return true; +} + +/* Integer any/all compare over nc components (2, 3 or 4): asserts that the sources carry no abs/negate modifiers, compares each channel into its own temporary register, and then folds the per-channel results pairwise with op2_and_int ("all") or op2_or_int ("any") into the destination. Returns false for any other nc. */ +static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader) +{ + assert(!alu.src[0].abs); + assert(!alu.src[0].negate); + assert(!alu.src[1].abs); + assert(!alu.src[1].negate); + + /* This should probably be lowered in nir */ + auto& value_factory = shader.value_factory(); + + AluInstr *ir = nullptr; + PRegister v[6]; + + auto dest = value_factory.dest(alu.dest.dest, 0, pin_free); + + for (int i = 0; i < nc + nc/2; ++i) + v[i] = value_factory.temp_register(); + + EAluOp combine = all ? 
op2_and_int : op2_or_int; + + for (int i = 0; i < nc ; ++i) { + ir = new AluInstr(op, v[i], value_factory.src(alu.src[0], i), + value_factory.src(alu.src[1], i), AluInstr::write); + shader.emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + if (nc ==2) { + ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write); + shader.emit_instruction(ir); + return true; + } + + if (nc == 3) { + ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write); + shader.emit_instruction(ir); + ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write); + shader.emit_instruction(ir); + return true; + } + + if (nc == 4) { + ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write); + shader.emit_instruction(ir); + ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write); + shader.emit_instruction(ir); + ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write); + shader.emit_instruction(ir); + return true; + } + + return false; +} + +static bool emit_dot(const nir_alu_instr& alu, int n, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src& src0 = alu.src[0]; + const nir_alu_src& src1 = alu.src[1]; + + auto dest = value_factory.dest(alu.dest.dest, 0, pin_free); + + AluInstr::SrcValues srcs(8); + + for (int i = 0; i < n ; ++i) { + srcs[2 * i ] = value_factory.src(src0, i); + srcs[2 * i + 1] = value_factory.src(src1, i); + } + + for (int i = n; i < 4 ; ++i) { + srcs[2 * i ] = value_factory.zero(); + srcs[2 * i + 1] = value_factory.zero(); + } + + AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4); + + if (src0.negate) ir->set_alu_flag(alu_src0_neg); + if (src0.abs) ir->set_alu_flag(alu_src0_abs); + if (src1.negate) ir->set_alu_flag(alu_src1_neg); + if (src1.abs) ir->set_alu_flag(alu_src1_abs); + + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + + shader.emit_instruction(ir); + return true; +} + +static bool emit_create_vec(const nir_alu_instr& 
instr, unsigned nc, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + + for(unsigned i = 0; i < nc; ++i) { + if (instr.dest.write_mask & (1 << i)){ + auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]); + auto dst = value_factory.dest(instr.dest.dest, i, pin_chan); + ir = new AluInstr(op1_mov, dst, src, {alu_write}); + + if (instr.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + if (instr.src[i].negate) ir->set_alu_flag(alu_src0_neg); + if (instr.src[i].abs) ir->set_alu_flag(alu_src0_abs); + + shader.emit_instruction(ir); + } + } + + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none; + + for (int i = 0; i < 4 ; ++i) { + if (alu.dest.write_mask & (1 << i)) { + ir = new AluInstr(opcode, + value_factory.dest(alu.dest, i, pin), + value_factory.src(alu.src[0], i), + value_factory.zero(), + AluInstr::write); + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + auto pin = pin_for_components(alu); + for (int i = 0; i < 4 ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, + value_factory.dest(alu.dest, i, pin), + value_factory.zero(), + value_factory.src(alu.src[0], i), + AluInstr::write); + shader.emit_instruction(ir); + } + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + return true; +} + +static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + ir = new 
AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none), + value_factory.src(alu.src[i], 0), AluInstr::write); + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none), + value_factory.src(alu.src[0], i), AluInstr::write); + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + return true; +} + + +static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none), + value_factory.src64(alu.src[0], 0, i), AluInstr::write); + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_chan), + value_factory.src64(alu.src[0], 0, i), AluInstr::write); + shader.emit_instruction(ir); + } + for (unsigned i = 0; i < 2; ++i) { + ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i + 2, pin_chan), + value_factory.src64(alu.src[1], 1, i), AluInstr::write); + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + auto x = value_factory.temp_register(); + auto y = value_factory.temp_register(); + auto yy = value_factory.temp_register(); + + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, x, + value_factory.src(alu.src[0], 0), 
AluInstr::last_write)); + + shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, y, + value_factory.src(alu.src[1], 0), AluInstr::last_write)); + + shader.emit_instruction(new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write)); + + shader.emit_instruction(new AluInstr(op2_or_int, + value_factory.dest(alu.dest, 0, pin_free), + x, yy, AluInstr::last_write)); + return true; +} + +static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_free), + value_factory.src64(alu.src[0], 0, comp), AluInstr::last_write)); + return true; +} + +static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.dest, 0, pin_free), + value_factory.src(alu.src[0], 0), AluInstr::last_write)); + return true; +} +static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + auto tmp = value_factory.temp_register(); + shader.emit_instruction(new AluInstr(op2_lshr_int, tmp, + value_factory.src(alu.src[0], 0), + value_factory.literal(16), + AluInstr::last_write)); + + shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, + value_factory.dest(alu.dest, 0, pin_free), + tmp, AluInstr::last_write)); + return true; +} + + + +static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src& src0 = alu.src[0]; + + AluInstr *ir = nullptr; + auto pin = pin_for_components(alu); + + for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, + value_factory.dest(alu.dest.dest, i, pin), + value_factory.src(src0, 
i), + AluInstr::last_write); + if (src0.negate) ir->set_alu_flag(alu_src0_neg); + if (src0.abs) ir->set_alu_flag(alu_src0_abs); + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + ir->set_alu_flag(alu_is_trans); + shader.emit_instruction(ir); + } + } + + return true; +} + +static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + + PRegister reg[4]; + + int num_comp = nir_dest_num_components(alu.dest.dest); + + for (int i = 0; i < num_comp; ++i) { + reg[i] = value_factory.temp_register(); + ir = new AluInstr(op1_trunc, reg[i], value_factory.src(alu.src[0], i), AluInstr::last_write); + if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); + shader.emit_instruction(ir); + } + + auto pin = pin_for_components(alu); + for (int i = 0; i < num_comp; ++i) { + ir = new AluInstr(opcode, + value_factory.dest(alu.dest, i, pin), + reg[i], AluInstr::write); + if (opcode == op1_flt_to_uint) { + ir->set_alu_flag(alu_is_trans); + ir->set_alu_flag(alu_last_instr); + } + shader.emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src& src0 = alu.src[0]; + + auto pin = pin_for_components(alu); + + /* todo: Actually we need only three channels, but then we have + * to make sure that we don't hava w dest */ + for (unsigned j = 0; j < 4; ++j) { + if (alu.dest.write_mask & (1 << j)) { + AluInstr::SrcValues srcs(4); + PRegister dest = value_factory.dest(alu.dest.dest, j, pin); + + for (unsigned i = 0; i < 4; ++i) + srcs[i] = value_factory.src(src0, j); + + auto ir = new AluInstr(opcode, dest, srcs, AluInstr::last_write, 4); + + if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) 
ir->set_alu_flag(alu_src0_neg); + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + + ir->set_alu_flag(alu_is_cayman_trans); + + + shader.emit_instruction(ir); + } + } + return true; +} + +static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + const nir_alu_src& src0 = alu.src[0]; + const nir_alu_src& src1 = alu.src[1]; + + AluInstr *ir = nullptr; + + auto pin = pin_for_components(alu); + for (int i = 0; i < 4 ; ++i) { + if (alu.dest.write_mask & (1 << i)){ + ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin), + value_factory.src(src0, i), + value_factory.src(src1, i), AluInstr::last_write); + if (src0.negate) ir->set_alu_flag(alu_src0_neg); + if (src0.abs) ir->set_alu_flag(alu_src0_abs); + if (src1.negate) ir->set_alu_flag(alu_src1_neg); + if (src1.abs) ir->set_alu_flag(alu_src1_abs); + if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp); + ir->set_alu_flag(alu_is_trans); + shader.emit_instruction(ir); + } + } + return true; +} + +static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + const nir_alu_src& src0 = alu.src[0]; + const nir_alu_src& src1 = alu.src[1]; + + unsigned last_slot = 4; + + for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) { + if (alu.dest.write_mask & (1 << k)) { + AluInstr::SrcValues srcs(2 * last_slot); + PRegister dest = value_factory.dest(alu.dest.dest, k, pin_free); + + for (unsigned i = 0; i < last_slot ; ++i) { + srcs[2 * i ] = value_factory.src(src0, k); + srcs[2 * i + 1] = value_factory.src(src1, k); + } + + auto ir = new AluInstr(opcode, + dest, srcs, AluInstr::last_write, last_slot); + + if (src0.negate) ir->set_alu_flag(alu_src0_neg); + if (src0.abs) ir->set_alu_flag(alu_src0_abs); + if (src1.negate) ir->set_alu_flag(alu_src1_neg); + if (src1.abs) ir->set_alu_flag(alu_src1_abs); + if (alu.dest.saturate) 
ir->set_alu_flag(alu_dst_clamp); + ir->set_alu_flag(alu_is_cayman_trans); + shader.emit_instruction(ir); + } + } + return true; +} + + +static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + + int ncomp = nir_dest_num_components(alu.dest.dest); + RegisterVec4::Swizzle src_swz = {7,7,7,7}; + for (auto i = 0; i < ncomp; ++i) + src_swz[i] = alu.src[0].swizzle[i]; + + auto src = value_factory.src_vec4(alu.src[0].src, pin_group, src_swz); + + auto tmp = value_factory.temp_vec4(pin_group); + AluInstr *mv = nullptr; + for (int i = 0; i < ncomp; ++i) { + mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write); + if (alu.src[0].abs) + mv->set_alu_flag(alu_src0_abs); + if (alu.src[0].negate) + mv->set_alu_flag(alu_src0_neg); + shader.emit_instruction(mv); + } + if (mv) + mv->set_alu_flag(alu_last_instr); + + auto dst = value_factory.dest_vec4(alu.dest.dest, pin_group); + RegisterVec4::Swizzle dst_swz = {7,7,7,7}; + for (auto i = 0; i < ncomp; ++i) { + if (alu.dest.write_mask & (1 << i)) + dst_swz[i] = i; + } + + auto tex = new TexInstr(opcode, dst, dst_swz, tmp, 0, R600_MAX_CONST_BUFFERS); + + if (fine) + tex->set_tex_flag(TexInstr::grad_fine); + + shader.emit_instruction(tex); + + return true; +} + +static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + AluInstr *ir = nullptr; + + const uint16_t src0_chan[4] = {2, 2, 0, 1}; + const uint16_t src1_chan[4] = {1, 0, 2, 2}; + + auto group = new AluGroup(); + + for (int i = 0; i < 4; ++i) { + + + ir = new AluInstr(op2_cube, value_factory.dest(alu.dest.dest, i, pin_chan), + value_factory.src(alu.src[0], src0_chan[i]), + value_factory.src(alu.src[0], src1_chan[i]), + AluInstr::write); + group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + shader.emit_instruction(group); + return true; +} + +const std::set AluInstr::empty; +const std::set 
AluInstr::write({alu_write}); +const std::set AluInstr::last({alu_last_instr}); +const std::set AluInstr::last_write({alu_write, alu_last_instr}); + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h new file mode 100644 index 0000000..ffe3118 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h @@ -0,0 +1,193 @@ +#ifndef INSTRALU_H +#define INSTRALU_H + +#include "sfn_instr.h" + +#include + +struct nir_alu_instr; + +namespace r600 { + +class Shader; +class ValueFactory; + +class AluInstr : public Instr { +public: + + using SrcValues = std::vector>; + + enum Op2Options { + op2_opt_none = 0, + op2_opt_reverse = 1, + op2_opt_neg_src1 = 1 << 1, + op2_opt_abs_src0 = 1 << 2 + }; + + static constexpr const AluBankSwizzle bs[6] = { + alu_vec_012, + alu_vec_021, + alu_vec_120, + alu_vec_102, + alu_vec_201, + alu_vec_210 + }; + + static const AluModifiers src_abs_flags[2]; + static const AluModifiers src_neg_flags[3]; + static const AluModifiers src_rel_flags[3]; + + AluInstr(EAluOp opcode); + AluInstr(EAluOp opcode, int chan); + AluInstr(EAluOp opcode, PRegister dest, + SrcValues src0, + const std::set& flags, int alu_slot); + + AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, + const std::set& flags); + + AluInstr(EAluOp opcode, PRegister dest, + PVirtualValue src0, PVirtualValue src1, + const std::set& flags); + + AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1, + PVirtualValue src2, + const std::set& flags); + + AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address); + AluInstr(ESDOp op, const SrcValues& src, const std::set& flags); + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + auto opcode() const {assert(!has_alu_flag(alu_is_lds)); return m_opcode;} + auto lds_opcode() const {assert(has_alu_flag(alu_is_lds)); return m_lds_opcode;} + + bool can_propagate_src() const; + 
bool can_propagate_dest() const; + + bool replace_source(PRegister old_src, PVirtualValue new_src) override; + bool replace_dest(PRegister new_dest, AluInstr *move_instr) override; + + void set_op(EAluOp op) {m_opcode = op;} + + PRegister dest() const {return m_dest;} + unsigned n_sources() const {return m_src.size();} + + int dest_chan() const {return m_dest ? m_dest->chan() : m_fallback_chan;} + + PVirtualValue psrc(unsigned i) {return i < m_src.size() ? m_src[i] : nullptr;} + VirtualValue& src(unsigned i) {assert(i < m_src.size() && m_src[i]); return *m_src[i];} + const VirtualValue& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];} + + void set_sources(SrcValues src); + const SrcValues& sources() const {return m_src;} + void pin_sources_to_chan(); + + int register_priority() const; + + void reset_alu_flag(AluModifiers flag) {m_alu_flags.reset(flag);} + void set_alu_flag(AluModifiers flag) {m_alu_flags.set(flag);} + bool has_alu_flag(AluModifiers f) const {return m_alu_flags.test(f);} + + ECFAluOpCode cf_type() const {return m_cf_type;} + void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; } + void set_bank_swizzle(AluBankSwizzle swz) {m_bank_swizzle = swz;} + AluBankSwizzle bank_swizzle() const {return m_bank_swizzle;} + + void set_index_offset(unsigned offs) {m_idx_offset = offs;} + auto index_offset() const {return m_idx_offset;} + + bool is_equal_to(const AluInstr& lhs) const; + + bool has_lds_access() const; + + static const std::map cf_map; + static const std::map bank_swizzle_map; + static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory, AluGroup *); + static bool from_nir(nir_alu_instr *alu, Shader& shader); + + int alu_slots() const {return m_alu_slots;} + + AluGroup *split(ValueFactory &vf); + + bool end_group() const override { return m_alu_flags.test(alu_last_instr);} + + static const std::set empty; + static const std::set write; + static const std::set last; + static const std::set 
last_write; + + std::pair indirect_addr() const; + + void add_extra_dependency(PVirtualValue reg); + + void set_required_slots(int nslots) { m_required_slots = nslots;} + unsigned required_slots() const { return m_required_slots;} + + void add_priority(int priority) { m_priority += priority;} + int priority() const { return m_priority;} + void inc_priority() { ++m_priority;} + + void set_parent_group(AluGroup *group) { m_parent_group = group;} + +private: + friend class AluGroup; + + void update_uses(); + + bool do_ready() const override; + + bool can_copy_propagate() const; + + bool check_readport_validation(PRegister old_src, PVirtualValue new_src) const; + + void set_alu_flags(const AluOpFlags& flags) { m_alu_flags = flags; } + bool propagate_death() override; + + void do_print(std::ostream& os) const override; + + union { + EAluOp m_opcode; + ESDOp m_lds_opcode; + }; + + PRegister m_dest{nullptr}; + SrcValues m_src; + + AluOpFlags m_alu_flags; + AluBankSwizzle m_bank_swizzle{alu_vec_unknown}; + ECFAluOpCode m_cf_type{cf_alu}; + int m_alu_slots{1}; + int m_fallback_chan{0}; + unsigned m_idx_offset{0}; + unsigned m_required_slots{0}; + int m_priority{0}; + std::set, Allocator> m_extra_dependencies; + AluGroup *m_parent_group{nullptr}; +}; + +class AluInstrVisitor : public InstrVisitor { +public: + void visit(AluGroup *instr) override; + void visit(Block *instr) override; + void visit(IfInstr *instr) override; + + void visit(TexInstr *instr) override {(void)instr;} + void visit(ExportInstr *instr) override {(void)instr;} + void visit(FetchInstr *instr) override {(void)instr;} + void visit(ControlFlowInstr *instr) override {(void)instr;} + void visit(WriteScratchInstr *instr) override {(void)instr;} + void visit(StreamOutInstr *instr) override {(void)instr;} + void visit(MemRingOutInstr *instr) override {(void)instr;} + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) 
override {(void)instr;}; + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override {(void)instr;}; + void visit(RatInstr *instr) override {(void)instr;}; +}; + + +} +#endif // INSTRALU_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp new file mode 100644 index 0000000..c41a0f8 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp @@ -0,0 +1,361 @@ +#include "sfn_instr_alugroup.h" +#include "sfn_debug.h" +#include + +namespace r600 { + +AluGroup::AluGroup() +{ + std::fill(m_slots.begin(), m_slots.end(), nullptr); +} + +bool AluGroup::add_instruction(AluInstr *instr) +{ + /* we can only schedule one op that accesses LDS or + the LDS read queue */ + if (m_has_lds_op && instr->has_lds_access()) + return false; + + if (instr->has_alu_flag(alu_is_trans) && add_trans_instructions(instr)) + return true; + + if (add_vec_instructions(instr)) { + instr->set_parent_group(this); + return true; + } + + auto opinfo = alu_ops.find(instr->opcode()); + assert(opinfo != alu_ops.end()); + + if (s_max_slots > 4 && + opinfo->second.can_channel(AluOp::t) && + add_trans_instructions(instr)) { + instr->set_parent_group(this); + return true; + } + + return false; +} + +bool AluGroup::add_trans_instructions(AluInstr *instr) +{ + if (m_slots[4] || s_max_slots < 5) + return false; + + if (!update_indirect_access(instr)) + return false; + + /* LDS instructions have to be scheduled in X */ + if (instr->has_alu_flag(alu_is_lds)) + return false; + + auto opinfo = alu_ops.find(instr->opcode()); + assert(opinfo != alu_ops.end()); + + if (!opinfo->second.can_channel(AluOp::t)) + return false; + + + /* if we schedule a non-trans instr into the trans slot, we have to make + * sure that the corresponding vector slot is already occupied, otherwise + * the hardware will schedule it as vector op and the bank-swizzle as + * checked here (and in r600_asm.c) will not catch 
conflicts. + */ + if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) { + if (instr->dest() && instr->dest()->pin() == pin_free) { + int used_slot = 3; + while (!m_slots[used_slot] && used_slot >= 0) + --used_slot; + + // if we schedule a non-trans instr into the trans slot, + // there should always be some slot that is already used + assert(used_slot >= 0); + instr->dest()->set_chan(used_slot); + } + } + + for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) { + AluReadportReservation readports_evaluator = m_readports_evaluator; + if (readports_evaluator.schedule_trans_instruction(*instr, i)) { + m_readports_evaluator = readports_evaluator; + m_slots[4] = instr; + instr->pin_sources_to_chan(); + sfn_log << SfnLog::schedule << "T: " << *instr << "\n"; + + /* We added a vector op in the trans channel, so we have to + * make sure the corresponding vector channel is used */ + if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) + m_slots[instr->dest_chan()] = + new AluInstr(op0_nop, instr->dest_chan()); + return true; + } + } + return false; +} + +int AluGroup::free_slots() const +{ + int free_mask = 0; + for(int i = 0; i < s_max_slots; ++i) { + if (!m_slots[i]) + free_mask |= 1 << i; + } + return free_mask; +} + +class AluAllowSlotSwitch : public AluInstrVisitor { +public: + using AluInstrVisitor::visit; + + void visit(AluInstr *alu) { + yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans)); + } + + bool yes{false}; + +}; + +bool AluGroup::add_vec_instructions(AluInstr *instr) +{ + if (!update_indirect_access(instr)) + return false; + + int param_src = -1; + for (auto& s : instr->sources()) { + auto is = s->as_inline_const(); + if (is) + param_src = is->sel() - ALU_SRC_PARAM_BASE; + } + + if (param_src >= 0) { + if (m_param_used < 0) + m_param_used = param_src; + else if (m_param_used != param_src) + return false; + } + + if (m_has_lds_op && instr->has_lds_access()) + return false; + + int 
preferred_chan = instr->dest_chan(); + if (!m_slots[preferred_chan]) { + if (instr->bank_swizzle() != alu_vec_unknown) { + if (try_readport(instr, instr->bank_swizzle())) + return true; + } else { + for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) { + if (try_readport(instr, i)) + return true; + } + } + } else { + + auto dest = instr->dest(); + if (dest && dest->pin() == pin_free) { + + for (auto u : dest->uses()) { + AluAllowSlotSwitch swich_allowed; + u->accept(swich_allowed); + if (!swich_allowed.yes) + return false; + } + + int free_chan = 0; + while (m_slots[free_chan] && free_chan < 4) + free_chan++; + + if (!m_slots[free_chan] && free_chan < 4) { + sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n"; + dest->set_chan(free_chan); + if (instr->bank_swizzle() != alu_vec_unknown) { + if (try_readport(instr, instr->bank_swizzle())) + return true; + } else { + for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) { + if (try_readport(instr, i)) + return true; + } + } + } + } + } + return false; +} + +bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle) +{ + int preferred_chan = instr->dest_chan(); + AluReadportReservation readports_evaluator = m_readports_evaluator; + if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) { + m_readports_evaluator = readports_evaluator; + m_slots[preferred_chan] = instr; + m_has_lds_op |= instr->has_lds_access(); + sfn_log << SfnLog::schedule << "V: " << *instr << "\n"; + auto dest = instr->dest(); + if (dest && dest->pin() == pin_free) + dest->set_pin(pin_chan); + instr->pin_sources_to_chan(); + return true; + } + return false; +} + +bool AluGroup::update_indirect_access(AluInstr *instr) +{ + auto indirect_addr = instr->indirect_addr(); + + if (indirect_addr.first) { + if (!m_addr_used) { + m_addr_used = indirect_addr.first; + m_addr_is_index = indirect_addr.second; + } else if (!indirect_addr.first->equal_to(*m_addr_used)) { + return false; + } + } + + 
return true;
+}
+
+void AluGroup::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void AluGroup::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+/* Forward the "scheduled" state to every instruction in the group */
+void AluGroup::set_scheduled()
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i])
+         m_slots[i]->set_scheduled();
+   }
+}
+
+/* Make sure that only the instruction in the highest occupied slot carries
+ * alu_last_instr; the flag is cleared on all other instructions. */
+void AluGroup::fix_last_flag()
+{
+   bool last_seen = false;
+   for (int i = s_max_slots - 1; i >= 0; --i) {
+      if (m_slots[i]) {
+         if (!last_seen) {
+            m_slots[i]->set_alu_flag(alu_last_instr);
+            last_seen = true;
+         } else {
+            m_slots[i]->reset_alu_flag(alu_last_instr);
+         }
+      }
+   }
+}
+
+/* Two groups are equal when they occupy the same slots and the
+ * instructions in corresponding slots compare equal.
+ * A slot that is used in only one of the groups makes them differ, no
+ * matter which of the two groups holds the instruction. */
+bool AluGroup::is_equal_to(const AluGroup& other) const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      const bool have_mine = m_slots[i] != nullptr;
+      const bool have_other = other.m_slots[i] != nullptr;
+
+      if (have_mine != have_other)
+         return false;
+
+      if (have_mine && !m_slots[i]->is_equal_to(*other.m_slots[i]))
+         return false;
+   }
+   return true;
+}
+
+/* True if any instruction in the group is flagged alu_lds_group_end */
+bool AluGroup::has_lds_group_end() const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
+         return true;
+   }
+   return false;
+}
+
+/* The group is ready when every instruction it contains is ready */
+bool AluGroup::do_ready() const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i] && !m_slots[i]->ready())
+         return false;
+   }
+   return true;
+}
+
+void AluGroup::forward_set_blockid(int id, int index)
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i]) {
+         m_slots[i]->set_blockid(id, index);
+      }
+   }
+}
+
+/* Slot count of the group: half the number of literals (rounded up), plus
+ * one per occupied slot, plus one if an address register is used and one
+ * more if that address is an index. */
+uint32_t AluGroup::slots() const
+{
+   uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i])
+         ++result;
+   }
+   if (m_addr_used) {
+      ++result;
+      if (m_addr_is_index)
+         ++result;
+   }
+
+   return result;
+}
+
+/* Print the group framed by ALU_GROUP_BEGIN / ALU_GROUP_END, one occupied
+ * slot per line, indented according to the nesting depth. */
+void AluGroup::do_print(std::ostream& os) const
+{
+   const char slotname[] = "xyzwt";
+
+   os << "ALU_GROUP_BEGIN\n";
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i]) {
+         for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
+            os << ' ';
+
os << slotname[i] << ": "; + m_slots[i]->print(os); + os << "\n"; + } + } + for (int i = 0; i < 2 * m_nesting_depth + 2; ++i) + os << ' '; + os << "ALU_GROUP_END"; +} + +AluInstr::SrcValues AluGroup::get_kconsts() const +{ + AluInstr::SrcValues result; + + for (int i = 0; i < s_max_slots; ++i) { + if (m_slots[i]) { + for (auto s : m_slots[i]->sources()) + if (s->as_uniform()) + result.push_back(s); + } + } + return result; +} + +void AluGroup::set_chipclass(r600_chip_class chip_class) +{ + switch (chip_class) { + case ISA_CC_CAYMAN: + s_max_slots = 4; + break; + default: + s_max_slots = 5; + } +} + +int AluGroup::s_max_slots = 5; + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h new file mode 100644 index 0000000..6b56a9f --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h @@ -0,0 +1,89 @@ +#ifndef ALUGROUP_H +#define ALUGROUP_H + +#include "sfn_instr_alu.h" +#include "sfn_alu_readport_validation.h" + +namespace r600 { + +class AluGroup : public Instr +{ +public: + using Slots = std::array; + + AluGroup(); + + using iterator = Slots::iterator; + using const_iterator = Slots::const_iterator; + + bool add_instruction(AluInstr *instr); + bool add_trans_instructions(AluInstr *instr); + bool add_vec_instructions(AluInstr *instr); + + bool is_equal_to(const AluGroup& other) const; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + auto begin() {return m_slots.begin(); } + auto end() {return m_slots.begin() + s_max_slots; } + auto begin() const {return m_slots.begin(); } + auto end() const {return m_slots.begin() + s_max_slots; } + + bool end_group() const override { return true; } + + void set_scheduled() override; + + void set_nesting_depth(int depth) {m_nesting_depth = depth;} + + void fix_last_flag(); + + static void set_chipclass(r600_chip_class chip_class); + + int free_slots() const; + + auto addr() const {return 
std::make_pair(m_addr_used, m_addr_is_index);} + + uint32_t slots() const override; + + AluInstr::SrcValues get_kconsts() const; + + bool has_lds_group_start() const { return m_slots[0] ? + m_slots[0]->has_alu_flag(alu_lds_group_start) : false;} + + bool has_lds_group_end() const; + + const auto& readport_reserer() const { return m_readports_evaluator; } + void set_readport_reserer(const AluReadportReservation& rr) { + m_readports_evaluator = rr; + }; + + static bool has_t() { return s_max_slots == 5;} + +private: + void forward_set_blockid(int id, int index) override; + bool do_ready() const override; + void do_print(std::ostream& os) const override; + + bool update_indirect_access(AluInstr *instr); + bool try_readport(AluInstr *instr, AluBankSwizzle cycle); + + Slots m_slots; + + AluReadportReservation m_readports_evaluator; + + static int s_max_slots; + + PRegister m_addr_used{nullptr}; + + int m_param_used{-1}; + + int m_nesting_depth{0}; + bool m_has_lds_op{false}; + bool m_addr_is_index{false}; +}; + + +} + +#endif // ALUGROUP_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp new file mode 100644 index 0000000..e12b3a6 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp @@ -0,0 +1,176 @@ +#include "sfn_instr_controlflow.h" + +#include + +namespace r600 { + +ControlFlowInstr::ControlFlowInstr(CFType type): + m_type(type) +{ +} + +bool ControlFlowInstr::do_ready() const +{ + /* Have to rework this, but the CF should always */ + return true; +} + + +bool ControlFlowInstr::is_equal_to(const ControlFlowInstr& rhs) const +{ + return m_type == rhs.m_type; +} + +void ControlFlowInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void ControlFlowInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +void ControlFlowInstr::do_print(std::ostream& os) const +{ + switch (m_type) { + case cf_else: os << "ELSE"; break; + case 
cf_endif: os << "ENDIF";break; + case cf_loop_begin: os << "LOOP_BEGIN"; break; + case cf_loop_end: os << "LOOP_END"; break; + case cf_loop_break: os << "BREAK"; break; + case cf_loop_continue: os << "CONTINUE"; break; + case cf_wait_ack: os << "WAIT_ACK"; break; + default: + unreachable("Unknown CF type"); + } +} + +Instr::Pointer ControlFlowInstr::from_string(std::string type_str) +{ + if (type_str == "ELSE") + return new ControlFlowInstr(cf_else); + else if (type_str == "ENDIF") + return new ControlFlowInstr(cf_endif); + else if (type_str == "LOOP_BEGIN") + return new ControlFlowInstr(cf_loop_begin); + else if (type_str == "LOOP_END") + return new ControlFlowInstr(cf_loop_end); + else if (type_str == "BREAK") + return new ControlFlowInstr(cf_loop_break); + else if (type_str == "CONTINUE") + return new ControlFlowInstr(cf_loop_continue); + else if (type_str == "WAIT_ACK") + return new ControlFlowInstr(cf_wait_ack); + else + return nullptr; +} + +int ControlFlowInstr::nesting_corr() const +{ + switch (m_type) { + case cf_else: + case cf_endif: + case cf_loop_end: return -1; + default: + return 0; + } +} + +int ControlFlowInstr::nesting_offset() const +{ + switch (m_type) { + case cf_endif: + case cf_loop_end: return -1; + case cf_loop_begin: return 1; + default: + return 0; + } +} + +IfInstr::IfInstr(AluInstr *pred): + m_predicate(pred) +{ + assert(pred); +} + +IfInstr::IfInstr(const IfInstr& orig) +{ + m_predicate = new AluInstr(*orig.m_predicate); +} + +bool IfInstr::is_equal_to(const IfInstr& rhs) const +{ + return m_predicate->equal_to(*rhs.m_predicate); +} + +void IfInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void IfInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +bool IfInstr::replace_source(PRegister old_src, PVirtualValue new_src) +{ + return m_predicate->replace_source(old_src, new_src); +} + +bool IfInstr::do_ready() const +{ + return m_predicate->ready(); +} + +void 
IfInstr::forward_set_scheduled() +{ + m_predicate->set_scheduled(); +} + +void IfInstr::forward_set_blockid(int id, int index) +{ + m_predicate->set_blockid(id, index); +} + +void IfInstr::do_print(std::ostream& os) const +{ + os << "IF (( " << *m_predicate << " ))"; +} + +void IfInstr::set_predicate(AluInstr *new_predicate) +{ + m_predicate = new_predicate; + m_predicate->set_blockid(block_id(), index()); +} + +Instr::Pointer IfInstr::from_string(std::istream &is, ValueFactory& value_factory) +{ + std::string pred_start; + is >> pred_start; + if (pred_start != "((") + return nullptr; + char buf[2048]; + + is.get(buf, 2048, ')'); + std::string pred_end; + is >> pred_end; + + if (pred_end != "))") { + return nullptr; + } + + std::istringstream bufstr(buf); + + std::string instr_type; + bufstr >> instr_type; + + if (instr_type != "ALU") + return nullptr; + + auto pred = AluInstr::from_string(bufstr, value_factory, nullptr); + return new IfInstr(static_cast(pred)); +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h new file mode 100644 index 0000000..9c7273c --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h @@ -0,0 +1,81 @@ +#ifndef CONTROLFLOWINSTR_H +#define CONTROLFLOWINSTR_H + +#include "sfn_instr_alu.h" + +namespace r600 { + +class ControlFlowInstr : public Instr +{ +public: + enum CFType { + cf_else, + cf_endif, + cf_loop_begin, + cf_loop_end, + cf_loop_break, + cf_loop_continue, + cf_stream_write, + cf_wait_ack + }; + + ControlFlowInstr(CFType type); + + ControlFlowInstr(const ControlFlowInstr& orig) = default; + + bool is_equal_to(const ControlFlowInstr& lhs) const; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + CFType cf_type() const { return m_type;} + + int nesting_corr() const override; + + static Instr::Pointer from_string(std::string type_str); + + bool end_block() const override { return 
true;} + + int nesting_offset() const override; + +private: + bool do_ready() const override; + void do_print(std::ostream& os) const override; + + CFType m_type; +}; + +class IfInstr : public Instr { +public: + + IfInstr(AluInstr *pred); + IfInstr(const IfInstr& orig); + + bool is_equal_to(const IfInstr& lhs) const; + + void set_predicate(AluInstr *new_predicate); + + AluInstr *predicate() const { return m_predicate; } + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + bool replace_source(PRegister old_src, PVirtualValue new_src) override; + + static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory); + + bool end_block() const override { return true;} + int nesting_offset() const override { return 1;} + +private: + bool do_ready() const override; + void do_print(std::ostream& os) const override; + void forward_set_blockid(int id, int index) override; + void forward_set_scheduled() override; + + AluInstr *m_predicate; +}; + +} + +#endif // CONTROLFLOWINSTR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp new file mode 100644 index 0000000..5894b39 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp @@ -0,0 +1,524 @@ +#include "sfn_instr_export.h" +#include "sfn_valuefactory.h" + +#include + +namespace r600 { + +using std::string; + +static char *writemask_to_swizzle(int writemask, char *buf) +{ + const char *swz = "xyzw"; + for (int i = 0; i < 4; ++i) { + buf[i] = (writemask & (1 << i)) ? 
swz[i] : '_'; + } + return buf; +} + +WriteOutInstr::WriteOutInstr(const RegisterVec4& value): + m_value(value) +{ + m_value.add_use(this); + set_always_keep(); +} + +void WriteOutInstr::override_chan(int i, int chan) +{ + m_value.set_value(i, + new Register(m_value[i]->sel(), chan, + m_value[i]->pin())); +} + +ExportInstr::ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value): + WriteOutInstr(value), + m_type(type), + m_loc(loc), + m_is_last(false) +{ +} + +void ExportInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void ExportInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + + +bool ExportInstr::is_equal_to(const ExportInstr& lhs) const +{ + return + + (m_type == lhs.m_type && + m_loc == lhs.m_loc && + value() == lhs.value() && + m_is_last == lhs.m_is_last); +} + +ExportInstr::ExportType ExportInstr::type_from_string(const std::string& s) +{ + (void)s; + return param; +} + +void ExportInstr::do_print(std::ostream& os) const +{ + os << "EXPORT"; + if (m_is_last) + os << "_DONE"; + + switch (m_type) { + case param: os << " PARAM "; break; + case pos: os << " POS "; break; + case pixel: os << " PIXEL "; break; + } + os << m_loc << " "; + value().print(os); +} + +bool ExportInstr::do_ready() const +{ + return value().ready(block_id(), index()); +} + +Instr::Pointer ExportInstr::from_string(std::istream& is, ValueFactory& vf) +{ + return from_string_impl(is, vf); +} + +Instr::Pointer ExportInstr::last_from_string(std::istream& is, ValueFactory &vf) +{ + auto result = from_string_impl(is, vf); + result->set_is_last_export(true); + return result; +} + +ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactory &vf) +{ + string typestr; + int pos; + string value_str; + + is >> typestr >> pos >> value_str; + + ExportInstr::ExportType type; + + if (typestr == "PARAM") + type = ExportInstr::param; + else if (typestr == "POS") + type = ExportInstr::pos; + else if (typestr == "PIXEL") + 
type = ExportInstr::pixel;
+   else
+      unreachable("Unknown export type");
+
+   RegisterVec4 value = vf.src_vec4_from_string(value_str);
+
+   return new ExportInstr( type, pos, value);
+}
+
+// Indirect scratch write: the target is addressed through register 'addr',
+// which must be non-null because it is dereferenced to record the use.
+// 'array_size' is stored minus one (do_print adds the one back).
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                                     int align, int align_offset, int writemask, int array_size):
+   WriteOutInstr(value),
+   m_address(addr),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(array_size - 1)
+{
+   addr->add_use(this);
+}
+
+// Direct scratch write to the fixed location 'loc'; the address register
+// and array size keep their in-class defaults.
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, int loc,
+                                     int align, int align_offset,int writemask):
+   WriteOutInstr(value),
+   m_loc(loc),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask)
+{
+
+}
+
+void WriteScratchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void WriteScratchInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+// Equality requires that both instructions either have equal address
+// registers or have none, and that location, alignment, write mask and
+// array size match.  Of the value only sel() is compared.
+bool WriteScratchInstr::is_equal_to(const WriteScratchInstr& lhs) const
+{
+   if (m_address) {
+      if (!lhs.m_address)
+         return false;
+      if (! m_address->equal_to(*lhs.m_address))
+         return false;
+   } else if (lhs.m_address)
+      return false;
+
+   return m_loc == lhs.m_loc &&
+         m_align == lhs.m_align &&
+         m_align_offset == lhs.m_align_offset &&
+         m_writemask == lhs.m_writemask &&
+         m_array_size == lhs.m_array_size &&
+         value().sel() == lhs.value().sel();
+}
+
+// Ready when the value is ready and, if present, the address register too.
+bool WriteScratchInstr::do_ready() const
+{
+   return value().ready(block_id(), index()) &&
+         (!m_address || m_address->ready(block_id(), index()));
+}
+
+// Prints "WRITE_SCRATCH <@addr[size] | loc> <S|R><sel>.<swizzle> AL:<n> ALO:<n>".
+void WriteScratchInstr::do_print(std::ostream& os) const
+{
+   // Scratch space for the swizzle text produced by writemask_to_swizzle().
+   char buf[6];
+
+   os << "WRITE_SCRATCH ";
+   if (m_address)
+      os << "@" << *m_address << "[" << m_array_size + 1<<"]";
+   else
+      os << m_loc;
+
+   os << (value()[0]->is_ssa() ? " S" : " R")
+      << value().sel() << "."
<< writemask_to_swizzle(m_writemask, buf)
+      << " " << "AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+// Parses "<loc> <value> AL:<align> ALO:<align_offset>".  <loc> is either a
+// plain integer (direct write) or "@<register>[<array size>]" (indirect
+// write).  The write mask is rebuilt from the value: bit i is set when
+// component i uses channel i.
+auto WriteScratchInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string loc_str;
+   string value_str;
+   string align_str;
+   string align_offset_str;
+   int offset;
+
+   int array_size = 0;
+   PVirtualValue addr_reg = nullptr;
+
+   is >> loc_str >> value_str >> align_str >> align_offset_str;
+
+   std::istringstream loc_ss(loc_str);
+
+   auto align = int_from_string_with_prefix(align_str, "AL:");
+   auto align_offset = int_from_string_with_prefix(align_offset_str, "ALO:");
+   auto value = vf.src_vec4_from_string(value_str);
+
+   int writemask = 0;
+   for (int i = 0; i < 4; ++i) {
+      if (value[i]->chan() == i)
+         writemask |= 1 << i;
+   }
+
+   if (loc_str[0] == '@') {
+
+      string addr_str;
+      char c;
+      // The first read consumes the '@', the second fetches the first
+      // character of the register name.
+      loc_ss >> c;
+      loc_ss >> c;
+
+      // Collect the register name up to the opening bracket.
+      while (!loc_ss.eof() && c != '[') {
+         addr_str.append(1, c);
+         loc_ss >> c;
+      }
+      addr_reg = vf.src_from_string(addr_str);
+      assert(addr_reg && addr_reg->as_register());
+
+      loc_ss >> array_size;
+      loc_ss >> c;
+      assert(c == ']');
+      return new WriteScratchInstr(value, addr_reg->as_register(), align, align_offset, writemask, array_size);
+   } else {
+      loc_ss >> offset;
+      return new WriteScratchInstr(value, offset, align, align_offset, writemask);
+   }
+}
+
+// The element size is stored as num_components - 1, except that three
+// components are stored as 3.
+StreamOutInstr::StreamOutInstr(const RegisterVec4& value, int num_components,
+                               int array_base, int comp_mask, int out_buffer,
+                               int stream):
+   WriteOutInstr(value),
+   m_element_size(num_components == 3 ?
3 : num_components - 1),
+   m_array_base(array_base),
+   m_writemask(comp_mask),
+   m_output_buffer(out_buffer),
+   m_stream(stream)
+{
+}
+
+// Opcode of the write: the STREAM0 opcode of the selected output buffer
+// plus 4 * stream.  Output buffers outside 0..3 leave the base at 0.
+unsigned StreamOutInstr::op() const
+{
+   int op = 0;
+   switch (m_output_buffer) {
+   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+   }
+   return 4 * m_stream + op;
+}
+
+// Member-wise comparison of the value and all stream-out parameters.
+bool StreamOutInstr::is_equal_to(const StreamOutInstr& oth) const
+{
+
+   return value() == oth.value() &&
+         m_element_size == oth.m_element_size &&
+         m_burst_count == oth.m_burst_count &&
+         m_array_base == oth.m_array_base &&
+         m_array_size == oth.m_array_size &&
+         m_writemask == oth.m_writemask &&
+         m_output_buffer == oth.m_output_buffer &&
+         m_stream == oth.m_stream;
+}
+
+// Prints "WRITE STREAM(<stream>) <value> ES:.. BC:.. BUF:.. ARRAY:<base>";
+// "+<size>" is appended only when the array size is not 0xfff.
+void StreamOutInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE STREAM(" << m_stream << ") " << value()
+      << " ES:" << m_element_size
+      << " BC:" << m_burst_count
+      << " BUF:" << m_output_buffer
+      << " ARRAY:" << m_array_base;
+   if (m_array_size != 0xfff)
+      os << "+" << m_array_size;
+}
+
+bool StreamOutInstr::do_ready() const
+{
+   return value().ready(block_id(), index());
+}
+
+void StreamOutInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void StreamOutInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+
+// 'ring' must be one of the four cf_mem_ring* opcodes and 'ncomp' at most
+// 4 (both asserted).  'index' may be null; when it is set, this
+// instruction is recorded as one of its users.
+MemRingOutInstr::MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                                 const RegisterVec4& value,
+                                 unsigned base_addr, unsigned ncomp,
+                                 PRegister index):
+   WriteOutInstr(value),
+   m_ring_op(ring),
+   m_type(type),
+   m_base_address(base_addr),
+   m_num_comp(ncomp),
+   m_export_index(index)
+{
+   assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
+          m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
+   assert(m_num_comp <= 4);
+
+   if (m_export_index)
+      m_export_index->add_use(this);
+}
+
+// Maps the stored component count to the returned value:
+// 1 -> 0, 2 -> 1, 3 and 4 -> 3.
+unsigned MemRingOutInstr::ncomp() const
+{
+   switch (m_num_comp) {
+   case 1: return 0;
+   case 2:
return 1; + case 3: + case 4: return 3; + default: + assert(0); + } + return 3; +} + +bool MemRingOutInstr::is_equal_to(const MemRingOutInstr& oth) const +{ + + bool equal = value() == oth.value() && + m_ring_op == oth.m_ring_op && + m_type == oth.m_type && + m_num_comp == oth.m_num_comp && + m_base_address == oth.m_base_address; + + if (m_type == mem_write_ind || m_type == mem_write_ind_ack) + equal &= (*m_export_index == *oth.m_export_index); + return equal; + +} + +static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" }; +void MemRingOutInstr::do_print(std::ostream& os) const +{ + + os << "MEM_RING " << (m_ring_op == cf_mem_ring ? 0 : m_ring_op - cf_mem_ring1 + 1); + os << " " << write_type_str[m_type] << " " << m_base_address; + os << " " << value(); + if (m_type == mem_write_ind || m_type == mem_write_ind_ack) + os << " @" << *m_export_index; + os << " ES:" << m_num_comp; +} + +void MemRingOutInstr::patch_ring(int stream, PRegister index) +{ + const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3}; + + assert(stream < 4); + m_ring_op = ring_op[stream]; + m_export_index = index; +} + +bool MemRingOutInstr::do_ready() const +{ + if (m_export_index && !m_export_index->ready(block_id(), index())) + return false; + + return value().ready(block_id(), index()); +} + +void MemRingOutInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void MemRingOutInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +static const std::map type_lookop = +{ + {"WRITE", MemRingOutInstr::mem_write}, + {"WRITE_IDX", MemRingOutInstr::mem_write_ind}, + {"WRITE_ACK", MemRingOutInstr::mem_write_ack}, + {"WRITE_IDX_ACK", MemRingOutInstr::mem_write_ind_ack} +}; + +auto MemRingOutInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer +{ + string type_str; + + int ring; + + int base_address; + string value_str; + + is >> ring >> type_str >> base_address >> value_str; + 
assert(ring < 4); + + auto itype = type_lookop.find(type_str); + assert(itype != type_lookop.end()); + + auto type = itype->second; + + PVirtualValue index{nullptr}; + if (type == mem_write_ind || type == mem_write_ind_ack) { + char c; + string index_str; + is >> c >> index_str; + assert('@' == c ); + index = vf.src_from_string(index_str); + } + + string elm_size_str; + is >> elm_size_str; + + int num_comp = int_from_string_with_prefix(elm_size_str, "ES:"); + + auto value = vf.src_vec4_from_string(value_str); + + ECFOpCode opcodes[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3}; + assert(ring < 4); + + return new MemRingOutInstr(opcodes[ring], type, value, base_address, num_comp, index->as_register()); +} + +EmitVertexInstr::EmitVertexInstr(int stream, bool cut): + m_stream(stream), + m_cut(cut) +{ + +} + + +bool EmitVertexInstr::is_equal_to(const EmitVertexInstr& oth) const +{ + return oth.m_stream == m_stream && + oth.m_cut == m_cut; +} + +void EmitVertexInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void EmitVertexInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +bool EmitVertexInstr::do_ready() const +{ + return true; +} + +void EmitVertexInstr::do_print(std::ostream& os) const +{ + os << (m_cut ? 
"EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
+}
+
+// Parses "@<stream>"; 'cut' is supplied by the caller.
+auto EmitVertexInstr::from_string(std::istream& is, bool cut) -> Pointer
+{
+   char c;
+   is >> c;
+   assert(c == '@');
+
+   int stream;
+   is >> stream;
+
+   return new EmitVertexInstr(stream, cut);
+}
+
+void WriteTFInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void WriteTFInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool WriteTFInstr::is_equal_to(const WriteTFInstr& rhs) const
+{
+   return value() == rhs.value();
+}
+
+// Parses the single value token that follows the opcode.
+auto WriteTFInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string value_str;
+   is >> value_str;
+
+   auto value = vf.src_vec4_from_string(value_str);
+
+   return new WriteTFInstr(value);
+}
+
+bool WriteTFInstr::do_ready() const
+{
+   return value().ready(block_id(), index());
+}
+
+// Prints "WRITE_TF <value>".
+void WriteTFInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE_TF " << value();
+}
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.h b/src/gallium/drivers/r600/sfn/sfn_instr_export.h
new file mode 100644
index 0000000..ba20b8f
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.h
@@ -0,0 +1,213 @@
+#ifndef INSTR_EXPORT_H
+#define INSTR_EXPORT_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+
+// Common base of the instructions in this header that write a vec4 value
+// out; it owns the RegisterVec4 and is not copyable.
+class WriteOutInstr: public Instr {
+public:
+   WriteOutInstr(const RegisterVec4& value);
+   WriteOutInstr(const WriteOutInstr& orig) = delete;
+
+   void override_chan(int i, int chan);
+
+   const RegisterVec4& value() const {return m_value;};
+   RegisterVec4& value() {return m_value;};
+private:
+
+   RegisterVec4 m_value;
+};
+
+// Export of a value as pixel, position or parameter output.
+class ExportInstr: public WriteOutInstr {
+public:
+   enum ExportType {
+      pixel,
+      pos,
+      param
+   };
+
+   using Pointer = R600_POINTER_TYPE(ExportInstr);
+
+   ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value);
+   ExportInstr(const ExportInstr& orig) = delete;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor)
override;
+
+   bool is_equal_to(const ExportInstr& lhs) const;
+
+
+   static ExportType type_from_string(const std::string& s);
+
+   ExportType export_type() const {return m_type;}
+
+   unsigned location() const {return m_loc;}
+
+   void set_is_last_export(bool value) {m_is_last = value;}
+   bool is_last_export() const {return m_is_last;}
+
+   // Both parse the same text; last_from_string() additionally sets the
+   // last-export flag on the result.
+   static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
+   static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
+
+private:
+   static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ExportType m_type;
+   unsigned m_loc;
+   bool m_is_last;
+};
+
+// Write of a vec4 to scratch memory, addressed either directly by a
+// location or indirectly through an address register.
+class WriteScratchInstr : public WriteOutInstr {
+public:
+   // Indirect form: 'addr' is the address register.
+   WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                     int align, int align_offset, int writemask, int array_size);
+   // Direct form: 'addr' is the fixed scratch location.
+   WriteScratchInstr(const RegisterVec4& value, int addr, int align, int align_offset,
+                     int writemask);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const WriteScratchInstr& lhs) const;
+
+   unsigned location() const { return m_loc;};
+   int write_mask() const { return m_writemask;}
+   auto address() const { return m_address;}
+   // True when an address register is set.
+   bool indirect() const { return !!m_address;}
+   int array_size() const { return m_array_size;}
+
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   unsigned m_loc{0};
+   PRegister m_address {nullptr};
+   unsigned m_align;
+   unsigned m_align_offset;
+   unsigned m_writemask;
+   int m_array_size{0};
+};
+
+// Stream-out write of a vec4 to one of the output buffers of a stream.
+class StreamOutInstr: public WriteOutInstr {
+public:
+   StreamOutInstr(const RegisterVec4& value, int num_components,
+                  int array_base, int comp_mask, int out_buffer,
+                  int stream);
+   int element_size() const { return m_element_size;}
+   int
burst_count() const { return m_burst_count;}
+   int array_base() const { return m_array_base;}
+   int array_size() const { return m_array_size;}
+   int comp_mask() const { return m_writemask;}
+   unsigned op() const;
+
+   bool is_equal_to(const StreamOutInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   int m_element_size{0};
+   int m_burst_count{1};
+   int m_array_base{0};
+   // 0xfff is the value do_print() treats as "no array size to print".
+   int m_array_size{0xfff};
+   int m_writemask{0};
+   int m_output_buffer{0};
+   int m_stream{0};
+};
+
+// Write of a vec4 to one of the four memory rings, optionally indexed by
+// a register.
+class MemRingOutInstr: public WriteOutInstr {
+public:
+
+   // The "_ind" types are the ones that use the index register; the
+   // numeric values index the table of printed type names.
+   enum EMemWriteType {
+      mem_write = 0,
+      mem_write_ind = 1,
+      mem_write_ack = 2,
+      mem_write_ind_ack = 3,
+   };
+
+   MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                   const RegisterVec4& value, unsigned base_addr,
+                   unsigned ncomp, PRegister m_index);
+
+   unsigned op() const{return m_ring_op;}
+   unsigned ncomp() const;
+   unsigned addr() const {return m_base_address;}
+   EMemWriteType type() const {return m_type;}
+   // Dereferences the index register, so it may only be called when one
+   // is set.
+   unsigned index_reg() const {assert(m_export_index->sel() >= 0); return m_export_index->sel();}
+   unsigned array_base() const {return m_base_address; }
+   PVirtualValue export_index() const {return m_export_index;}
+
+   void patch_ring(int stream, PRegister index);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const MemRingOutInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_ring_op;
+   EMemWriteType m_type;
+   unsigned m_base_address;
+   unsigned m_num_comp;
+   PRegister m_export_index;
+};
+
+// Emit-vertex instruction for a given stream, with or without a cut.
+class EmitVertexInstr : public Instr {
+public:
+   EmitVertexInstr(int stream, bool cut);
+   ECFOpCode op() const {return m_cut ?
cf_cut_vertex: cf_emit_vertex;} + int stream() const { return m_stream;} + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + bool is_equal_to(const EmitVertexInstr& lhs) const; + + static auto from_string(std::istream& is, bool cut) -> Pointer; + +private: + bool do_ready() const override; + void do_print(std::ostream& os) const override; + + int m_stream; + bool m_cut; +}; + +class WriteTFInstr : public WriteOutInstr { +public: + using WriteOutInstr::WriteOutInstr; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + bool is_equal_to(const WriteTFInstr& rhs) const; + + static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer; + +private: + bool do_ready() const override; + void do_print(std::ostream& os) const override; + +}; + + +} + +#endif // INSTR_EXPORT_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp new file mode 100644 index 0000000..e68c7b7 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp @@ -0,0 +1,659 @@ +#include "sfn_instr_fetch.h" +#include "sfn_valuefactory.h" +#include "sfn_defines.h" + +#include + +namespace r600 { + +using std::string; +using std::istringstream; + +FetchInstr::FetchInstr(EVFetchInstr opcode, + const RegisterVec4& dst, + const RegisterVec4::Swizzle& dest_swizzle, + PRegister src, + uint32_t src_offset, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + uint32_t resource_id, + PRegister resource_offset): + InstrWithVectorResult(dst, dest_swizzle), + m_opcode(opcode), + m_src(src), + m_src_offset(src_offset), + m_fetch_type(fetch_type), + m_data_format(data_format), + m_num_format(num_format), + m_endian_swap(endian_swap), + m_resource_id(resource_id), + m_resource_offset(resource_offset), + m_mega_fetch_count(0), + m_array_base(0), + 
m_array_size(0),
+   m_elm_size(0)
+{
+   // Select the printed opcode name; buffer resinfo queries additionally
+   // suppress the MFC, format and fetch type fields when printing.
+   switch (m_opcode) {
+   case vc_fetch :
+      m_opname ="VFETCH";
+      break;
+   case vc_semantic :
+      m_opname = "FETCH_SEMANTIC";
+      break;
+   case vc_get_buf_resinfo :
+      set_print_skip(mfc);
+      set_print_skip(fmt);
+      set_print_skip(ftype);
+      m_opname = "GET_BUF_RESINFO";
+      break;
+   case vc_read_scratch :
+      m_opname = "READ_SCRATCH";
+      break;
+   default:
+      unreachable("Unknwon fetch instruction");
+   }
+
+   // The source may be null at this point (it can be set later via
+   // set_src()).
+   if (m_src)
+      m_src->add_use(this);
+
+   if (m_resource_offset && m_resource_offset->as_register())
+      m_resource_offset->as_register()->add_use(this);
+}
+
+void FetchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void FetchInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+// Two fetches are equal when source, destination (including swizzle),
+// flags, resource offset and all scalar parameters match.  A null source
+// or resource offset only equals another null one.
+bool FetchInstr::is_equal_to(const FetchInstr& rhs) const
+{
+   if (m_src) {
+      if (rhs.m_src) {
+         if (!m_src->equal_to(*rhs.m_src))
+            return false;
+      } else
+         return false;
+   } else if (rhs.m_src)
+      return false;
+
+   if (!comp_dest(rhs.dst(), rhs.all_dest_swizzle()))
+      return false;
+
+   if (m_tex_flags != rhs.m_tex_flags)
+      return false;
+
+   if (m_resource_offset && rhs.m_resource_offset) {
+      if (!m_resource_offset->equal_to(*rhs.m_resource_offset))
+         return false;
+   } else if (!(!!m_resource_offset == !!rhs.m_resource_offset))
+      return false;
+
+   return m_opcode == rhs.m_opcode &&
+         m_src_offset == rhs.m_src_offset &&
+         m_fetch_type == rhs.m_fetch_type &&
+         m_data_format == rhs.m_data_format &&
+         m_num_format == rhs.m_num_format &&
+         m_endian_swap == rhs.m_endian_swap &&
+         m_resource_id == rhs.m_resource_id &&
+         m_mega_fetch_count == rhs.m_mega_fetch_count &&
+         m_array_base == rhs.m_array_base &&
+         m_array_size == rhs.m_array_size &&
+         m_elm_size == rhs.m_elm_size;
+}
+
+// Called when this instruction is removed: drops the use it registered on
+// its source register.  Always returns true.
+bool FetchInstr::propagate_death()
+{
+   /* The rest of the class treats m_src as possibly null (see the
+    * constructor and is_equal_to()), so do not dereference it blindly. */
+   if (m_src) {
+      auto reg = m_src->as_register();
+      if (reg)
+         reg->del_use(this);
+   }
+   return true;
+}
+
+// Replace 'old_src' by 'new_src' wherever this fetch uses it as source or
+// resource offset.  Only registers can be substituted; returns true when at
+// least one use was replaced.
+bool FetchInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   bool success = false;
+   auto new_reg =
new_src->as_register(); + if (new_reg) { + if (old_src->equal_to(*m_src)) { + m_src->del_use(this); + m_src = new_reg; + new_reg->add_use(this); + success = true; + } + if (m_resource_offset && old_src->equal_to(*m_resource_offset)) { + m_resource_offset->del_use(this); + m_resource_offset = new_reg; + new_reg->add_use(this); + success = true; + } + } + return success; +} + +bool FetchInstr::do_ready() const +{ + for (auto i: required_instr()) { + if (!i->is_scheduled()) + return false; + } + + bool result = m_src && m_src->ready(block_id(), index()); + if (m_resource_offset) { + auto r = m_resource_offset->as_register(); + if (r) + result &= r->ready(block_id(), index()); + } + return result; +} + +void FetchInstr::do_print(std::ostream& os) const +{ + os << m_opname << ' '; + + print_dest(os); + + os << " :"; + + if (m_opcode != vc_get_buf_resinfo) { + + if (m_src && m_src->chan() < 7) { + os << " " << *m_src; + if (m_src_offset) + os << " + " << m_src_offset << "b"; + } + } + + if (m_opcode != vc_read_scratch) + os << " RID:" << m_resource_id; + + if (m_resource_offset) { + os << " + "; + m_resource_offset->print(os); + } + + if (!m_skip_print.test(ftype)) { + switch (m_fetch_type) { + case vertex_data : os << " VERTEX"; break; + case instance_data : os << " INSTANCE_DATA"; break; + case no_index_offset : os << " NO_IDX_OFFSET"; break; + default: + unreachable("Unknwon fetch instruction type"); + } + } + + if (!m_skip_print.test(fmt)) { + os << " FMT("; + auto fmt = s_data_format_map.find(m_data_format); + if (fmt != s_data_format_map.end()) + os << fmt->second << ","; + else + unreachable("unknwon data format"); + + if (m_tex_flags.test(format_comp_signed)) + os << "S"; + else + os << "U"; + + switch (m_num_format) { + case vtx_nf_norm : os << "NORM"; break; + case vtx_nf_int : os << "INT"; break; + case vtx_nf_scaled: os << "SCALED"; break; + default: + unreachable("Unknwon number format"); + } + + os << ")"; + } + + if (m_array_base) { + if (m_opcode != 
vc_read_scratch)
+         os << " BASE:" << m_array_base;
+      else
+         os << " L[0x" << std::uppercase << std::hex << m_array_base << std::dec << "]";
+   }
+
+   // The array size is stored minus one, so add it back for printing.
+   if (m_array_size)
+      os << " SIZE:" << m_array_size + 1;
+
+   if (m_tex_flags.test(is_mega_fetch) && !m_skip_print.test(mfc))
+      os << " MFC:" << m_mega_fetch_count;
+
+   if (m_elm_size)
+      os << " ES:" << m_elm_size;
+
+   if (m_tex_flags.test(fetch_whole_quad)) os << " WQ";
+   if (m_tex_flags.test(use_const_field)) os << " UCF";
+   if (m_tex_flags.test(srf_mode)) os << " SRF";
+   if (m_tex_flags.test(buf_no_stride)) os << " BNS";
+   if (m_tex_flags.test(alt_const)) os << " AC";
+   if (m_tex_flags.test(use_tc)) os << " TC";
+   if (m_tex_flags.test(vpm)) os << " VPM";
+   if (m_tex_flags.test(uncached) && m_opcode != vc_read_scratch) os << " UNCACHED";
+   if (m_tex_flags.test(indexed) && m_opcode != vc_read_scratch) os << " INDEXED";
+}
+
+// Parses the operands of a "VFETCH" instruction.
+Instr::Pointer FetchInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   return from_string_impl(is, vc_fetch, vf);
+}
+
+// Parses
+//   <dest> : <src> [+ <offset>b] RID:<id> <fetch type> FMT(<fmt>,<S|U><num>)
+// followed by optional "NAME:<value>" parameters and flag tokens, and
+// creates a FetchInstr with the given opcode.
+Instr::Pointer FetchInstr::from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest_reg = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string srcstr;
+   is >> srcstr;
+
+   // NOTE(review): this looks like leftover debug output - confirm whether
+   // it is still wanted.
+   std::cerr << "Get source " << srcstr << "\n";
+
+   auto src_reg = vf.src_from_string(srcstr)->as_register();
+   assert(src_reg);
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int src_offset_val = 0;
+
+   // An optional "+ <n>b" byte offset may precede the resource ID.
+   if (next == "+") {
+      is >> src_offset_val;
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   string fetch_type_str;
+   is >> fetch_type_str;
+
+   /* Accept every fetch type name that do_print() can emit.  For anything
+    * else fail the assertion; with assertions disabled the type falls back
+    * to vertex_data as before. */
+   EVFetchType fetch_type = vertex_data;
+   if (fetch_type_str == "VERTEX") {
+      fetch_type = vertex_data;
+   } else if (fetch_type_str == "INSTANCE_DATA") {
+      fetch_type = instance_data;
+   } else if (fetch_type_str == "NO_IDX_OFFSET") {
+      fetch_type = no_index_offset;
+   } else {
+      assert(0 && "Fetch type not yet implemented");
+   }
+
+   string
format_str; + is >> format_str; + + assert(!strncmp(format_str.c_str(), "FMT(", 4)); + string data_format; + string num_format_str; + + istringstream fmt_stream(format_str.substr(4)); + bool is_num_fmr = false; + assert(!fmt_stream.eof()); + + do { + char c; + fmt_stream >> c; + + if (c == ',') { + is_num_fmr = true; + continue; + } + + if (!is_num_fmr) + data_format.append(1, c); + else + num_format_str.append(1, c); + } while (!fmt_stream.eof()); + + EVTXDataFormat fmt = fmt_invalid; + + for (auto& [f, name] : s_data_format_map) { + if (data_format == name) { + fmt = f; + break; + } + } + + assert(fmt != fmt_invalid); + + bool fmt_signed = num_format_str[0] == 'S'; + assert(fmt_signed || num_format_str[0] == 'U'); + + size_t num_format_end = num_format_str.find(')'); + num_format_str = num_format_str.substr(1, num_format_end - 1) ; + + EVFetchNumFormat num_fmt; + if (num_format_str == "NORM") + num_fmt = vtx_nf_norm; + else if (num_format_str == "INT") + num_fmt = vtx_nf_int; + else if (num_format_str == "SCALED") + num_fmt = vtx_nf_scaled; + else { + std::cerr << "Number format: '" << num_format_str << "' : "; + unreachable("Unknown number format"); + } + + auto fetch = new FetchInstr(opcode, dest_reg, dst_swz, + src_reg, src_offset_val, fetch_type, fmt, num_fmt, + vtx_es_none, res_id, nullptr); + if (fmt_signed) + fetch->set_fetch_flag(format_comp_signed); + + while (!is.eof() && is.good()) { + std::string next_token; + is >> next_token; + + if (next_token.empty()) + break; + + if (next_token.find(':') != string::npos) { + fetch->set_param_from_string(next_token); + } else { + fetch->set_flag_from_string(next_token); + } + } + + return fetch; +} + +void FetchInstr::set_param_from_string(const std::string& token) +{ + if (token.substr(0,4) == "MFC:") + set_mfc(int_from_string_with_prefix(token, "MFC:")); + else if (token.substr(0,5) == "ARRB:") + set_array_base(int_from_string_with_prefix(token, "ARRB:")); + else if (token.substr(0,5) == "ARRS:") + 
set_array_size(int_from_string_with_prefix(token, "ARRS:")); + else if (token.substr(0,3) == "ES:") + set_element_size(int_from_string_with_prefix(token, "ES:")); + else { + std::cerr << "Token '" << token << "': "; + unreachable("Unknown token in fetch param list"); + } +} + +void FetchInstr::set_flag_from_string(const std::string& token) +{ + auto flag = s_flag_map.find(token.c_str()); + if (flag != s_flag_map.end()) + set_fetch_flag(flag->second); + else { + std::cerr << "Token: " << token << " : "; + unreachable("Unknown token in fetch flag list"); + } +} + + +const std::map FetchInstr::s_flag_map = { + {"WQ", fetch_whole_quad}, + {"UCF", use_const_field}, + {"SRF", srf_mode}, + {"BNS", buf_no_stride}, + {"AC", alt_const}, + {"TC", use_tc}, + {"VPM", vpm}, + {"UNCACHED", uncached}, + {"INDEXED", indexed} +}; + +const std::map FetchInstr::s_data_format_map = { + {fmt_invalid, "INVALID"}, + {fmt_8, "8"}, + {fmt_4_4, "4_4"}, + {fmt_3_3_2, "3_3_2"}, + {fmt_reserved_4, "RESERVED_4"}, + {fmt_16, "16"}, + {fmt_16_float, "16F"}, + {fmt_8_8, "8_8"}, + {fmt_5_6_5, "5_6_5"}, + {fmt_6_5_5, "6_5_5"}, + {fmt_1_5_5_5, "1_5_5_5"}, + {fmt_4_4_4_4, "4_4_4_4"}, + {fmt_5_5_5_1, "5_5_5_1"}, + {fmt_32, "32"}, + {fmt_32_float, "32F"}, + {fmt_16_16, "16_16"}, + {fmt_16_16_float, "16_16F"}, + {fmt_8_24, "8_24"}, + {fmt_8_24_float, "8_24F"}, + {fmt_24_8, "24_8"}, + {fmt_24_8_float, "24_8F"}, + {fmt_10_11_11, "10_11_11"}, + {fmt_10_11_11_float, "10_11_11F"}, + {fmt_11_11_10, "11_11_10"}, + {fmt_10_11_11_float, "11_11_10F"}, + {fmt_2_10_10_10, "2_10_10_10"}, + {fmt_8_8_8_8, "8_8_8_8"}, + {fmt_10_10_10_2, "10_10_10_2"}, + {fmt_x24_8_32_float, "X24_8_32F"}, + {fmt_32_32, "32_32"}, + {fmt_32_32_float, "32_32F"}, + {fmt_16_16_16_16, "16_16_16_16"}, + {fmt_16_16_16_16_float, "16_16_16_16F"}, + {fmt_reserved_33, "RESERVED_33"}, + {fmt_32_32_32_32, "32_32_32_32"}, + {fmt_32_32_32_32_float, "32_32_32_32F"}, + {fmt_reserved_36, "RESERVED_36"}, + {fmt_1, "1"}, + {fmt_1_reversed, "1_REVERSED"}, + 
{fmt_gb_gr, "GB_GR"},
+   {fmt_bg_rg, "BG_RG"},
+   {fmt_32_as_8, "32_AS_8"},
+   {fmt_32_as_8_8, "32_AS_8_8"},
+   {fmt_5_9_9_9_sharedexp, "5_9_9_9_SHAREDEXP"},
+   {fmt_8_8_8, "8_8_8"},
+   {fmt_16_16_16, "16_16_16"},
+   {fmt_16_16_16_float, "16_16_16F"},
+   {fmt_32_32_32, "32_32_32"},
+   {fmt_32_32_32_float, "32_32_32F"},
+   {fmt_bc1, "BC1"},
+   {fmt_bc2, "BC2"},
+   {fmt_bc3, "BC3"},
+   {fmt_bc4, "BC4"},
+   {fmt_bc5, "BC5"},
+   {fmt_apc0, "APC0"},
+   {fmt_apc1, "APC1"},
+   {fmt_apc2, "APC2"},
+   {fmt_apc3, "APC3"},
+   {fmt_apc4, "APC4"},
+   {fmt_apc5, "APC5"},
+   {fmt_apc6, "APC6"},
+   {fmt_apc7, "APC7"},
+   {fmt_ctx1, "CTX1"},
+   {fmt_reserved_63, "RESERVED_63"}
+};
+
+
+// Buffer size query: a vc_get_buf_resinfo fetch on resource 'resid'.  The
+// source is a freshly created register with sel 0 and channel 7, which
+// do_print() does not print.  MFC, format and fetch type are not printed.
+QueryBufferSizeInstr::QueryBufferSizeInstr(const RegisterVec4& dst,
+                                           const RegisterVec4::Swizzle& dst_swz,
+                                           uint32_t resid):
+   FetchInstr(vc_get_buf_resinfo,
+              dst, dst_swz,
+              new Register( 0, 7, pin_fully),
+              0,
+              no_index_offset,
+              fmt_32_32_32_32,
+              vtx_nf_norm,
+              vtx_es_none,
+              resid,
+              nullptr)
+{
+   set_fetch_flag(format_comp_signed);
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+// Parses "<dest> : RID:<id>".
+Instr::Pointer QueryBufferSizeInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr, res_id_str;
+   is >> deststr;
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   is >> res_id_str;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   return new QueryBufferSizeInstr( dst, dst_swz, res_id);
+}
+
+// Buffer load: a vc_fetch with fetch type no_index_offset and number
+// format vtx_nf_scaled that is printed as "LOAD_BUF".  The mega fetch count
+// is set to 16; MFC, format and fetch type are not printed.
+LoadFromBuffer::LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swizzle,
+                               PRegister addr, uint32_t addr_offset,
+                               uint32_t resid, PRegister res_offset, EVTXDataFormat data_format):
+   FetchInstr(vc_fetch, dst, dst_swizzle, addr, addr_offset, no_index_offset,
+              data_format, vtx_nf_scaled, vtx_es_none, resid, res_offset)
+{
+   set_fetch_flag(format_comp_signed);
+   set_mfc(16);
+   override_opname("LOAD_BUF");
+   set_print_skip(mfc);
+
set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+// Parses "<dest> : <addr> [+ <offset>b] RID:<id> [+ <res offset>] [SRF]".
+// The data format is always fmt_32_32_32_32_float.
+Instr::Pointer LoadFromBuffer::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_reg = vf.src_from_string(addrstr)->as_register();
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int addr_offset_val = 0;
+
+   // An optional "+ <n>b" byte offset may precede the resource ID.
+   if (next == "+") {
+      is >> addr_offset_val;
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   // An optional "+ <register>" resource offset may follow the resource ID.
+   next.clear();
+   is >> next;
+   PRegister res_offset = nullptr;
+   if (next == "+") {
+      string res_offset_str;
+      is >> res_offset_str;
+      res_offset = vf.src_from_string(res_offset_str)->as_register();
+   }
+
+   auto fetch = new LoadFromBuffer( dst, dst_swz,
+                                    addr_reg, addr_offset_val,
+                                    res_id, res_offset, fmt_32_32_32_32_float);
+   // NOTE(review): when no resource offset was given, the token read above
+   // may already be "SRF"; this second read then presumably relies on the
+   // extraction failing at end of input and leaving 'next' untouched -
+   // verify.
+   is >> next;
+   if (next == "SRF")
+      fetch->set_fetch_flag(srf_mode);
+
+   return fetch;
+}
+
+// Visitor that configures a LoadFromScratch according to the kind of its
+// address value: a register makes the load indexed, a literal constant
+// becomes the array base.  All other value kinds are asserted against.
+class AddrResolver: public RegisterVisitor {
+public:
+   AddrResolver(LoadFromScratch *lfs) : m_lfs(lfs) {}
+
+   void visit(Register& value) {
+      m_lfs->set_fetch_flag(FetchInstr::indexed);
+      m_lfs->set_src(&value);
+      value.add_use(m_lfs);
+   }
+   void visit(LocalArray& value) {assert(0);(void)value;}
+   void visit(LocalArrayValue& value) {assert(0);(void)value;}
+   void visit(UniformValue& value) {assert(0);(void)value;}
+   void visit(LiteralConstant& value) {
+      m_lfs->set_array_base(value.value());
+      // Register with sel 0 and channel 7 as source; do_print() skips
+      // sources with a channel of 7 or above.
+      m_lfs->set_src(new Register( 0, 7, pin_none));
+   }
+   void visit(InlineConstant& value) {assert(0);(void)value;}
+
+   LoadFromScratch *m_lfs;
+};
+
+
+
+// Scratch read: a vc_read_scratch fetch created without a source; the
+// source (or the array base) is filled in from 'addr' in the constructor
+// body.
+LoadFromScratch::LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swz, PVirtualValue addr, uint32_t scratch_size):
+   FetchInstr(vc_read_scratch,
+              dst, dst_swz,
+              nullptr,
+              0,
+
no_index_offset,
+              fmt_32_32_32_32,
+              vtx_nf_int,
+              vtx_es_none,
+              0,
+              nullptr)
+{
+   set_fetch_flag(uncached);
+   set_fetch_flag(wait_ack);
+
+   // The array size is stored minus one, hence at least one element is
+   // required.
+   assert(scratch_size >= 1);
+   set_array_size(scratch_size - 1);
+   set_array_base(0);
+   // Let the address value decide whether this is an indexed read or a
+   // read from a constant array base.
+   AddrResolver ar(this);
+   addr->accept(ar);
+
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+   set_element_size(3);
+}
+
+// Parses "<dest> : <addr> SIZE:<n>" with n >= 1.
+Instr::Pointer LoadFromScratch::from_string(std::istream& is, ValueFactory &vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_reg = vf.src_from_string(addrstr);
+
+   string offsetstr;
+   is >> offsetstr;
+   int size = int_from_string_with_prefix(offsetstr, "SIZE:");
+   assert(size >= 1);
+
+   return new LoadFromScratch( dest, dst_swz, addr_reg, size);
+}
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
new file mode 100644
index 0000000..3fd0f34
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
@@ -0,0 +1,152 @@
+#ifndef INSTR_FETCH_H
+#define INSTR_FETCH_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+// Fetch instruction with a vec4 result.
+class FetchInstr : public InstrWithVectorResult {
+public:
+
+   // Flags of the fetch; the values are used as bit positions in
+   // m_tex_flags.
+   enum EFlags {
+      fetch_whole_quad,
+      use_const_field,
+      format_comp_signed,
+      srf_mode,
+      buf_no_stride,
+      alt_const,
+      use_tc,
+      vpm,
+      is_mega_fetch,
+      uncached,
+      indexed,
+      wait_ack,
+      unknown
+   };
+
+   // Fields that can be left out when printing; used as bit positions in
+   // m_skip_print.
+   enum EPrintSkip {
+      fmt,
+      ftype,
+      mfc,
+      count
+   };
+
+   FetchInstr(EVFetchInstr opcode,
+              const RegisterVec4& dst,
+              const RegisterVec4::Swizzle& dest_swizzle,
+              PRegister src,
+              uint32_t src_offset,
+              EVFetchType fetch_type,
+              EVTXDataFormat data_format,
+              EVFetchNumFormat num_format,
+              EVFetchEndianSwap endian_swap,
+              uint32_t resource_id,
+              PRegister resource_offset);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+
void accept(InstrVisitor& visitor) override; + + void set_src(PRegister src) { m_src = src; } + const auto& src() const {assert(m_src); return *m_src;} /* asserts that a source register has been set */ + uint32_t src_offset() const {return m_src_offset;} + + uint32_t resource_id() const {return m_resource_id;} + auto resource_offset() const {return m_resource_offset;} + + EVFetchType fetch_type() const {return m_fetch_type;} + EVTXDataFormat data_format() const {return m_data_format;} + void set_num_format(EVFetchNumFormat nf) {m_num_format = nf;} + EVFetchNumFormat num_format() const {return m_num_format;} + EVFetchEndianSwap endian_swap() const {return m_endian_swap;} + + uint32_t mega_fetch_count() const {return m_mega_fetch_count;} + uint32_t array_base() const {return m_array_base;} + uint32_t array_size() const {return m_array_size;} + uint32_t elm_size() const {return m_elm_size;} + + void reset_fetch_flag(EFlags flag) {m_tex_flags.reset(flag);} + void set_fetch_flag(EFlags flag) {m_tex_flags.set(flag);} + bool has_fetch_flag(EFlags flag) const { return m_tex_flags.test(flag);} + + EVFetchInstr opcode() const {return m_opcode;} + + bool is_equal_to(const FetchInstr& rhs) const; + + static Instr::Pointer from_string(std::istream& is, ValueFactory &vf); + + void set_mfc(int mfc) {m_tex_flags.set(is_mega_fetch); m_mega_fetch_count = mfc;} /* also sets the is_mega_fetch flag */ + void set_array_base(int arrb) {m_array_base = arrb;} + void set_array_size(int arrs) {m_array_size = arrs;} + + void set_element_size(int size) { m_elm_size = size;} + void set_print_skip(EPrintSkip skip) {m_skip_print.set(skip);} + uint32_t slots() const override {return 1;}; + + bool replace_source(PRegister old_src, PVirtualValue new_src) override; + +protected: + static Instr::Pointer from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory &vf); + + void override_opname(const char *opname) { m_opname = opname;} + +private: + bool do_ready() const override; + + void do_print(std::ostream& os) const override; + + void set_param_from_string(const std::string& 
next_token); + void set_flag_from_string(const std::string& next_token); + + static const std::map s_data_format_map; + static const std::map s_flag_map; + + bool propagate_death() override; + + EVFetchInstr m_opcode; + + PRegister m_src; + uint32_t m_src_offset; + + EVFetchType m_fetch_type; + EVTXDataFormat m_data_format; + EVFetchNumFormat m_num_format; + EVFetchEndianSwap m_endian_swap; + + uint32_t m_resource_id; + PRegister m_resource_offset; + + std::bitset m_tex_flags; /* set, reset and tested with EFlags values */ + std::bitset m_skip_print; /* set with EPrintSkip values */ + + uint32_t m_mega_fetch_count; + uint32_t m_array_base; + uint32_t m_array_size; + uint32_t m_elm_size; + + std::string m_opname; +}; + +class QueryBufferSizeInstr : public FetchInstr { +public: + QueryBufferSizeInstr(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, uint32_t resid); + static Instr::Pointer from_string(std::istream& is, ValueFactory& vf); +}; + +class LoadFromBuffer : public FetchInstr { +public: + LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, + PRegister addr, uint32_t addr_offset, + uint32_t resid, PRegister res_offset, EVTXDataFormat data_format); + static Instr::Pointer from_string(std::istream& is, ValueFactory& vf); +}; + +class LoadFromScratch : public FetchInstr { +public: + LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, PVirtualValue addr, uint32_t offset); /* the out-of-line definition names the last parameter scratch_size */ + static Instr::Pointer from_string(std::istream& is, ValueFactory& vf); +}; + +} +#endif // INSTR_FETCH_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp new file mode 100644 index 0000000..caa48a9 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp @@ -0,0 +1,411 @@ +#include "sfn_instr_lds.h" +#include "sfn_instr_alu.h" +#include "sfn_debug.h" + +namespace r600 { + +using std::istream; + +LDSReadInstr::LDSReadInstr(std::vector>& value, + AluInstr::SrcValues& address): + m_address(address), + m_dest_value(value) +{ +
assert(m_address.size() == m_dest_value.size()); + + for (auto& v: value) + v->add_parent(this); + + for (auto& s: m_address) + if (s->as_register()) + s->as_register()->add_use(this); +} + +void LDSReadInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void LDSReadInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +bool LDSReadInstr::remove_unused_components() /* Drops destinations without uses together with their addresses; returns false when every destination is still used. */ +{ + uint8_t inactive_mask = 0; /* bit i set: m_dest_value[i] has no uses */ + for (size_t i = 0; i < m_dest_value.size(); ++i) { + if (m_dest_value[i]->uses().empty()) + inactive_mask |= 1 << i; + } + + if (!inactive_mask) + return false; + + auto new_addr = AluInstr::SrcValues(); + auto new_dest = std::vector>(); + + for (size_t i = 0; i < m_dest_value.size(); ++i) { + if ((1 << i) & inactive_mask) { + if (m_address[i]->as_register()) + m_address[i]->as_register()->del_use(this); + m_dest_value[i]->del_parent(this); + } else { + new_dest.push_back(m_dest_value[i]); + new_addr.push_back(m_address[i]); + } + } + + m_dest_value.swap(new_dest); + m_address.swap(new_addr); + + return m_address.size() != new_addr.size(); /* new_addr holds the old address list after the swap */ +} + +class SetLDSAddrProperty : public AluInstrVisitor { + using AluInstrVisitor::visit; + void visit(AluInstr *instr) override { + instr->set_alu_flag(alu_lds_address); + } +}; + +AluInstr *LDSReadInstr::split(std::vector& out_block, AluInstr *last_lds_instr) /* Appends the ALU instructions replacing this read to out_block and returns the last one emitted. */ +{ + AluInstr* first_instr = nullptr; + SetLDSAddrProperty prop; + for (auto& addr: m_address) { + auto reg = addr->as_register(); + if (reg) { + reg->del_use(this); + if (reg->parents().size() == 1) { + for (auto& p: reg->parents()) { + p->accept(prop); + } + } + } + + auto instr = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr); + instr->set_blockid(block_id(), index()); + + if (last_lds_instr) + instr->add_required_instr(last_lds_instr); + out_block.push_back(instr); + last_lds_instr = instr; + if (!first_instr) { + first_instr = instr; + first_instr->set_alu_flag(alu_lds_group_start); + } else { + /* In order to make it 
possible that the scheduler + * keeps the loads of a group close together, we + * require that the addresses are all already available + * when the first read instruction is emitted. + * Otherwise it might happen that the loads and reads from the + * queue are split across ALU cf clauses, and this is not allowed */ + first_instr->add_extra_dependency(addr); + } + } + + for (auto& dest: m_dest_value) { /* one mov from ALU_SRC_LDS_OQ_A_POP per destination, each required after the previous LDS instruction */ + dest->del_parent(this); + auto instr = new AluInstr(op1_mov, dest, + new InlineConstant(ALU_SRC_LDS_OQ_A_POP), + AluInstr::last_write); + instr->add_required_instr(last_lds_instr); + instr->set_blockid(block_id(), index()); + out_block.push_back(instr); + last_lds_instr = instr; + } + if (last_lds_instr) + last_lds_instr->set_alu_flag(alu_lds_group_end); + + return last_lds_instr; +} + +bool LDSReadInstr::do_ready() const +{ + unreachable("This instruction is not handled by the schduler"); + return false; +} + +void LDSReadInstr::do_print(std::ostream& os) const +{ + os << "LDS_READ "; + + os << "[ "; + for (auto d: m_dest_value) { + os << *d << " "; + } + os << "] : [ "; + for (auto a: m_address) { + os << *a << " "; + } + os << "]"; +} + +bool LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const /* only the address counts are compared up front; the loop indexes both vectors up to num_values() */ +{ + if (m_address.size() != rhs.m_address.size()) + return false; + + for (unsigned i = 0; i < num_values(); ++i) { + if (!m_address[i]->equal_to(*rhs.m_address[i])) + return false; + if (!m_dest_value[i]->equal_to(*rhs.m_dest_value[i])) + return false; + } + return true; +} + +auto LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer +{ + /* LDS_READ [ d1 d2 d3 ... ] : [ a1 a2 a3 ... ] 
*/ + + std::string temp_str; + + is >> temp_str; + assert(temp_str == "["); + + std::vector > dests; + AluInstr::SrcValues srcs; + + is >> temp_str; + while (temp_str != "]") { + auto dst = value_factory.dest_from_string(temp_str); + assert(dst); + dests.push_back(dst); + is >> temp_str; + } + + is >> temp_str; + assert(temp_str == ":"); + is >> temp_str; + assert(temp_str == "["); + + is >> temp_str; + while (temp_str != "]") { + auto src = value_factory.src_from_string(temp_str); + assert(src); + srcs.push_back(src); + is >> temp_str; + }; + assert(srcs.size() == dests.size() && !dests.empty()); + + return new LDSReadInstr(dests, srcs); +} + +LDSAtomicInstr::LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, + const SrcValues& srcs): + m_opcode(op), + m_address(address), + m_dest(dest), + m_srcs(srcs) +{ + if (m_dest) + m_dest->add_parent(this); + + if (m_address->as_register()) /* address is dereferenced unconditionally and must not be null */ + m_address->as_register()->add_use(this); + + for (auto& s: m_srcs) { + if (s->as_register()) + s->as_register()->add_use(this); + } +} + + +void LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void LDSAtomicInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +AluInstr *LDSAtomicInstr::split(std::vector& out_block, AluInstr *last_lds_instr) /* Replaces this instruction by an ALU op on {address, sources...}, plus a queue-pop mov when a destination exists. */ +{ + AluInstr::SrcValues srcs = {m_address}; + + for(auto& s : m_srcs) + srcs.push_back(s); + + for(auto& s :srcs) { + if (s->as_register()) + s->as_register()->del_use(this); + } + + SetLDSAddrProperty prop; + auto reg = srcs[0]->as_register(); + if (reg) { + reg->del_use(this); /* NOTE(review): srcs[0] already had del_use() called in the loop above - presumably redundant, confirm */ + if (reg->parents().size() == 1) { + for (auto& p: reg->parents()) { + p->accept(prop); + } + } + } + + auto op_instr = new AluInstr(m_opcode, srcs, {}); + op_instr->set_blockid(block_id(), index()); + + if (last_lds_instr) { + op_instr->add_required_instr(last_lds_instr); + } + + out_block.push_back(op_instr); + if (m_dest) { + op_instr->set_alu_flag(alu_lds_group_start); + m_dest->del_parent(this); +
auto read_instr = new AluInstr(op1_mov, m_dest, + new InlineConstant(ALU_SRC_LDS_OQ_A_POP), + AluInstr::last_write); + read_instr->add_required_instr(op_instr); + read_instr->set_blockid(block_id(), index()); + read_instr->set_alu_flag(alu_lds_group_end); + out_block.push_back(read_instr); + last_lds_instr = read_instr; + } + return last_lds_instr; /* still the caller's value when there is no destination */ +} + +bool LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src) /* Replaces every data source equal to old_src by new_src; returns true when at least one source was replaced. */ +{ + bool process = false; + + + if (new_src->as_uniform() && m_srcs.size() > 2) { + int nconst = 0; + for (auto& s : m_srcs) { + if (s->as_uniform() && !s->equal_to(*old_src)) + ++nconst; + } + /* Conservative check: two kcache values can always be live, + * three might be a problem; don't care for now, just reject + */ + if (nconst > 2) + return false; + } + + /* If the old source is an array element, we assume that there + * might have been an (untracked) indirect access, so don't replace + * this source */ + if (old_src->pin() == pin_array) + return false; + + if (new_src->get_addr()) { + for (auto& s : m_srcs) { + auto addr = s->get_addr(); + /* can't have two different indirect addresses in the same instr */ + if (addr && !addr->equal_to(*new_src->get_addr())) + return false; + } + } + + for (unsigned i = 0; i < m_srcs.size(); ++i) { + if (old_src->equal_to(*m_srcs[i])) { + m_srcs[i] = new_src; + process = true; + } + } + + if (process) { + auto r = new_src->as_register(); + if (r) + r->add_use(this); + old_src->del_use(this); + } + return process; +} + +bool LDSAtomicInstr::do_ready() const +{ + unreachable("This instruction is not handled by the schduler"); + return false; +} + +void LDSAtomicInstr::do_print(std::ostream& os) const +{ + auto ii = lds_ops.find(m_opcode); + assert(ii != lds_ops.end()); + + os << "LDS " << ii->second.name << " "; + if (m_dest) + os << *m_dest; + else + os << "__.x"; + + os << " [ " << *m_address << " ] : " << *m_srcs[0]; + if (m_srcs.size() > 1) + os << " " << *m_srcs[1]; +} + +bool 
LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const +{ + if (m_srcs.size() != rhs.m_srcs.size()) + return false; + + for (unsigned i = 0; i < m_srcs.size(); ++i) { + if (!m_srcs[i]->equal_to(*rhs.m_srcs[i])) + return false; + } + + return m_opcode == rhs.m_opcode && + sfn_value_equal(m_address, rhs.m_address) && + sfn_value_equal(m_dest, rhs.m_dest); +} + + +auto LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer +{ + /* LDS WRITE2 __.x [ R1.x ] : R2.y R3.z */ + /* LDS WRITE __.x [ R1.x ] : R2.y */ + /* LDS ATOMIC_ADD_RET R7.x [ R5.y ] : R2.y */ + + std::string temp_str; + + is >> temp_str; + + ESDOp opcode = DS_OP_INVALID; + int nsrc = 0; + + for (auto& [op, opinfo] : lds_ops) { + if (temp_str == opinfo.name) { + opcode = op; + nsrc = opinfo.nsrc; + break; + } + } + + assert(opcode != DS_OP_INVALID); + + is >> temp_str; + + PRegister dest = nullptr; + if (temp_str[0] != '_') /* a token starting with '_' (e.g. __.x) means no destination */ + dest = value_factory.dest_from_string(temp_str); + + is >> temp_str; + assert(temp_str == "["); + is >> temp_str; + auto addr = value_factory.src_from_string(temp_str); + + is >> temp_str; + assert(temp_str == "]"); + + is >> temp_str; + assert(temp_str == ":"); + + AluInstr::SrcValues srcs; + for (int i = 0; i < nsrc - 1; ++i) { /* presumably opinfo.nsrc also counts the address operand - confirm */ + is >> temp_str; + auto src = value_factory.src_from_string(temp_str); + assert(src); + srcs.push_back(src); + } + + return new LDSAtomicInstr(opcode, dest, addr, srcs); +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.h b/src/gallium/drivers/r600/sfn/sfn_instr_lds.h new file mode 100644 index 0000000..8b1c163 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.h @@ -0,0 +1,80 @@ +#ifndef LDSINSTR_H +#define LDSINSTR_H + +#include "sfn_instr_alu.h" +#include "sfn_valuefactory.h" + +namespace r600 { + +class LDSReadInstr : public Instr { +public: + LDSReadInstr(std::vector>& value, + AluInstr::SrcValues& address); + + unsigned num_values() const { return m_dest_value.size();} + auto address(unsigned i) 
const { return m_address[i];} + auto dest(unsigned i) const { return m_dest_value[i];} + + auto address(unsigned i){ return m_address[i];} + auto dest(unsigned i) { return m_dest_value[i];} + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + AluInstr *split(std::vector& out_block, AluInstr *last_lds_instr); + bool is_equal_to(const LDSReadInstr& lhs) const; /* the definition names this parameter rhs */ + + static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer; + + bool remove_unused_components(); + +private: + + bool do_ready() const override; + + void do_print(std::ostream& os) const override; + + AluInstr::SrcValues m_address; /* same length as m_dest_value (asserted in the constructor) */ + std::vector> m_dest_value; +}; + +class LDSAtomicInstr : public Instr { +public: + using SrcValues = AluInstr::SrcValues; + + LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, const SrcValues& src); + + auto address() const { return m_address;} + auto dest() const { return m_dest;} + auto src0() const { return m_srcs[0];} + auto src1() const { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;} /* nullptr when there is no second source */ + + PVirtualValue address() { return m_address;} + PRegister dest() { return m_dest;} + PVirtualValue src0() { return m_srcs[0];} + PVirtualValue src1() { return m_srcs.size() > 1 ? 
m_srcs[1] : nullptr;} + + unsigned op() const {return m_opcode;} + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + AluInstr *split(std::vector& out_block, AluInstr *last_lds_instr); + bool is_equal_to(const LDSAtomicInstr& lhs) const; /* the definition names this parameter rhs */ + + static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer; + bool replace_source(PRegister old_src, PVirtualValue new_src) override; + +private: + bool do_ready() const override; + void do_print(std::ostream& os) const override; + + ESDOp m_opcode; + PVirtualValue m_address{nullptr}; + PRegister m_dest{nullptr}; /* may be null: the constructor and split() test it before use */ + SrcValues m_srcs; +}; + +} + +#endif // LDSINSTR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp new file mode 100644 index 0000000..ba46d4a --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp @@ -0,0 +1,844 @@ +#include "sfn_instr_mem.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_tex.h" +#include "sfn_shader.h" + +namespace r600 { + + +GDSInstr::GDSInstr(ESDOp op, Register *dest, + const RegisterVec4& src, int uav_base, + PRegister uav_id): + m_op(op), + m_dest(dest), + m_src(src), + m_uav_base(uav_base), + m_uav_id(uav_id) +{ + set_always_keep(); + + m_src.add_use(this); + m_dest->add_parent(this); + + if (m_uav_id) + m_uav_id->add_use(this); +} + +bool GDSInstr::is_equal_to(const GDSInstr& rhs) const +{ +#define NE(X) (X != rhs. 
X) + + if (NE(m_op) || + NE(m_src) || + NE(m_uav_base)) + return false; + + if (!sfn_value_equal(m_dest, rhs.m_dest)) return false; /* the destination comparison result used to be discarded */ + + return sfn_value_equal(m_uav_id, rhs.m_uav_id); +} + +void GDSInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void GDSInstr::accept(InstrVisitor& visitor) +{ + visitor.visit(this); +} + +bool GDSInstr::do_ready() const /* ready once the source vector and, if present, the UAV id register are ready */ +{ + return m_src.ready(block_id(), index()) && + (!m_uav_id || m_uav_id->ready(block_id(), index())); +} + +void GDSInstr::do_print(std::ostream& os) const +{ + os << "GDS " << lds_ops.at(m_op).name + << *m_dest; /* NOTE(review): no separator is written between the op name and the destination - confirm intended */ + os << " " << m_src; + os << " BASE:" << m_uav_base; + + if (m_uav_id) + os << " UAV:" << *m_uav_id; +} + +bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader) /* Dispatches atomic counter intrinsics to their emitters; returns false for any other intrinsic. */ +{ + switch (intr->intrinsic) { + case nir_intrinsic_atomic_counter_add: + case nir_intrinsic_atomic_counter_and: + case nir_intrinsic_atomic_counter_exchange: + case nir_intrinsic_atomic_counter_max: + case nir_intrinsic_atomic_counter_min: + case nir_intrinsic_atomic_counter_or: + case nir_intrinsic_atomic_counter_xor: + case nir_intrinsic_atomic_counter_comp_swap: + return emit_atomic_op2(intr, shader); + case nir_intrinsic_atomic_counter_read: + case nir_intrinsic_atomic_counter_post_dec: + return emit_atomic_read(intr, shader); + case nir_intrinsic_atomic_counter_inc: + return emit_atomic_inc(intr, shader); + case nir_intrinsic_atomic_counter_pre_dec: + return emit_atomic_pre_dec(intr, shader); + default: + return false; + } +} + +static ESDOp get_opcode(const nir_intrinsic_op opcode) /* Opcode used when the result of the atomic is read. */ +{ + switch (opcode) { + case nir_intrinsic_atomic_counter_add: + return DS_OP_ADD_RET; + case nir_intrinsic_atomic_counter_and: + return DS_OP_AND_RET; + case nir_intrinsic_atomic_counter_exchange: + return DS_OP_XCHG_RET; + case nir_intrinsic_atomic_counter_inc: + return DS_OP_INC_RET; + case nir_intrinsic_atomic_counter_max: + return DS_OP_MAX_UINT_RET; + case nir_intrinsic_atomic_counter_min: + return DS_OP_MIN_UINT_RET; 
+ case nir_intrinsic_atomic_counter_or: + return DS_OP_OR_RET; + case nir_intrinsic_atomic_counter_read: + return DS_OP_READ_RET; + case nir_intrinsic_atomic_counter_xor: + return DS_OP_XOR_RET; + case nir_intrinsic_atomic_counter_post_dec: + return DS_OP_DEC_RET; + case nir_intrinsic_atomic_counter_comp_swap: + return DS_OP_CMP_XCHG_RET; + case nir_intrinsic_atomic_counter_pre_dec: + default: + return DS_OP_INVALID; + } +} + +static ESDOp get_opcode_wo(const nir_intrinsic_op opcode) /* Opcode used when the result is unused; comp_swap and exchange still map to the _RET variants here. */ +{ + switch (opcode) { + case nir_intrinsic_atomic_counter_add: + return DS_OP_ADD; + case nir_intrinsic_atomic_counter_and: + return DS_OP_AND; + case nir_intrinsic_atomic_counter_inc: + return DS_OP_INC; + case nir_intrinsic_atomic_counter_max: + return DS_OP_MAX_UINT; + case nir_intrinsic_atomic_counter_min: + return DS_OP_MIN_UINT; + case nir_intrinsic_atomic_counter_or: + return DS_OP_OR; + case nir_intrinsic_atomic_counter_xor: + return DS_OP_XOR; + case nir_intrinsic_atomic_counter_post_dec: + return DS_OP_DEC; + case nir_intrinsic_atomic_counter_comp_swap: + return DS_OP_CMP_XCHG_RET; + case nir_intrinsic_atomic_counter_exchange: + return DS_OP_XCHG_RET; + case nir_intrinsic_atomic_counter_pre_dec: + default: + return DS_OP_INVALID; + } +} + + +bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader) /* Emits a two-operand GDS atomic; returns false when no opcode exists for the intrinsic. */ +{ + auto& vf = shader.value_factory(); + bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); + + ESDOp op = read_result ? 
get_opcode(instr->intrinsic) : + get_opcode_wo(instr->intrinsic); + + if (DS_OP_INVALID == op) + return false; + + auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {} + offset += nir_intrinsic_base(instr); /* NOTE(review): the other emit_atomic_* helpers pass the base through shader.remap_atomic_base() - confirm this difference is intended */ + + auto dest = vf.dest(instr->dest, 0, pin_free); + + PRegister src_as_register = nullptr; + auto src_val = vf.src(instr->src[1], 0); + if (!src_val->as_register()) { /* non-register operands are first copied into a temporary register */ + auto temp_src_val = vf.temp_register(); + shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write)); + src_as_register = temp_src_val; + } else + src_as_register = src_val->as_register(); + + if (uav_id != nullptr) + shader.set_flag(Shader::sh_indirect_atomic); + + GDSInstr *ir = nullptr; + if (shader.chip_class() < ISA_CC_CAYMAN) { + RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free); + ir = new GDSInstr(op, dest, src, offset, uav_id); + + } else { + auto dest = vf.dest(instr->dest, 0, pin_free); /* NOTE(review): shadows the dest declared above with an identical call */ + auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7}); + if (uav_id) /* tmp[0] is presumably uav_id * 4 + 4 * offset, or just 4 * offset without an indirect id */ + shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset), + AluInstr::write)); + else + shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); + shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write)); + ir = new GDSInstr(op, dest, tmp, 0, nullptr); + } + shader.emit_instruction(ir); + return true; +} + +bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader) /* Emits a DS_OP_READ_RET of the counter selected by the resource offset and remapped base. */ +{ + auto& vf = shader.value_factory(); + + auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {} + offset += shader.remap_atomic_base(nir_intrinsic_base(instr)); + + auto dest = vf.dest(instr->dest, 0, pin_free); + + GDSInstr *ir = nullptr; + + if (shader.chip_class() < ISA_CC_CAYMAN) { + RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7}); + ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id); + } else { + auto tmp = vf.temp_vec4(pin_group, 
{0, 7, 7, 7}); + if (uav_id) /* NOTE(review): neither branch marks its ALU instruction as last (AluInstr::write only), unlike the sibling emitters whose final mov uses last_write - confirm */ + shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset), + AluInstr::write)); + else + shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); + + ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr); + } + + shader.emit_instruction(ir); + return true; +} + +bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader) /* Emits a GDS add of shader.atomic_update(); the _RET opcode is used only when the result is read. */ +{ + auto& vf = shader.value_factory(); + bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); + + auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {} + offset += shader.remap_atomic_base(nir_intrinsic_base(instr)); + + GDSInstr *ir = nullptr; + + if (shader.chip_class() < ISA_CC_CAYMAN) { + auto dest = vf.dest(instr->dest, 0, pin_free); + RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan); + ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, + dest, src, offset, uav_id); + } else { + auto dest = vf.dest(instr->dest, 0, pin_free); + auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7}); + + if (uav_id) + shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset), + AluInstr::write)); + else + shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); + + shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + ir = new GDSInstr(read_result ? 
DS_OP_ADD_RET : DS_OP_ADD, + dest, tmp, 0, nullptr); + } + shader.emit_instruction(ir); + return true; +} + +bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader) /* Emits DS_OP_SUB_RET into a temporary, then an op2_sub_int of that temporary and vf.one_i() into the real destination. */ +{ + auto& vf = shader.value_factory(); + + auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {} + offset += shader.remap_atomic_base(nir_intrinsic_base(instr)); + + auto *tmp_dest = vf.temp_register(); + + GDSInstr *ir = nullptr; + + if (shader.chip_class() < ISA_CC_CAYMAN) { + RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan); + ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id); + } else { + auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7}); + if (uav_id) + shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset), + AluInstr::write)); + else + shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write)); + + shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write)); + ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr); + } + + shader.emit_instruction(ir); + shader.emit_instruction(new AluInstr(op2_sub_int, vf.dest(instr->dest, 0, pin_free), + tmp_dest, vf.one_i(), AluInstr::last_write)); + return true; +} + + +RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op, + const RegisterVec4& data, const RegisterVec4& index, + int rat_id, PRegister rat_id_offset, + int burst_count, int comp_mask, int element_size): + m_cf_opcode(cf_opcode), + m_rat_op(rat_op), + m_data(data), + m_index(index), + m_rat_id_offset(rat_id_offset), + m_rat_id(rat_id), + m_burst_count(burst_count), + m_comp_mask(comp_mask), + m_element_size(element_size) +{ + set_always_keep(); + + m_data.add_use(this); + m_index.add_use(this); + if (m_rat_id_offset) /* the id offset register is optional */ + m_rat_id_offset->add_use(this); +} + + +void RatInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void RatInstr::accept(InstrVisitor& 
visitor) +{ + visitor.visit(this); +} + +bool RatInstr::is_equal_to(const RatInstr& lhs) const /* not implemented: asserts, and returns false when asserts are compiled out */ +{ + (void)lhs; + assert(0); + return false; +} + +bool RatInstr::do_ready() const /* NOTE(review): unlike GDSInstr::do_ready, the optional m_rat_id_offset register is not checked here - confirm */ +{ + if (m_rat_op != STORE_TYPED) { /* everything except STORE_TYPED waits for its required instructions to be scheduled */ + for (auto i: required_instr()) { + if (!i->is_scheduled()) { + return false; + } + } + } + + return m_data.ready(block_id(), index()) && + m_index.ready(block_id(), index()); +} + +void RatInstr::do_print(std::ostream& os) const +{ + os << "MEM_RAT RAT " << m_rat_id; + if (m_rat_id_offset) + os << "+" << *m_rat_id_offset; + os << " @" << m_index; + os << " OP:" << m_rat_op << " " << m_data; + os << " BC:" << m_burst_count + << " MASK:" << m_comp_mask + << " ES:" << m_element_size; + if (m_need_ack) + os << " ACK"; +} + +static RatInstr::ERatOp +get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) /* RAT opcode used when the result of the operation is read (the _RTN variants). */ +{ + switch (opcode) { + case nir_intrinsic_image_load: + return RatInstr::NOP_RTN; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + return RatInstr::ADD_RTN; + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + return RatInstr::AND_RTN; + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + return RatInstr::OR_RTN; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + return RatInstr::MIN_INT_RTN; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + return RatInstr::MAX_INT_RTN; + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + return RatInstr::MIN_UINT_RTN; + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + return RatInstr::MAX_UINT_RTN; + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + return RatInstr::XOR_RTN; + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: + if (util_format_is_float(format)) + return RatInstr::CMPXCHG_FLT_RTN; + else + return RatInstr::CMPXCHG_INT_RTN; + case 
nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + return RatInstr::XCHG_RTN; + default: + unreachable("Unsupported RAT instruction"); + } +} + +static RatInstr::ERatOp +get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) /* Variant used when the result is not read; exchange still maps to XCHG_RTN. */ +{ + switch (opcode) { + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + return RatInstr::ADD; + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + return RatInstr::AND; + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + return RatInstr::OR; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + return RatInstr::MIN_INT; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + return RatInstr::MAX_INT; + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + return RatInstr::MIN_UINT; + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + return RatInstr::MAX_UINT; + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + return RatInstr::XOR; + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: + if (util_format_is_float(format)) + return RatInstr::CMPXCHG_FLT; + else + return RatInstr::CMPXCHG_INT; + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + return RatInstr::XCHG_RTN; + default: + unreachable("Unsupported WO RAT instruction"); + } +} + +bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader) /* Dispatches SSBO and image intrinsics to their emitters; returns false for intrinsics not handled here. */ +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_ssbo: + return emit_ssbo_load(intr, shader); + case nir_intrinsic_store_ssbo: + return emit_ssbo_store(intr, shader); + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + case 
nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_exchange: + return emit_ssbo_atomic_op(intr, shader); + case nir_intrinsic_image_store: + return emit_image_store(intr, shader); + case nir_intrinsic_image_load: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_imax: + return emit_image_load_or_atomic(intr, shader); + case nir_intrinsic_image_size: + return emit_image_size(intr, shader); + case nir_intrinsic_get_ssbo_size: + return emit_ssbo_size(intr, shader); + default: + return false; + } +} + +bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader) /* Emits a LoadFromBuffer whose format and swizzle are chosen by the destination component count. */ +{ + auto &vf = shader.value_factory(); + auto dest = vf.dest_vec4(intr->dest, pin_group); + + /** src0 not used, should be some offset */ + auto addr = vf.src(intr->src[1], 0); + auto addr_temp = vf.temp_register(); + + /** Should be lowered in nir */ + shader.emit_instruction(new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2), + {alu_write, alu_last_instr})); + + const EVTXDataFormat formats[4] = { + fmt_32, + fmt_32_32, + fmt_32_32_32, + fmt_32_32_32_32 + }; + + RegisterVec4::Swizzle dest_swz[4] = { + {0,7,7,7}, + {0,1,7,7}, + {0,1,2,7}, + {0,1,2,3} + }; + + int comp_idx = nir_dest_num_components(intr->dest) - 1; /* index into formats[] and dest_swz[] */ + + auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0); {} + + auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset + + shader.ssbo_image_offset(); + + auto ir = new LoadFromBuffer(dest, dest_swz[comp_idx], addr_temp, 0, + res_id, res_offset, formats[comp_idx]); + ir->set_fetch_flag(FetchInstr::use_tc); + 
ir->set_num_format(vtx_nf_int); + + shader.emit_instruction(ir); + return true; +} + +bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader) /* Emits one single-component STORE_TYPED per component of src[0]. */ +{ + + /* Force the scheduler to not move the preparation too far away, by starting + * a new block (TODO: better priority handling in the scheduler)*/ + if (nir_src_num_components(instr->src[0]) > 2) + shader.start_new_block(0); + + auto &vf = shader.value_factory(); + auto orig_addr = vf.src(instr->src[2], 0); + + auto addr_base = vf.temp_register(); + + auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1); + + shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr, + vf.literal(2), AluInstr::write)); + + for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) { /* component i is stored at addr_base + i */ + auto addr_vec = vf.temp_vec4(pin_group, {0,1,2,7}); + if (i == 0) { + shader.emit_instruction(new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write)); + } else { + shader.emit_instruction(new AluInstr(op2_add_int, addr_vec[0], addr_base, + vf.literal(i), + AluInstr::last_write)); + } + auto value = vf.src(instr->src[0], i); + PRegister v = vf.temp_register(0); + shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write)); + auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan); + auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED, + value_vec, addr_vec, offset + shader.ssbo_image_offset(), + rat_id, 1, 1, 0); + shader.emit_instruction(store); + } + + return true; +} + +bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader) /* Emits the RAT atomic and, when the result is read, a fetch of the returned value. */ +{ + auto& vf = shader.value_factory(); + auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0); {} + + bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses); + auto opcode = read_result ? 
get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) : + get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT); + + auto coord_orig = vf.src(intr->src[1], 0); + auto coord = vf.temp_register(0); + + auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3}); + + shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write)); + + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write)); + + + if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { /* src[3] goes to channel 0; src[2] goes to channel 2 on Cayman and channel 3 otherwise */ + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write)); + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3], + vf.src(intr->src[2], 0), {alu_last_instr, alu_write})); + } else { + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write)); + } + + + RegisterVec4 out_vec(coord, coord, coord, coord, pin_group); + + auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(), + image_offset, 1, 0xf, 0); + shader.emit_instruction(atomic); + + atomic->set_ack(); + if (read_result) { + atomic->set_instr_flag(ack_rat_return_write); + auto dest = vf.dest_vec4(intr->dest, pin_group); + + auto fetch = new FetchInstr(vc_fetch, + dest, {0, 1, 2, 3}, + shader.rat_return_address(), + 0, + no_index_offset, + fmt_32, + vtx_nf_int, + vtx_es_none, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + image_offset); + fetch->set_mfc(15); + fetch->set_fetch_flag(FetchInstr::srf_mode); + fetch->set_fetch_flag(FetchInstr::use_tc); + fetch->set_fetch_flag(FetchInstr::vpm); + fetch->set_fetch_flag(FetchInstr::wait_ack); + fetch->add_required_instr(atomic); /* the fetch of the return value depends on the atomic */ + shader.chain_ssbo_read(fetch); + shader.emit_instruction(fetch); + } + + return true; + +} + +bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader) +{ + auto& vf = shader.value_factory(); + 
auto dest = vf.dest_vec4(intr->dest, pin_group); + + auto const_offset = nir_src_as_const_value(intr->src[0]); + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + assert(0 && "dynamic buffer offset not supported in buffer_size"); + + shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3},res_id)); + return true; +} + +/* Emit a typed RAT store for an image: the coordinates (src[1]) and the + * value (src[3]) are copied into group-pinned temporaries; for 1D array + * images the coordinate copy swaps components 1 and 2. The store requests + * an ACK only when the intrinsic's access qualifier has ACCESS_COHERENT. */ +bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader) +{ + auto& vf = shader.value_factory(); + auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {} + + + auto coord_load = vf.src_vec4(intrin->src[1], pin_chan); + auto coord = vf.temp_vec4(pin_group); + + auto value_load = vf.src_vec4(intrin->src[3], pin_chan); + auto value = vf.temp_vec4(pin_group); + + RegisterVec4::Swizzle swizzle = {0,1,2,3}; + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) + swizzle = {0,2,1,3}; + + for (int i = 0; i < 4; ++i) { + auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags)); + } + for (int i = 0; i < 4; ++i) { + auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags)); + } + + auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? 
cf_mem_rat_cacheless : cf_mem_rat; + auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid, + image_offset, 1, 0xf, 0); + + /* Test the access qualifier bits themselves: nir_intrinsic_has_access() + * only reports whether the intrinsic carries an access index at all. */ + if (nir_intrinsic_access(intrin) & ACCESS_COHERENT) + store->set_ack(); + shader.emit_instruction(store); + return true; +} + +bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader) +{ + auto& vf = shader.value_factory(); + auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {} + + bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses); + auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) : + get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT); + + auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan); + auto coord = vf.temp_vec4(pin_group); + + auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3}); + + RegisterVec4::Swizzle swizzle = {0,1,2,3}; + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) + swizzle = {0,2,1,3}; + + for (int i = 0; i < 4; ++i) { + auto flags = i != 3 ? AluInstr::write : AluInstr::last_write; + shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags)); + } + + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write)); + + if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) { + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write)); + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 
2 : 3], + vf.src(intrin->src[3], 0), AluInstr::last_write)); + } else { + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], + vf.src(intrin->src[3], 0), AluInstr::write)); + shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write)); + } + + auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid, + image_offset, 1, 0xf, 0); + shader.emit_instruction(atomic); + + atomic->set_ack(); + if (read_result) { + atomic->set_instr_flag(ack_rat_return_write); + auto dest = vf.dest_vec4(intrin->dest, pin_group); + + pipe_format format = nir_intrinsic_format(intrin); + unsigned fmt = fmt_32; + unsigned num_format = 0; + unsigned format_comp = 0; + unsigned endian = 0; + r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian); + + auto fetch = new FetchInstr(vc_fetch, + dest, {0, 1, 2, 3}, + shader.rat_return_address(), + 0, + no_index_offset, + (EVTXDataFormat)fmt, + (EVFetchNumFormat)num_format, + (EVFetchEndianSwap)endian, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + image_offset); + fetch->set_mfc(3); + fetch->set_fetch_flag(FetchInstr::srf_mode); + fetch->set_fetch_flag(FetchInstr::use_tc); + fetch->set_fetch_flag(FetchInstr::vpm); + fetch->set_fetch_flag(FetchInstr::wait_ack); + if (format_comp) + fetch->set_fetch_flag(FetchInstr::format_comp_signed); + + shader.chain_ssbo_read(fetch); + shader.emit_instruction(fetch); + } + + return true; +} + +#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) + +bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader) +{ + auto& vf = shader.value_factory(); + + auto src = RegisterVec4(0, true, {4,4,4,4}); + + assert(nir_src_as_uint(intrin->src[1]) == 0); + + auto const_offset = nir_src_as_const_value(intrin->src[0]); + PRegister dyn_offset = nullptr; + + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + dyn_offset = 
shader.emit_load_to_register(vf.src(intrin->src[0], 0)); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) { + auto dest = vf.dest_vec4(intrin->dest, pin_group); + shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id)); + return true; + } else { + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && + nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) { + /* Need to load the layers from a const buffer */ + + auto dest = vf.dest_vec4(intrin->dest, pin_group); + shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3}, + src, 0/* ?? */, res_id, dyn_offset)); + + shader.set_flag(Shader::sh_txs_cube_array_comp); + + if (const_offset) { + unsigned lookup_resid = const_offset[0].u32; + shader.emit_instruction(new AluInstr(op1_mov, dest[2], + vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, + R600_BUFFER_INFO_CONST_BUFFER), + AluInstr::last_write)); + } else { + /* If the addressing is indirect we have to get the z-value by using a binary search */ + auto addr = vf.temp_register(); + auto comp1 = vf.temp_register(); + auto comp2 = vf.temp_register(); + auto low_bit = vf.temp_register(); + auto high_bit = vf.temp_register(); + + auto trgt = vf.temp_vec4(pin_group); + + /* addr is the resource index shifted right by two, low_bit and high_bit + * are the index masked with 1 and 2; addr is used for the buffer load + * and the two bits drive the conditional selects that follow. */ + shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0), + vf.literal(2), AluInstr::write)); + shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0), + vf.one_i(), AluInstr::write)); + shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0), + vf.literal(2), AluInstr::last_write)); + + shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL, + R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float)); + + // this may be wrong + shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], + AluInstr::write)); + 
shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], + AluInstr::last_write)); + shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write)); + } + } else { + auto dest = vf.dest_vec4(intrin->dest, pin_group); + shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3}, + src, 0/* ?? */, res_id, dyn_offset)); + + } + } + return true; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.h b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h new file mode 100644 index 0000000..c224572 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h @@ -0,0 +1,177 @@ +#ifndef GDSINSTR_H +#define GDSINSTR_H + +#include "sfn_instr.h" +#include "sfn_valuefactory.h" + +namespace r600 { + +class Shader; + +class GDSInstr : public Instr { +public: + + GDSInstr(ESDOp op, Register *dest, + const RegisterVec4& src, int uav_base, + PRegister uav_id); + + bool is_equal_to(const GDSInstr& lhs) const; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + bool do_ready() const override; + + auto opcode() const {return m_op;} + auto src() const { return m_src;} + + const auto& dest() const { return m_dest;} + auto& dest() { return m_dest;} + + auto uav_id() const {return m_uav_id;} + auto uav_base() const {return m_uav_base;} + + static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer; + + static bool emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader); + uint32_t slots() const override {return 1;}; + +private: + + static bool emit_atomic_read(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_atomic_op2(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_atomic_inc(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_atomic_pre_dec(nir_intrinsic_instr *intr, Shader& shader); + + void do_print(std::ostream& os) const override; + + ESDOp m_op{DS_OP_INVALID}; 
+ Register *m_dest{nullptr}; + + RegisterVec4 m_src; + + int m_uav_base{0}; + PRegister m_uav_id{nullptr}; + std::bitset<8> m_tex_flags; +}; + + +/* Instruction that accesses a RAT resource: it carries the CF opcode, the RAT + * operation, the data and index register vectors, and the RAT id together + * with an optional register holding an id offset. */ +class RatInstr : public Instr { + +public: + enum ERatOp { + NOP, + STORE_TYPED, + STORE_RAW, + STORE_RAW_FDENORM, + CMPXCHG_INT, + CMPXCHG_FLT, + CMPXCHG_FDENORM, + ADD, + SUB, + RSUB, + MIN_INT, + MIN_UINT, + MAX_INT, + MAX_UINT, + AND, + OR, + XOR, + MSKOR, + INC_UINT, + DEC_UINT, + NOP_RTN = 32, + XCHG_RTN = 34, + XCHG_FDENORM_RTN, + CMPXCHG_INT_RTN, + CMPXCHG_FLT_RTN, + CMPXCHG_FDENORM_RTN, + ADD_RTN, + SUB_RTN, + RSUB_RTN, + MIN_INT_RTN, + MIN_UINT_RTN, + MAX_INT_RTN, + MAX_UINT_RTN, + AND_RTN, + OR_RTN, + XOR_RTN, + MSKOR_RTN, + UINT_RTN, + UNSUPPORTED + }; + + RatInstr(ECFOpCode cf_opcode, ERatOp rat_op, + const RegisterVec4& data, const RegisterVec4& index, + int rat_id, PRegister rat_id_offset, + int burst_count, int comp_mask, int element_size); + + auto rat_id_offset() const { return m_rat_id_offset;} + int rat_id() const { return m_rat_id;} + + ERatOp rat_op() const {return m_rat_op;} + + const auto& value() const { return m_data;} + auto& value() { return m_data;} + + const auto& addr() const { return m_index;} + auto& addr() { return m_index;} + + int data_gpr() const {return m_data.sel();} + int index_gpr() const {return m_index.sel();} + int elm_size() const {return m_element_size;} + + int comp_mask() const {return m_comp_mask;} + + bool need_ack() const {return m_need_ack;} + int burst_count() const {return m_burst_count;} + + int data_swz(int chan) const {return m_data[chan]->chan();} + + ECFOpCode cf_opcode() const { return m_cf_opcode;} + + /* Requesting an ACK also sets the mark flag */ + void set_ack() {m_need_ack = true; set_mark(); } + void set_mark() {m_need_mark = true; } + bool mark() const {return m_need_mark;} + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + bool is_equal_to(const RatInstr& lhs) const; + + static bool emit(nir_intrinsic_instr *intr, Shader& shader); + +private: + + static bool 
emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader); + + static bool emit_image_store(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_image_load_or_atomic(nir_intrinsic_instr *intr, Shader& shader); + static bool emit_image_size(nir_intrinsic_instr *intr, Shader& shader); + + bool do_ready() const override; + void do_print(std::ostream& os) const override; + + ECFOpCode m_cf_opcode; + ERatOp m_rat_op; + + RegisterVec4 m_data; + RegisterVec4 m_index; + PRegister m_rat_id_offset{nullptr}; + + int m_rat_id{0}; + int m_burst_count{0}; + int m_comp_mask{15}; + int m_element_size{3}; + bool m_need_ack{false}; + bool m_need_mark{false}; + +}; + + +} + +#endif // GDSINSTR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp new file mode 100644 index 0000000..a13d2a9 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp @@ -0,0 +1,1011 @@ +#include "sfn_instr_tex.h" +#include "sfn_instr_alu.h" +#include "sfn_instr_fetch.h" +#include "sfn_debug.h" + +namespace r600 { + +using std::string; + +TexInstr::TexInstr(Opcode op, const RegisterVec4& dest, + const RegisterVec4::Swizzle& dest_swizzle, + const RegisterVec4& src, unsigned sid, unsigned rid, + PVirtualValue sampler_offs): + InstrWithVectorResult(dest, dest_swizzle), + m_opcode(op), + m_src(src), + m_sampler_offset(sampler_offs), + m_inst_mode(0), + m_sampler_id(sid), + m_resource_id(rid) +{ + memset(m_offset, 0, sizeof(m_offset)); + m_src.add_use(this); + + if (m_sampler_offset && m_sampler_offset->as_register()) + m_sampler_offset->as_register()->add_use(this); +} + +void TexInstr::accept(ConstInstrVisitor& visitor) const +{ + visitor.visit(*this); +} + +void TexInstr::accept(InstrVisitor& visitor) +{ + 
visitor.visit(this); +} + +/* Store the texel offset for coordinate `index` (0..2) as given. */ +void TexInstr::set_offset(unsigned index, int32_t val) +{ + assert(index < 3); + m_offset[index] = val; +} + +/* Return twice the stored texel offset for coordinate `index` (0..2). + * A multiplication is used because left-shifting a negative signed value + * is undefined behaviour in C++17. */ +int TexInstr::get_offset(unsigned index) const +{ + assert(index < 3); + return m_offset[index] * 2; +} + +void TexInstr::set_gather_comp(int cmp) +{ + m_inst_mode = cmp; +} + +bool TexInstr::is_equal_to(const TexInstr& lhs) const +{ + if (m_opcode != lhs.m_opcode) + return false; + + if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle())) + return false; + + if (m_src != lhs.m_src) + return false; + + if (m_sampler_offset && lhs.m_sampler_offset) { + if (!m_sampler_offset->equal_to(*lhs.m_sampler_offset)) + return false; + } else if ((m_sampler_offset && !lhs.m_sampler_offset) || + (!m_sampler_offset && lhs.m_sampler_offset)) + return false; + + if (m_tex_flags != lhs.m_tex_flags) + return false; + + for(int i = 0; i < 3; ++i) { + if (m_offset[i] != lhs.m_offset[i]) + return false; + } + return m_inst_mode == lhs.m_inst_mode && + m_sampler_id == lhs.m_sampler_id && + m_resource_id == lhs.m_resource_id; +} + +bool TexInstr::propagate_death() +{ + m_src.del_use(this); + return true; +} + +bool TexInstr::do_ready() const +{ + for (auto p : m_prepare_instr) + if (!p->ready()) + return false; + + for (auto p :required_instr()) + if (!p->is_scheduled() && !p->is_dead()) { + return false; + } + + if (m_sampler_offset && m_sampler_offset->as_register() && + !m_sampler_offset->as_register()->ready(block_id(), index())) + return false; + return m_src.ready(block_id(), index()); +} + +void TexInstr::do_print(std::ostream& os) const +{ + + os << "TEX " << opname(m_opcode) << " "; + print_dest(os); + + os << " : "; + m_src.print(os); + + os << " RID:" << m_resource_id + << " SID:" << m_sampler_id; + + if (m_sampler_offset) + os << " SO:" << *m_sampler_offset; + + if (m_offset[0]) + os << " OX:" << m_offset[0]; + if (m_offset[1]) + os << " OY:" << m_offset[1]; + if (m_offset[2]) + os << " OZ:" << m_offset[2]; + + if (m_inst_mode || 
is_gather(m_opcode)) + os << " MODE:" << m_inst_mode; + + os << " "; + os << (m_tex_flags.test(x_unnormalized) ? "U" : "N"); + os << (m_tex_flags.test(y_unnormalized) ? "U" : "N"); + os << (m_tex_flags.test(z_unnormalized) ? "U" : "N"); + os << (m_tex_flags.test(w_unnormalized) ? "U" : "N"); +} + +const char *TexInstr::opname(Opcode op) +{ + switch (op) { + case ld: return "LD"; + case get_resinfo: return "GET_TEXTURE_RESINFO"; + case get_nsamples: return "GET_NUMBER_OF_SAMPLES"; + case get_tex_lod: return "GET_LOD"; + case get_gradient_h: return "GET_GRADIENTS_H"; + case get_gradient_v: return "GET_GRADIENTS_V"; + case set_offsets: return "SET_TEXTURE_OFFSETS"; + case keep_gradients: return "KEEP_GRADIENTS"; + case set_gradient_h: return "SET_GRADIENTS_H"; + case set_gradient_v: return "SET_GRADIENTS_V"; + case sample: return "SAMPLE"; + case sample_l: return "SAMPLE_L"; + case sample_lb: return "SAMPLE_LB"; + case sample_lz: return "SAMPLE_LZ"; + case sample_g: return "SAMPLE_G"; + case sample_g_lb: return "SAMPLE_G_L"; + case gather4: return "GATHER4"; + case gather4_o: return "GATHER4_O"; + case sample_c: return "SAMPLE_C"; + case sample_c_l: return "SAMPLE_C_L"; + case sample_c_lb: return "SAMPLE_C_LB"; + case sample_c_lz: return "SAMPLE_C_LZ"; + case sample_c_g: return "SAMPLE_C_G"; + case sample_c_g_lb: return "SAMPLE_C_G_L"; + case gather4_c: return "GATHER4_C"; + case gather4_c_o: return "OP_GATHER4_C_O"; + default: return "ERROR"; + } + +} + +const std::map TexInstr::s_opcode_map = { + {ld, "LD"}, + {get_resinfo,"GET_TEXTURE_RESINFO"}, + {get_nsamples,"GET_NUMBER_OF_SAMPLES"}, + {get_tex_lod,"GET_LOD"}, + {get_gradient_h,"GET_GRADIENTS_H"}, + {get_gradient_v,"GET_GRADIENTS_V"}, + {set_offsets,"SET_TEXTURE_OFFSETS"}, + {keep_gradients,"KEEP_GRADIENTS"}, + {set_gradient_h,"SET_GRADIENTS_H"}, + {set_gradient_v,"SET_GRADIENTS_V"}, + {sample,"SAMPLE"}, + {sample_l,"SAMPLE_L"}, + {sample_lb,"SAMPLE_LB"}, + {sample_lz,"SAMPLE_LZ"}, + {sample_g,"SAMPLE_G"}, + 
{sample_g_lb,"SAMPLE_G_L"}, + {gather4,"GATHER4"}, + {gather4_o,"GATHER4_O"}, + {sample_c,"SAMPLE_C"}, + {sample_c_l,"SAMPLE_C_L"}, + {sample_c_lb,"SAMPLE_C_LB"}, + {sample_c_lz,"SAMPLE_C_LZ"}, + {sample_c_g,"SAMPLE_C_G"}, + {sample_c_g_lb,"SAMPLE_C_G_L"}, + {gather4_c,"GATHER4_C"}, + {gather4_c_o,"OP_GATHER4_C_O"}, + {unknown, "ERROR"} +}; + +bool TexInstr::is_gather(Opcode op) +{ + return op == gather4 || op == gather4_c || + op == gather4_o || op == gather4_c_o; +} + +TexInstr::Opcode TexInstr::op_from_string(const std::string& s) +{ + for (auto& [op, str] : s_opcode_map) { + if (s == str) + return op; + } + return unknown; +} + +Instr::Pointer TexInstr::from_string(std::istream& is, ValueFactory& value_fctory) +{ + string opstr; + string deststr; + is >> opstr >> deststr; + + auto opcode = TexInstr::op_from_string(opstr); + + RegisterVec4::Swizzle dest_swz; + + auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group); + + char dummy; + is >> dummy; + assert(dummy == ':'); + + string srcstr; + is >> srcstr; + + auto src = value_fctory.src_vec4_from_string(srcstr); + + string res_id_str; + string sampler_id_str; + + is >> res_id_str >> sampler_id_str; + + int res_id = int_from_string_with_prefix(res_id_str, "RID:"); + int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:"); + + auto tex = new TexInstr( opcode, dest, dest_swz, src, sampler_id, res_id, nullptr); + + while (!is.eof() && is.good()) { + std::string next_token; + is >> next_token; + + if (next_token.empty()) + break; + + if (next_token[0] == 'U' || next_token[0] == 'N') { + tex->read_tex_coord_normalitazion(next_token); + } else { + tex->set_tex_param(next_token); + } + } + + return tex; +} + +void TexInstr::read_tex_coord_normalitazion(const std::string& flags) +{ + assert(flags.length() == 4); + if (flags[0] == 'U') set_tex_flag(x_unnormalized); + if (flags[1] == 'U') set_tex_flag(y_unnormalized); + if (flags[2] == 'U') set_tex_flag(z_unnormalized); + if (flags[3] == 
'U') set_tex_flag(w_unnormalized); +} + +void TexInstr::set_tex_param(const std::string& token) +{ + if (token.substr(0,3) == "OX:") + set_offset(0, int_from_string_with_prefix(token, "OX:")); + else if (token.substr(0,3) == "OY:") + set_offset(1, int_from_string_with_prefix(token, "OY:")); + else if (token.substr(0,3) == "OZ:") + set_offset(2, int_from_string_with_prefix(token, "OZ:")); + else if (token.substr(0,5) == "MODE:") + set_inst_mode(int_from_string_with_prefix(token, "MODE:")); + else if (token.substr(0,3) == "SO:") + set_sampler_offset(VirtualValue::from_string(token.substr(3))); + else { + std::cerr << "Token '" << token << "': "; + unreachable("Unknown token in tex param"); + } +} + +bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader) +{ + Inputs src(*tex, shader.value_factory()); + + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + switch (tex->op) { + case nir_texop_txs: + return emit_tex_txs(tex, src, {0,1,2,3}, shader); + case nir_texop_txf: + return emit_buf_txf(tex, src, shader); + default: + return false; + } + } else { + switch (tex->op) { + case nir_texop_tex: + return emit_tex_tex(tex, src, shader); + case nir_texop_txf: + return emit_tex_txf(tex, src, shader); + case nir_texop_txb: + case nir_texop_txl: + return emit_tex_txl_txb(tex, src, shader); + case nir_texop_txs: + return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader); + case nir_texop_lod: + return emit_tex_lod(tex, src, shader); + case nir_texop_query_levels: + return emit_tex_txs(tex, src, {3,7,7,7}, shader); + case nir_texop_txd: + return emit_tex_txd(tex, src, shader); + case nir_texop_txf_ms: + return emit_tex_tex_ms(tex, src, shader); + case nir_texop_tg4: + return emit_tex_tg4(tex, src, shader); + case nir_texop_texture_samples: + return emit_tex_texture_samples(tex, src, shader); + default: + return false; + } + } + return true; +} + +struct SamplerId { + int id; + bool indirect; +}; + +SamplerId +get_sampler_id(int sampler_id, const nir_variable *deref) +{ + SamplerId 
result = {sampler_id, false}; + + if (deref) { + assert(glsl_type_is_sampler(deref->type)); + result.id = deref->data.binding; + } + return result; +} + + +bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + auto& vf = shader.value_factory(); + + sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(tex) + << "' (" << __func__ << ")\n"; + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + auto src_coord = prepare_source(tex, src, shader); + auto dst = vf.dest_vec4(tex->dest, pin_group); + + auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + if (tex->is_array) + irt->set_tex_flag(TexInstr::z_unnormalized); + + irt->set_rect_coordinate_flags(tex); + irt->set_coord_offsets(src.offset); + + shader.emit_instruction(irt); + return true; +} + +bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + auto& vf = shader.value_factory(); + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto src_coord = prepare_source(tex, src, shader); + + auto dst = vf.dest_vec4(tex->dest, pin_group); + + auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + if (tex->is_array) + irt->set_tex_flag(TexInstr::z_unnormalized); + + irt->set_rect_coordinate_flags(tex); + irt->set_coord_offsets(src.offset); + + shader.emit_instruction(irt); + return true; +} + + +/* Emit a texel fetch (txf): the coordinates are copied into a group-pinned + * temporary, with the optional texel offset added per component, and the LOD + * is written to w. For 1D arrays the second coordinate goes to z. */ +bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + auto& vf = shader.value_factory(); + + int sampler = tex->sampler_index; + + auto swizzle = src.swizzle_from_ncomps(tex->coord_components); + swizzle[3] = 3; + + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) { + swizzle[2] = 1; + swizzle[1] = 7; + } 
+ + auto src_coord = vf.temp_vec4(pin_group, swizzle); + + for (unsigned i = 0; i < tex->coord_components; i++) { + unsigned k = i; + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1) + k = 2; + + + /* src.offset points to a single nir_src: select its component i instead + * of indexing the pointer like an array, and only for components the + * offset value actually has (same check as in emit_tex_tex_ms). */ + if (src.offset && i < src.offset->ssa->num_components) { + shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i], + vf.src(*src.offset, i), + AluInstr::write)); + } else { + shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write)); + } + } + + shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write)); + + auto dst = vf.dest_vec4(tex->dest, pin_group); + + auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord, + sampler, + sampler + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + if (tex->is_array) + tex_ir->set_tex_flag(z_unnormalized); + + tex_ir->set_rect_coordinate_flags(tex); + tex_ir->set_sampler_offset(src.sampler_offset); + + shader.emit_instruction(tex_ir); + + return true; +} + +bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group); + + PRegister tex_offset = nullptr; + if (src.texture_offset) + tex_offset = shader.emit_load_to_register(src.texture_offset); + auto ir = new LoadFromBuffer(dst, {0,1,2,3}, src.coord[0], 0, + tex->texture_index + R600_MAX_CONST_BUFFERS, + tex_offset, fmt_32_32_32_32_float); + ir->set_fetch_flag(FetchInstr::use_const_field); + shader.emit_instruction(ir); + shader.set_flag(Shader::sh_uses_tex_buffer); + return true; +} + +bool TexInstr::emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + assert(tex->src[0].src.is_ssa); + auto& vf = shader.value_factory(); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(tex) + << "' (" << __func__ << ")\n"; + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto 
sample_id_dest = vf.temp_vec4(pin_group); + RegisterVec4::Swizzle dest_swz = {0,7,7,7}; + + auto temp1 = vf.temp_vec4(pin_group); + for (unsigned i = 0; i < tex->coord_components; ++i) { + unsigned k = i; + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1) + k = 2; + + if (src.offset && i < src.offset->ssa->num_components) + shader.emit_instruction(new AluInstr(op2_add_int, temp1[k], + src.coord[i], + vf.src(*src.offset, i), + AluInstr::write)); + else + shader.emit_instruction(new AluInstr(op1_mov, temp1[k], + src.coord[i], AluInstr::write)); + } + + shader.emit_instruction(new AluInstr(op1_mov, temp1[3], + src.ms_index, AluInstr::last_write)); + + auto tex_sample_id_ir = new TexInstr(ld, sample_id_dest, dest_swz, temp1, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + tex_sample_id_ir->set_tex_flag(x_unnormalized); + tex_sample_id_ir->set_tex_flag(y_unnormalized); + tex_sample_id_ir->set_tex_flag(z_unnormalized); + tex_sample_id_ir->set_tex_flag(w_unnormalized); + tex_sample_id_ir->set_inst_mode(1); + + shader.emit_instruction(tex_sample_id_ir); + + Register *sample_id_dest_reg = sample_id_dest[0]; + + if (!src.ms_index->as_inline_const() || + src.ms_index->as_inline_const()->sel() != ALU_SRC_0) { + + auto help = vf.temp_register(); + + shader.emit_instruction(new AluInstr(op2_lshl_int, help, + src.ms_index, vf.literal(2), + AluInstr::last_write)); + + sample_id_dest_reg = vf.temp_register(); + shader.emit_instruction(new AluInstr(op2_lshr_int, sample_id_dest_reg, + sample_id_dest[0], help, + AluInstr::last_write)); + } + + auto temp2 = vf.temp_vec4(pin_group); + + for (unsigned i = 0; i < tex->coord_components; ++i) { + unsigned k = i; + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1) + k = 2; + + shader.emit_instruction(new AluInstr(op1_mov, temp2[k], + temp1[k], AluInstr::write)); + } + + shader.emit_instruction(new AluInstr(op2_and_int, temp2[3], + sample_id_dest_reg, vf.literal(15), + 
AluInstr::last_write)); + + auto dst = vf.dest_vec4(tex->dest, pin_group); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + shader.emit_instruction(tex_ir); + return true; +} + +bool TexInstr::emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader) +{ + RegisterVec4 dest = shader.value_factory().dest_vec4(instr->dest, pin_chan); + RegisterVec4 help{0, true, {4,4,4,4}}; + + int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index; + + auto ir = new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help, + 0, res_id, src.sampler_offset); + shader.emit_instruction(ir); + return true; +} + + +bool TexInstr::emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader) +{ + + auto& vf = shader.value_factory(); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(tex) + << "' (" << __func__ << ")\n"; + + auto dst = vf.dest_vec4(tex->dest, pin_group); + RegisterVec4 empty_dst(126, false, {0,0,0,0}, pin_group); + + auto swizzle = src.swizzle_from_ncomps(tex->coord_components); + + if (tex->is_shadow) + swizzle[3] = 3; + + unsigned array_coord = 2; + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) { + swizzle[2] = 1; + swizzle[1] = 7; + array_coord = 1; + } + + auto src_coord = vf.temp_vec4(pin_group, swizzle); + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto irgh = new TexInstr(set_gradient_h, empty_dst, {7,7,7,7}, src.ddx, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + auto irgv = new TexInstr(set_gradient_v, empty_dst, {7,7,7,7}, src.ddy, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + auto tir = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id, + 
sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + + /* r600_bytecode_add_tex has a hack that will start a new tex CF if + * set_gradient_h is emitted, so make sure it is emitted first */ + + AluInstr *ir = nullptr; + for (unsigned i = 0; i < tex->coord_components; ++i) { + int k = i; + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1) + k = 2; + + ir = new AluInstr(tex->is_array && i == array_coord ? op1_rndne : op1_mov, + src_coord[k], src.coord[i], + AluInstr::write); + shader.emit_instruction(ir); + } + + if (tex->is_shadow) { + ir = new AluInstr(op1_mov, src_coord[3], src.comperator, AluInstr::last_write); + shader.emit_instruction(ir); + } + + tir->add_prepare_instr(irgh); + tir->add_prepare_instr(irgv); + + if (tex->is_array) + tir->set_tex_flag(TexInstr::z_unnormalized); + + irgh->set_rect_coordinate_flags(tex); + irgv->set_rect_coordinate_flags(tex); + irgh->set_always_keep(); + irgv->set_always_keep(); + + tir->set_rect_coordinate_flags(tex); + + tir->set_coord_offsets(src.offset); + + if (shader.last_txd()) + tir->add_required_instr(shader.last_txd()); + + shader.emit_instruction(tir); + shader.set_last_txd(tir); + + return true; +} + +bool TexInstr::emit_tex_txs(nir_tex_instr *tex, Inputs& src, + RegisterVec4::Swizzle dest_swz, Shader& shader) +{ + auto& vf = shader.value_factory(); + + auto dest = vf.dest_vec4(tex->dest, pin_group); + + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,7,7,7}, + tex->sampler_index + R600_MAX_CONST_BUFFERS)); + } else { + + auto src_lod = vf.temp_register(); + shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write)); + + RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free); + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + if (tex->is_array && tex->sampler_dim == 
GLSL_SAMPLER_DIM_CUBE) + dest_swz[2] = 7; + + auto ir = new TexInstr(get_resinfo, dest, dest_swz, src_coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + ir->set_dest_swizzle(dest_swz); + shader.emit_instruction(ir); + + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2), + sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER); + + + auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write); + shader.emit_instruction(alu); + shader.set_flag(Shader::sh_txs_cube_array_comp); + } + } + + return true; +} + +/* Emit a gather4 texture operation. A non-constant offset is passed through a + * set_offsets instruction that is attached as prepare instruction; a constant + * offset switches the opcode to the variant without offset operand and is + * set as coordinate offset on the instruction itself. */ +bool TexInstr::emit_tex_tg4(nir_tex_instr* tex, Inputs& src , Shader& shader) +{ + auto& vf = shader.value_factory(); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(tex) + << "' (" << __func__ << ")\n"; + + TexInstr *set_ofs = nullptr; + + auto src_coord = prepare_source(tex, src, shader); + + auto dst = vf.dest_vec4(tex->dest, pin_group); + + RegisterVec4 empty_dst(125, false, {7,7,7,7}, pin_group); + + /* pre CAYMAN needs swizzle */ + auto dest_swizzle = shader.chip_class() <= ISA_CC_EVERGREEN ? + RegisterVec4::Swizzle{1, 2, 0, 3} : + RegisterVec4::Swizzle{0, 1, 2, 3}; + + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + bool literal_offset = false; + if (src.offset) { + literal_offset = nir_src_as_const_value(*src.offset) != nullptr; + r600::sfn_log << SfnLog::tex << " really have offsets and they are " << + (literal_offset ? 
"literal" : "varying") << + "\n"; + + if (!literal_offset) { + RegisterVec4::Swizzle swizzle = {4,4,4,4}; + /* The array layer is not part of the offset */ + int src_components = tex->coord_components; + if (tex->is_array) + --src_components; + + for (int i = 0; i < src_components; ++i) + swizzle[i] = i; + + auto ofs = vf.src_vec4(*src.offset, pin_group, swizzle); + RegisterVec4 dummy(0, true, {7,7,7,7}); + + set_ofs = new TexInstr(TexInstr::set_offsets, dummy, {7,7,7,7}, + ofs, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + } else { + src.opcode = src.opcode == gather4_o ? gather4 : gather4_c; + } + } + + auto irt = new TexInstr(src.opcode, dst, dest_swizzle, src_coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + irt->set_gather_comp(tex->component); + + if (tex->is_array) + irt->set_tex_flag(z_unnormalized); + + if (literal_offset) { + r600::sfn_log << SfnLog::tex << "emit literal offsets\n"; + irt->set_coord_offsets(src.offset); + } + + irt->set_rect_coordinate_flags(tex); + + if (set_ofs) { + set_ofs->set_always_keep(); + irt->add_prepare_instr(set_ofs); + } + + shader.emit_instruction(irt); + return true; +} + +auto TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader) -> RegisterVec4 +{ + RegisterVec4::Swizzle target{7,7,7,7}; + PVirtualValue src[4]{nullptr,nullptr,nullptr,nullptr}; + + + for (unsigned i = 0; i < tex->coord_components; ++i) { + target[i] = i; + src[i] = inputs.coord[i]; + } + + // array index always goes into z + if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) { + target[2] = 1; + target[1] = 7; + src[2] = inputs.coord[1]; + } + + /* With txl and txb shadow goes into z and lod or bias go into w */ + if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) { + target[3] = 3; + src[3] = tex->op == nir_texop_txl ? 
inputs.lod : inputs.bias; + if (tex->is_shadow){ + target[2] = 2; + src[2] = inputs.comperator; + } + } else if (tex->is_shadow) { + /* Other ops have shadow in w */ + target[3] = 3; + src[3] = inputs.comperator; + } + + auto src_coord = shader.value_factory().temp_vec4(pin_group, target); + + AluInstr *ir = nullptr; + for (int i = 0; i < 4; ++i) { + if (target[i] > 3) + continue; + + auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov; + + ir = new AluInstr(op, src_coord[i], src[i], AluInstr::write); + shader.emit_instruction(ir); + } + + if (ir) + ir->set_alu_flag(alu_last_instr); + + return src_coord; +} + +TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf): + sampler_deref(nullptr), + texture_deref(nullptr), + bias(nullptr), + comperator(nullptr), + lod(nullptr), + offset(nullptr), + gather_comp(nullptr), + ms_index(nullptr), + sampler_offset(nullptr), + texture_offset(nullptr), + opcode(ld) +{ + //sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n"; + + unsigned grad_components = instr.coord_components; + if (instr.is_array && !instr.array_is_lowered_cube) + --grad_components; + + for (unsigned i = 0; i < instr.num_srcs; ++i) { + switch (instr.src[i].src_type) { + case nir_tex_src_bias: + bias = vf.src(instr.src[i], 0); + break; + + case nir_tex_src_coord: { + coord = vf.src_vec4(instr.src[i].src, pin_none, swizzle_from_ncomps(instr.coord_components)); + } break; + case nir_tex_src_comparator: + comperator = vf.src(instr.src[i], 0); + break; + case nir_tex_src_ddx: + ddx = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components)); + break; + case nir_tex_src_ddy: + ddy = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components)); + break; + case nir_tex_src_lod: + lod = vf.src(instr.src[i].src, 0); + break; + case nir_tex_src_offset: + offset = &instr.src[i].src; + break; + /* case nir_tex_src_sampler_deref: + sampler_deref = 
get_deref_location(instr.src[i].src); + break; + case nir_tex_src_texture_deref: + texture_deref = get_deref_location(instr.src[i].src); + break; + */ + case nir_tex_src_ms_index: + ms_index = vf.src(instr.src[i], 0); + break; + case nir_tex_src_texture_offset: + texture_offset = vf.src(instr.src[i], 0); + break; + case nir_tex_src_sampler_offset: + sampler_offset = vf.src(instr.src[i], 0); + break; + case nir_tex_src_plane: + case nir_tex_src_projector: + case nir_tex_src_min_lod: + default: + unreachable("unsupported texture input type"); + } + } + + opcode = get_opcode(instr); + + +} + +auto TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode +{ + switch (instr.op) { + case nir_texop_tex: + return instr.is_shadow ? sample_c : sample; + case nir_texop_txf: + return ld; + case nir_texop_txb: + return instr.is_shadow ? sample_c_lb : sample_lb; + case nir_texop_txl: + return instr.is_shadow ? sample_c_l : sample_l; + case nir_texop_txs: + return get_resinfo; + case nir_texop_lod: + return get_resinfo; + case nir_texop_txd: + return instr.is_shadow ? sample_c_g : sample_g; + case nir_texop_tg4: + return instr.is_shadow ? + (offset ? gather4_c_o : gather4_c) : + (offset ? 
gather4_o : gather4); + + case nir_texop_txf_ms: + return ld; + case nir_texop_query_levels: + return get_resinfo; + case nir_texop_texture_samples: + return TexInstr::get_nsamples; + default: + unreachable("unsupported texture input opcode"); + } +} + +bool TexInstr::emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader) +{ + auto& vf = shader.value_factory(); + auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group); + + auto swizzle = src.swizzle_from_ncomps(tex->coord_components); + + auto src_coord = vf.temp_vec4(pin_group, swizzle); + + AluInstr *ir = nullptr; + for (unsigned i = 0; i < tex->coord_components; ++i) { + ir = new AluInstr(op1_mov, + src_coord[i], src.coord[i], + AluInstr::write); + shader.emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + auto irt = new TexInstr(TexInstr::get_tex_lod, dst, {1,0,7,7}, src_coord, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS); + + shader.emit_instruction(irt); + return true; +} + + +RegisterVec4::Swizzle TexInstr::Inputs::swizzle_from_ncomps(int comps) const +{ + RegisterVec4::Swizzle swz; + for (int i = 0; i < 4; ++i) + swz[i] = i < comps ? 
i : 7; + return swz; +} + +void TexInstr::set_coord_offsets(nir_src *offset) +{ + if (!offset) + return; + + assert(offset->is_ssa); + auto literal = nir_src_as_const_value(*offset); + assert(literal); + + for (int i = 0; i < offset->ssa->num_components; ++i) + set_offset(i, literal[i].i32); +} + +void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr) +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + set_tex_flag(x_unnormalized); + set_tex_flag(y_unnormalized); + } +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h new file mode 100644 index 0000000..a7c068c --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h @@ -0,0 +1,166 @@ +#ifndef INSTR_TEX_H +#define INSTR_TEX_H + +#include "sfn_instr.h" +#include "sfn_valuefactory.h" + +#include "sfn_shader.h" + +namespace r600 { + +class TexInstr : public InstrWithVectorResult { +public: + enum Opcode { + ld = FETCH_OP_LD, + get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO, + get_nsamples = FETCH_OP_GET_NUMBER_OF_SAMPLES, + get_tex_lod = FETCH_OP_GET_LOD, + get_gradient_h = FETCH_OP_GET_GRADIENTS_H, + get_gradient_v = FETCH_OP_GET_GRADIENTS_V, + set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS, + keep_gradients = FETCH_OP_KEEP_GRADIENTS, + set_gradient_h = FETCH_OP_SET_GRADIENTS_H, + set_gradient_v = FETCH_OP_SET_GRADIENTS_V, + sample = FETCH_OP_SAMPLE, + sample_l = FETCH_OP_SAMPLE_L, + sample_lb = FETCH_OP_SAMPLE_LB, + sample_lz = FETCH_OP_SAMPLE_LZ, + sample_g = FETCH_OP_SAMPLE_G, + sample_g_lb = FETCH_OP_SAMPLE_G_L, + gather4 = FETCH_OP_GATHER4, + gather4_o = FETCH_OP_GATHER4_O, + + sample_c = FETCH_OP_SAMPLE_C, + sample_c_l = FETCH_OP_SAMPLE_C_L, + sample_c_lb = FETCH_OP_SAMPLE_C_LB, + sample_c_lz = FETCH_OP_SAMPLE_C_LZ, + sample_c_g = FETCH_OP_SAMPLE_C_G, + sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L, + gather4_c = FETCH_OP_GATHER4_C, + gather4_c_o = FETCH_OP_GATHER4_C_O, + unknown = 255 + }; + + enum Flags { + x_unnormalized, + y_unnormalized, 
+ z_unnormalized, + w_unnormalized, + grad_fine, + num_tex_flag + }; + + struct Inputs { + Inputs(const nir_tex_instr& instr, ValueFactory &vf); + const nir_variable *sampler_deref; + const nir_variable *texture_deref; + RegisterVec4 coord; + PVirtualValue bias; + PVirtualValue comperator; + PVirtualValue lod; + RegisterVec4 ddx; + RegisterVec4 ddy; + nir_src *offset; + PVirtualValue gather_comp; + PVirtualValue ms_index; + PVirtualValue sampler_offset; + PVirtualValue texture_offset; + + RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const; + + Opcode opcode; + private: + auto get_opcode(const nir_tex_instr& instr) -> Opcode; + }; + + TexInstr(Opcode op, const RegisterVec4& dest, + const RegisterVec4::Swizzle& dest_swizzle, + const RegisterVec4& src, unsigned sid, unsigned rid, + PVirtualValue sampler_offs = nullptr); + + TexInstr(const TexInstr& orig) = delete; + TexInstr(const TexInstr&& orig) = delete; + TexInstr& operator =(const TexInstr& orig) = delete; + TexInstr& operator =(const TexInstr&& orig) = delete; + + void accept(ConstInstrVisitor& visitor) const override; + void accept(InstrVisitor& visitor) override; + + const auto& src() const {return m_src;} + auto& src() {return m_src;} + + unsigned opcode() const {return m_opcode;} + unsigned sampler_id() const {return m_sampler_id;} + unsigned resource_id() const {return m_resource_id;} + + void set_offset(unsigned index, int32_t val); + int get_offset(unsigned index) const; + + void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;} + int inst_mode() const { return m_inst_mode;} + + void set_tex_flag(Flags flag) {m_tex_flags.set(flag);} + bool has_tex_flag(Flags flag) const {return m_tex_flags.test(flag);} + + void set_sampler_offset(PVirtualValue ofs) {m_sampler_offset = ofs;} + auto* sampler_offset() const {return m_sampler_offset;} + + void set_gather_comp(int cmp); + bool is_equal_to(const TexInstr& lhs) const; + + static Opcode op_from_string(const std::string& s); + static Instr::Pointer 
from_string(std::istream& is, ValueFactory& value_fctory); + + static bool from_nir(nir_tex_instr *tex, Shader& shader); + + uint32_t slots() const override {return 1;}; + + auto prepare_instr() const { return m_prepare_instr;} + +private: + + bool do_ready() const override; + void do_print(std::ostream& os) const override; + bool propagate_death() override; + + static const char *opname(Opcode code); + static bool is_gather(Opcode op); + + void read_tex_coord_normalitazion(const std::string& next_token); + void set_tex_param(const std::string& next_token); + + static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4; + + static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src, + RegisterVec4::Swizzle dest_swz, Shader& shader); + static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader); + static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader); + static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader); + static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader); + + void set_coord_offsets(nir_src *offset); + void set_rect_coordinate_flags(nir_tex_instr* instr); + void add_prepare_instr(TexInstr *ir) {m_prepare_instr.push_back(ir);}; + + Opcode m_opcode; + + RegisterVec4 m_src; + PVirtualValue m_sampler_offset; + std::bitset m_tex_flags; + int m_offset[3]; + int m_inst_mode; + unsigned m_sampler_id; + unsigned m_resource_id; + + static const std::map s_opcode_map; + std::list m_prepare_instr; +}; + +} + +#endif // INSTR_TEX_H diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp new file mode 100644 index 0000000..b73cc13 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp @@ -0,0 +1,188 @@ +#include "sfn_instrfactory.h" + +#include "sfn_instr_alugroup.h" +#include "sfn_debug.h" +#include "sfn_instr_controlflow.h" +#include "sfn_instr_export.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_lds.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_tex.h" + +#include "sfn_alu_defines.h" + +#include "sfn_shader.h" + +#include +#include +#include + +namespace r600 { + +using std::string; +using std::vector; + +InstrFactory::InstrFactory(): + group(nullptr) +{ + +} + +PInst InstrFactory::from_string(const std::string& s, int nesting_depth) +{ + string type; + std::istringstream is(s); + + PInst result = nullptr; + + do { + is >> type; + } while (type.empty() && is.good()); + + if (type == "ALU_GROUP_BEGIN") { + group = new AluGroup(); + group->set_nesting_depth(nesting_depth); + return nullptr; + } else if (type == "ALU_GROUP_END") { + AluGroup *retval = group; + group = nullptr; + return retval; + } else if (type == "ALU") { + result = AluInstr::from_string(is, m_value_factory, group); + } else if (type == "TEX") { + result = TexInstr::from_string(is, m_value_factory); + } else if (type == "EXPORT") { + result = ExportInstr::from_string(is, m_value_factory); + } else if (type == "EXPORT_DONE") { + result = ExportInstr::last_from_string(is, m_value_factory); + } else if (type == "VFETCH") { + result = FetchInstr::from_string(is, m_value_factory); + } else if (type == "GET_BUF_RESINFO") { + result = QueryBufferSizeInstr::from_string(is, m_value_factory); + } else if (type == "LOAD_BUF") { + result = LoadFromBuffer::from_string(is, m_value_factory); + } else if (type == "READ_SCRATCH") { + result = LoadFromScratch::from_string(is, m_value_factory); + } else if (type == "IF") { + result = IfInstr::from_string(is, 
m_value_factory); + } else if (type == "WRITE_SCRATCH") { + result = WriteScratchInstr::from_string(is, m_value_factory); + } else if (type == "MEM_RING") { + result = MemRingOutInstr::from_string(is, m_value_factory); + } else if (type == "EMIT_VERTEX") { + result = EmitVertexInstr::from_string(is, false); + } else if (type == "EMIT_CUT_VERTEX") { + result = EmitVertexInstr::from_string(is, true); + } else if (type == "LDS_READ") { + result = LDSReadInstr::from_string(is, m_value_factory); + } else if (type == "LDS") { + result = LDSAtomicInstr::from_string(is, m_value_factory); + } else if (type == "WRITE_TF") { + result = WriteTFInstr::from_string(is, m_value_factory); + } else + result = ControlFlowInstr::from_string(type); + + if (!result && !group) { + std::cerr << "Error translating '" << s << "'\n"; + } + + return result; +} + +bool InstrFactory::from_nir(nir_instr *instr, Shader& shader) +{ + switch (instr->type) { + case nir_instr_type_alu: + return AluInstr::from_nir(nir_instr_as_alu(instr), shader); + case nir_instr_type_intrinsic: + return shader.process_intrinsic(nir_instr_as_intrinsic(instr)); + case nir_instr_type_load_const: + return load_const(nir_instr_as_load_const(instr), shader); + case nir_instr_type_tex: + return TexInstr::from_nir(nir_instr_as_tex(instr), shader); + case nir_instr_type_jump: + return process_jump(nir_instr_as_jump(instr), shader); + case nir_instr_type_ssa_undef: + return process_undef(nir_instr_as_ssa_undef(instr), shader); + default: + fprintf(stderr, "Instruction type %d not supported\n", instr->type); + return false; + } +} + +bool InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader) +{ + AluInstr *ir = nullptr; + + if (literal->def.bit_size == 64) { + for (int i = 0; i < literal->def.num_components; ++i) { + auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none); + auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff); + shader.emit_instruction(new AluInstr(op1_mov, dest0, 
src0, {alu_write})); + + auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none); + auto src1 = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff); + shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write)); + } + } else { + Pin pin = literal->def.num_components == 1 ? pin_free : pin_none; + for (int i = 0; i < literal->def.num_components; ++i) { + auto dest = m_value_factory.dest(literal->def, i, pin); + uint32_t v = literal->value[i].i32; + PVirtualValue src = nullptr; + switch (v) { + case 0: src = m_value_factory.zero(); break; + case 1: src = m_value_factory.one_i(); break; + case 0xffffffff: src = m_value_factory.inline_const(ALU_SRC_M_1_INT, 0); break; + case 0x3f800000: src = m_value_factory.inline_const(ALU_SRC_1, 0); break; + case 0x3f000000: src = m_value_factory.inline_const(ALU_SRC_0_5, 0); break; + default: src = m_value_factory.literal(v); + } + + ir = new AluInstr(op1_mov, dest, src, {alu_write}); + shader.emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + + } + return true; +} + +bool InstrFactory::process_jump(nir_jump_instr *instr, Shader& shader) +{ + ControlFlowInstr::CFType type; + switch (instr->type) { + case nir_jump_break: + type = ControlFlowInstr::cf_loop_break; + break; + + case nir_jump_continue: + type = ControlFlowInstr::cf_loop_continue; + break; + + default: { + nir_instr *i = reinterpret_cast(instr); + sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n"; + return false; + } + } + shader.emit_instruction(new ControlFlowInstr(type)); + shader.start_new_block(0); + + return true; +} + +bool InstrFactory::process_undef(nir_ssa_undef_instr *undef, Shader& shader) +{ + for (int i = 0; i < undef->def.num_components; ++i) { + auto dest = shader.value_factory().undef(undef->def.index, i); + shader.emit_instruction(new AluInstr(op1_mov, dest, + value_factory().zero(), + AluInstr::last_write)); + } + return true; +} + + +} diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instrfactory.h b/src/gallium/drivers/r600/sfn/sfn_instrfactory.h new file mode 100644 index 0000000..5775813 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.h @@ -0,0 +1,34 @@ +#ifndef INSTRFACTORY_H +#define INSTRFACTORY_H + +#include "sfn_instr.h" +#include "sfn_valuefactory.h" + + +#include + +namespace r600 { + +class Shader; +class InstrFactory : public Allocate { +public: + InstrFactory(); + + PInst from_string(const std::string &s, int nesting_depth); + bool from_nir(nir_instr *instr, Shader& shader); + auto& value_factory() { return m_value_factory;} + +private: + bool load_const(nir_load_const_instr *lc, Shader& shader); + bool process_jump(nir_jump_instr *instr, Shader& shader); + bool process_undef(nir_ssa_undef_instr *undef, Shader& shader); + + Instr::Pointer export_from_string(std::istream& is, bool is_last); + + ValueFactory m_value_factory; + AluGroup *group; +}; + +} + +#endif // INSTRFACTORY_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp deleted file mode 100644 index 72cf231..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_instruction_alu.h" -#include "sfn_valuepool.h" - -namespace r600 { - -const AluModifiers AluInstruction::src_abs_flags[2] = - {alu_src0_abs, alu_src1_abs}; -const AluModifiers AluInstruction::src_neg_flags[3] = - {alu_src0_neg, alu_src1_neg, alu_src2_neg}; -const AluModifiers AluInstruction::src_rel_flags[3] = - {alu_src0_rel, alu_src1_rel, alu_src2_rel}; - -AluInstruction::AluInstruction(EAluOp opcode): - Instruction (Instruction::alu), - m_opcode(opcode), - m_src(alu_ops.at(opcode).nsrc), - m_bank_swizzle(alu_vec_unknown), - m_cf_type(cf_alu) -{ - if (alu_ops.at(opcode).nsrc == 3) - m_flags.set(alu_op3); -} - -AluInstruction::AluInstruction(EAluOp opcode, PValue dest, - std::vector src, - const std::set& flags): - Instruction (Instruction::alu), - m_opcode(opcode), - m_dest(dest), - m_bank_swizzle(alu_vec_unknown), - m_cf_type(cf_alu) -{ - assert(dest); - m_src.swap(src); - for (auto f : flags) - m_flags.set(f); - - if (alu_ops.at(opcode).nsrc == 3) - m_flags.set(alu_op3); - - for (auto &s: m_src) - add_remappable_src_value(&s); - - add_remappable_dst_value(&m_dest); -} - -AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, - const std::set& flags): - AluInstruction(opcode, dest, std::vector{src0}, flags) -{ -} - -AluInstruction::AluInstruction(EAluOp opcode, PValue dest, - PValue src0, PValue src1, - const std::set &m_flags): - AluInstruction(opcode, dest, {src0, src1}, m_flags) -{ -} - -AluInstruction::AluInstruction(EAluOp opcode, 
PValue dest, PValue src0, - PValue src1, PValue src2, - const std::set &flags): - AluInstruction(opcode, dest, {src0, src1, src2}, flags) -{ -} - -bool AluInstruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == alu); - const auto& oth = static_cast(lhs); - - if (m_opcode != oth.m_opcode) { - return false; - } - - if (*m_dest != *oth.m_dest) - return false; - - if (m_src.size() != oth.m_src.size()) - return false; - - for (unsigned i = 0; i < m_src.size(); ++i) - if (*m_src[i] != *oth.m_src[i]) { - return false; - } - return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type); -} - -void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - for (auto c: candidates) { - if (*c == *m_dest) - m_dest = new_value; - - for (auto& s: m_src) { - if (*c == *s) - s = new_value; - } - } -} - -PValue AluInstruction::remap_one_registers(PValue reg, std::vector& map, - ValueMap &values) -{ - auto new_index = map[reg->sel()]; - if (new_index.valid) - reg = values.get_or_inject(new_index.new_reg, reg->chan()); - map[reg->sel()].used = true; - return reg; -} - - -void AluInstruction::set_flag(AluModifiers flag) -{ - m_flags.set(flag); -} - -void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz) -{ - m_bank_swizzle = bswz; -} - -unsigned AluInstruction::n_sources() const -{ - return m_src.size(); -} - -void AluInstruction::do_print(std::ostream& os) const -{ - os << "ALU " << alu_ops.at(m_opcode).name; - if (m_flags.test(alu_dst_clamp)) - os << "_CLAMP"; - if (m_dest) - os << ' ' << *m_dest << " : " ; - - for (unsigned i = 0; i < m_src.size(); ++i) { - int pflags = 0; - if (i) - os << ' '; - if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg; - if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel; - if (i < 2) - if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs; - m_src[i]->print(os, Value::PrintFlags(0, pflags)); - } - os << " {"; - os << 
(m_flags.test(alu_write) ? 'W' : ' '); - os << (m_flags.test(alu_last_instr) ? 'L' : ' '); - os << (m_flags.test(alu_update_exec) ? 'E' : ' '); - os << (m_flags.test(alu_update_pred) ? 'P' : ' '); - os << "}"; - - os << " BS:" << m_bank_swizzle; - os << " CF:" << m_cf_type; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h deleted file mode 100644 index ecf563c..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h +++ /dev/null @@ -1,142 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef sfn_r600_instruction_alu_h -#define sfn_r600_instruction_alu_h - -#include "sfn_instruction_base.h" -#include "sfn_alu_defines.h" - -namespace r600 { - -enum AluModifiers { - alu_src0_neg, - alu_src0_abs, - alu_src0_rel, - alu_src1_neg, - alu_src1_abs, - alu_src1_rel, - alu_src2_neg, - alu_src2_rel, - alu_dst_clamp, - alu_dst_rel, - alu_last_instr, - alu_update_exec, - alu_update_pred, - alu_write, - alu_op3 -}; - -enum AluDstModifiers { - omod_off = 0, - omod_mul2 = 1, - omod_mul4 = 2, - omod_divl2 = 3 -}; - -enum AluPredSel { - pred_off = 0, - pred_zero = 2, - pred_one = 3 -}; - -enum AluBankSwizzle { - alu_vec_012 = 0, - sq_alu_scl_201 = 0, - alu_vec_021 = 1, - sq_alu_scl_122 = 1, - alu_vec_120 = 2, - sq_alu_scl_212 = 2, - alu_vec_102 = 3, - sq_alu_scl_221 = 3, - alu_vec_201 = 4, - alu_vec_210 = 5, - alu_vec_unknown = 6 -}; - -class AluInstruction : public Instruction { -public: - - static const AluModifiers src_abs_flags[2]; - static const AluModifiers src_neg_flags[3]; - static const AluModifiers src_rel_flags[3]; - - AluInstruction(EAluOp opcode); - AluInstruction(EAluOp opcode, PValue dest, - std::vector src0, - const std::set& m_flags); - - AluInstruction(EAluOp opcode, PValue dest, PValue src0, - const std::set& m_flags); - - AluInstruction(EAluOp opcode, PValue dest, - PValue src0, PValue src1, - const std::set& m_flags); - - AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1, - PValue src2, - const std::set& m_flags); - - void set_flag(AluModifiers flag); - unsigned n_sources() const; - - PValue dest() {return m_dest;} - EAluOp opcode() const {return m_opcode;} - const Value *dest() const {return m_dest.get();} - Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];} - PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];} - bool is_last() const {return m_flags.test(alu_last_instr);} - bool write() const {return m_flags.test(alu_write);} - bool flag(AluModifiers f) const 
{return m_flags.test(f);} - void set_bank_swizzle(AluBankSwizzle swz); - int bank_swizzle() const {return m_bank_swizzle;} - ECFAluOpCode cf_type() const {return m_cf_type;} - void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; } - - void replace_values(const ValueSet& candidates, PValue new_value) override; - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - PValue remap_one_registers(PValue reg, std::vector& map, - ValueMap &values); - - - EAluOp m_opcode; - PValue m_dest; - std::vector m_src; - AluOpFlags m_flags; - AluBankSwizzle m_bank_swizzle; - ECFAluOpCode m_cf_type; -}; - -} - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp deleted file mode 100644 index 116bfac..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include -#include - -#include "sfn_instruction_base.h" -#include "sfn_liverange.h" -#include "sfn_valuepool.h" - -namespace r600 { - -ValueRemapper::ValueRemapper(std::vector& m, - ValueMap& values): - m_map(m), - m_values(values) -{ -} - -void ValueRemapper::remap(PValue& v) -{ - if (!v) - return; - if (v->type() == Value::gpr) { - v = remap_one_registers(v); - } else if (v->type() == Value::gpr_array_value) { - GPRArrayValue& val = static_cast(*v); - auto value = val.value(); - auto addr = val.indirect(); - val.reset_value(remap_one_registers(value)); - if (addr) { - if (addr->type() == Value::gpr) - val.reset_addr(remap_one_registers(addr)); - } - size_t range_start = val.sel(); - size_t range_end = range_start + val.array_size(); - while (range_start < range_end) - m_map[range_start++].used = true; - } else if (v->type() == Value::kconst) { - auto& val = static_cast(*v); - auto addr = val.addr(); - if (addr && addr->type() == Value::gpr) - val.reset_addr(remap_one_registers(addr)); - } - -} - -void ValueRemapper::remap(GPRVector& v) -{ - for (int i = 0; i < 4; ++i) { - if (v.reg_i(i)) { - auto& ns_idx = m_map[v.reg_i(i)->sel()]; - if (ns_idx.valid) - v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan())); - m_map[v.reg_i(i)->sel()].used = true; - } - } -} - -PValue ValueRemapper::remap_one_registers(PValue& reg) -{ - auto new_index = m_map[reg->sel()]; - if (new_index.valid) - reg = m_values.get_or_inject(new_index.new_reg, reg->chan()); - 
m_map[reg->sel()].used = true; - return reg; -} - - -Instruction::Instruction(instr_type t): - m_type(t) -{ -} - -Instruction::~Instruction() -{ -} - -void Instruction::print(std::ostream& os) const -{ - os << "OP:"; - do_print(os); -} - - -void Instruction::remap_registers(ValueRemapper& map) -{ - sfn_log << SfnLog::merge << "REMAP " << *this << "\n"; - for (auto& v: m_mappable_src_registers) - map.remap(*v); - - for (auto& v: m_mappable_src_vectors) - map.remap(*v); - - for (auto& v: m_mappable_dst_registers) - map.remap(*v); - - for (auto& v: m_mappable_dst_vectors) - map.remap(*v); - sfn_log << SfnLog::merge << "TO " << *this << "\n\n"; -} - -void Instruction::add_remappable_src_value(PValue *v) -{ - if (*v) - m_mappable_src_registers.push_back(v); -} - -void Instruction::add_remappable_src_value(GPRVector *v) -{ - m_mappable_src_vectors.push_back(v); -} - -void Instruction::add_remappable_dst_value(PValue *v) -{ - if (v) - m_mappable_dst_registers.push_back(v); -} - -void Instruction::add_remappable_dst_value(GPRVector *v) -{ - m_mappable_dst_vectors.push_back(v); -} - -void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value) -{ - -} - -void Instruction::evalue_liveness(LiverangeEvaluator& eval) const -{ - sfn_log << SfnLog::merge << "Scan " << *this << "\n"; - for (const auto& s: m_mappable_src_registers) - if (*s) - eval.record_read(**s); - - for (const auto& s: m_mappable_src_vectors) - eval.record_read(*s); - - for (const auto& s: m_mappable_dst_registers) - if (*s) - eval.record_write(**s); - - for (const auto& s: m_mappable_dst_vectors) - eval.record_write(*s); - - do_evalue_liveness(eval); -} - -void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const -{ - -} - -bool operator == (const Instruction& lhs, const Instruction& rhs) -{ - if (rhs.m_type != lhs.m_type) - return false; - - return lhs.is_equal_to(rhs); -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h 
b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h deleted file mode 100644 index 0689a47..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h +++ /dev/null @@ -1,155 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef sfn_r600_instr_h -#define sfn_r600_instr_h - -#include "sfn_instructionvisitor.h" -#include "sfn_value_gpr.h" -#include "sfn_defines.h" - -#include "gallium/drivers/r600/r600_isa.h" -#include -#include -#include -#include - -namespace r600 { - -struct rename_reg_pair { - bool valid; - bool used; - int new_reg; -}; - -class LiverangeEvaluator; -class ValueMap; - - -class ValueRemapper { -public: - ValueRemapper(std::vector& m, - ValueMap& values); - - void remap(PValue& v); - void remap(GPRVector& v); -private: - PValue remap_one_registers(PValue& reg); - - std::vector& m_map; - ValueMap& m_values; -}; - - -using OutputRegisterMap = std::map; - -class Instruction { -public: - enum instr_type { - alu, - exprt, - tex, - vtx, - wait_ack, - cond_if, - cond_else, - cond_endif, - lds_atomic, - lds_read, - lds_write, - loop_begin, - loop_end, - loop_break, - loop_continue, - phi, - streamout, - ring, - emit_vtx, - mem_wr_scratch, - gds, - rat, - tf_write, - block, - unknown - }; - - typedef std::shared_ptr Pointer; - - friend bool operator == (const Instruction& lhs, const Instruction& rhs); - - Instruction(instr_type t); - - virtual ~Instruction(); - - instr_type type() const { return m_type;} - - void print(std::ostream& os) const; - - virtual void replace_values(const ValueSet& candidates, PValue new_value); - - void evalue_liveness(LiverangeEvaluator& eval) const; - - void remap_registers(ValueRemapper& map); - - virtual bool accept(InstructionVisitor& visitor) = 0; - virtual bool accept(ConstInstructionVisitor& visitor) const = 0; - -protected: - - void add_remappable_src_value(PValue *v); - void add_remappable_src_value(GPRVector *v); - void add_remappable_dst_value(PValue *v); - void add_remappable_dst_value(GPRVector *v); - -private: - - virtual void do_evalue_liveness(LiverangeEvaluator& eval) const; - - virtual bool is_equal_to(const Instruction& lhs) const = 0; - - instr_type m_type; - - virtual void do_print(std::ostream& os) const = 0; - - 
std::vector m_mappable_src_registers; - std::vector m_mappable_src_vectors; - std::vector m_mappable_dst_registers; - std::vector m_mappable_dst_vectors; -}; - -using PInstruction=Instruction::Pointer; - -inline std::ostream& operator << (std::ostream& os, const Instruction& instr) -{ - instr.print(os); - return os; -} - -bool operator == (const Instruction& lhs, const Instruction& rhs); - -} - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp deleted file mode 100644 index 212499f..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "sfn_instruction_block.h" - -namespace r600 { - - -InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number): - Instruction(block), - m_block_number(block_number), - m_nesting_depth(nesting_depth) -{ -} - -void InstructionBlock::emit(PInstruction instr) -{ - m_block.push_back(instr); -} - -void InstructionBlock::remap_registers(ValueRemapper& map) -{ - for(auto& i: m_block) - i->remap_registers(map); -} - -void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - for(auto& i: m_block) - i->evalue_liveness(eval); -} - -bool InstructionBlock::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == block); - auto& l = static_cast(lhs); - - if (m_block.size() != l.m_block.size()) - return false; - - if (m_block_number != l.m_block_number) - return false; - - return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(), - [](PInstruction ri, PInstruction li) {return *ri == *li;}); -} - -PInstruction InstructionBlock::last_instruction() -{ - return m_block.size() ? 
*m_block.rbegin() : nullptr; -} - -void InstructionBlock::do_print(std::ostream& os) const -{ - std::string space(" ", 2 * m_nesting_depth); - for(auto& i: m_block) - os << space << *i << "\n"; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/src/gallium/drivers/r600/sfn/sfn_instruction_block.h deleted file mode 100644 index fe40cc1..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#ifndef sfn_instruction_block_h -#define sfn_instruction_block_h - -#include "sfn_instruction_base.h" - -namespace r600 { - -class InstructionBlock : public Instruction -{ -public: - InstructionBlock(unsigned nesting_depth, unsigned block_number); - - void emit(PInstruction instr); - - - std::vector::const_iterator begin() const { - return m_block.begin(); - } - std::vector::const_iterator end() const { - return m_block.end(); - } - - void remap_registers(ValueRemapper& map); - - size_t size() const { - return m_block.size(); - } - - const PInstruction& operator [] (int i) const { - return m_block[i]; - } - - unsigned number() const { - return m_block_number; - } - - PInstruction last_instruction(); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - std::vector m_block; - - unsigned m_block_number; - unsigned m_nesting_depth; -}; - -} - -#endif // INSTRUCTIONBLOCK_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp deleted file mode 100644 index 455d6d6..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_instruction_cf.h" -#include "sfn_liverange.h" - -namespace r600 { - -CFInstruction::CFInstruction(instr_type type):Instruction(type) -{ - -} - -IfElseInstruction::IfElseInstruction(instr_type type): - CFInstruction (type) -{ - -} - -IfInstruction::IfInstruction(AluInstruction *pred): - IfElseInstruction(cond_if), - m_pred(pred) -{ - PValue *v = m_pred->psrc(0); - add_remappable_src_value(v); - pred->set_cf_type(cf_alu_push_before); -} - -void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_if(); -} - -bool IfInstruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == cond_if); - const IfInstruction& l = static_cast(lhs); - return *l.m_pred == *m_pred; -} - -void IfInstruction::do_print(std::ostream& os) const -{ - os << "PRED = " << *m_pred << "\n"; - os << "IF (PRED)"; -} - -ElseInstruction::ElseInstruction(IfInstruction *jump_src): - IfElseInstruction(cond_else), - m_jump_src(jump_src) -{ -} - -void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_else(); -} - - -bool ElseInstruction::is_equal_to(const Instruction& lhs) const -{ - if (lhs.type() != cond_else) - return false; - auto& l = static_cast(lhs); - return (*m_jump_src == *l.m_jump_src); -} - -void 
ElseInstruction::do_print(std::ostream& os) const -{ - os << "ELSE"; -} - -IfElseEndInstruction::IfElseEndInstruction(): - IfElseInstruction(cond_endif) -{ -} - -void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_endif(); -} - -bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const -{ - if (lhs.type() != cond_endif) - return false; - return true; -} - -void IfElseEndInstruction::do_print(std::ostream& os) const -{ - os << "ENDIF"; -} - -LoopBeginInstruction::LoopBeginInstruction(): - CFInstruction(loop_begin) -{ -} - -void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_loop_begin(); -} - -bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == loop_begin); - return true; -} - -void LoopBeginInstruction::do_print(std::ostream& os) const -{ - os << "BGNLOOP"; -} - -LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start): - CFInstruction (loop_end), - m_start(start) -{ -} - -void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_loop_end(); -} - -bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == loop_end); - const auto& other = static_cast(lhs); - return *m_start == *other.m_start; -} - -void LoopEndInstruction::do_print(std::ostream& os) const -{ - os << "ENDLOOP"; -} - -LoopBreakInstruction::LoopBreakInstruction(): - CFInstruction (loop_break) -{ -} - -void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const -{ - eval.scope_loop_break(); -} - -bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const -{ - return true; -} - -void LoopBreakInstruction::do_print(std::ostream& os) const -{ - os << "BREAK"; -} - -LoopContInstruction::LoopContInstruction(): - CFInstruction (loop_continue) -{ -} - -bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const -{ - return true; -} -void 
LoopContInstruction::do_print(std::ostream& os) const -{ - os << "CONTINUE"; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h deleted file mode 100644 index a137948..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h +++ /dev/null @@ -1,142 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_IFELSEINSTRUCTION_H -#define SFN_IFELSEINSTRUCTION_H - -#include "sfn_instruction_alu.h" - -namespace r600 { - -class CFInstruction : public Instruction { -protected: - CFInstruction(instr_type type); -}; - -class IfElseInstruction : public CFInstruction { -public: - IfElseInstruction(instr_type type); - -}; - -class IfInstruction : public IfElseInstruction { -public: - IfInstruction(AluInstruction *pred); - const AluInstruction& pred() const {return *m_pred;} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - std::shared_ptr m_pred; -}; - -class ElseInstruction : public IfElseInstruction { -public: - ElseInstruction(IfInstruction *jump_src); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - IfElseInstruction *m_jump_src; -}; - -class IfElseEndInstruction : public IfElseInstruction { -public: - IfElseEndInstruction(); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; -}; - -class LoopBeginInstruction: public CFInstruction { -public: - LoopBeginInstruction(); - - bool accept(InstructionVisitor& visitor) 
override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; -}; - -class LoopEndInstruction: public CFInstruction { -public: - LoopEndInstruction(LoopBeginInstruction *start); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - LoopBeginInstruction *m_start; -}; - -class LoopBreakInstruction: public CFInstruction { -public: - LoopBreakInstruction(); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_evalue_liveness(LiverangeEvaluator& eval) const override; - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; -}; - -class LoopContInstruction: public CFInstruction { -public: - LoopContInstruction(); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; -}; - -} - -#endif // SFN_IFELSEINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp deleted file mode 100644 index 7d1d948..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp +++ /dev/null @@ -1,341 
+0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "sfn_instruction_export.h" -#include "sfn_liverange.h" -#include "sfn_valuepool.h" - -namespace r600 { - -WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value): - Instruction(t), - m_value(value) -{ - add_remappable_src_value(&m_value); -} - -void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - // I wonder whether we can actually end up here ... 
- for (auto c: candidates) { - if (*c == *m_value.reg_i(c->chan())) - m_value.set_reg_i(c->chan(), new_value); - } - - replace_values_child(candidates, new_value); -} - -void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates, - UNUSED PValue new_value) -{ -} - -void WriteoutInstruction::remap_registers_child(UNUSED std::vector& map, - UNUSED ValueMap& values) -{ -} - -ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type): - WriteoutInstruction(Instruction::exprt, value), - m_type(type), - m_loc(loc), - m_is_last(false) -{ -} - - -bool ExportInstruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == exprt); - const auto& oth = static_cast(lhs); - - return (gpr() == oth.gpr()) && - (m_type == oth.m_type) && - (m_loc == oth.m_loc) && - (m_is_last == oth.m_is_last); -} - -void ExportInstruction::do_print(std::ostream& os) const -{ - os << (m_is_last ? "EXPORT_DONE ":"EXPORT "); - switch (m_type) { - case et_pixel: os << "PIXEL "; break; - case et_pos: os << "POS "; break; - case et_param: os << "PARAM "; break; - } - os << m_loc << " " << gpr(); -} - -void ExportInstruction::update_output_map(OutputRegisterMap& map) const -{ - map[m_loc] = gpr_ptr(); -} - -void ExportInstruction::set_last() -{ - m_is_last = true; -} - -WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value, - int align, int align_offset, int writemask): - WriteoutInstruction (Instruction::mem_wr_scratch, value), - m_loc(loc), - m_align(align), - m_align_offset(align_offset), - m_writemask(writemask), - m_array_size(0) -{ -} - -WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value, - int align, int align_offset, int writemask, int array_size): - WriteoutInstruction (Instruction::mem_wr_scratch, value), - m_loc(0), - m_address(address), - m_align(align), - m_align_offset(align_offset), - m_writemask(writemask), - m_array_size(array_size - 1) -{ - 
add_remappable_src_value(&m_address); -} - -bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const -{ - if (lhs.type() != Instruction::mem_wr_scratch) - return false; - const auto& other = static_cast(lhs); - - if (m_address) { - if (!other.m_address) - return false; - if (*m_address != *other.m_address) - return false; - } else { - if (other.m_address) - return false; - } - - return gpr() == other.gpr() && - m_loc == other.m_loc && - m_align == other.m_align && - m_align_offset == other.m_align_offset && - m_writemask == other.m_writemask; -} - -static char *writemask_to_swizzle(int writemask, char *buf) -{ - const char *swz = "xyzw"; - for (int i = 0; i < 4; ++i) { - buf[i] = (writemask & (1 << i)) ? swz[i] : '_'; - } - return buf; -} - -void WriteScratchInstruction::do_print(std::ostream& os) const -{ - char buf[5]; - - os << "MEM_SCRATCH_WRITE "; - if (m_address) - os << "@" << *m_address << "+"; - - os << m_loc << "." << writemask_to_swizzle(m_writemask, buf) - << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset; -} - -void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value) -{ - if (!m_address) - return; - - for (auto c: candidates) { - if (*c == *m_address) - m_address = new_value; - } -} - -void WriteScratchInstruction::remap_registers_child(std::vector& map, - ValueMap& values) -{ - if (!m_address) - return; - sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n"; - assert(m_address->type() == Value::gpr); - auto new_index = map[m_address->sel()]; - if (new_index.valid) - m_address = values.get_or_inject(new_index.new_reg, m_address->chan()); - map[m_address->sel()].used = true; -} - -StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components, - int array_base, int comp_mask, int out_buffer, - int stream): - WriteoutInstruction(Instruction::streamout, value), - m_element_size(num_components == 3 ? 
3 : num_components - 1), - m_burst_count(1), - m_array_base(array_base), - m_array_size(0xfff), - m_writemask(comp_mask), - m_output_buffer(out_buffer), - m_stream(stream) -{ -} - -unsigned StreamOutIntruction::op() const -{ - int op = 0; - switch (m_output_buffer) { - case 0: op = CF_OP_MEM_STREAM0_BUF0; break; - case 1: op = CF_OP_MEM_STREAM0_BUF1; break; - case 2: op = CF_OP_MEM_STREAM0_BUF2; break; - case 3: op = CF_OP_MEM_STREAM0_BUF3; break; - } - return 4 * m_stream + op; -} - -bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const -{ - assert(lhs.type() == streamout); - const auto& oth = static_cast(lhs); - - return gpr() == oth.gpr() && - m_element_size == oth.m_element_size && - m_burst_count == oth.m_burst_count && - m_array_base == oth.m_array_base && - m_array_size == oth.m_array_size && - m_writemask == oth.m_writemask && - m_output_buffer == oth.m_output_buffer && - m_stream == oth.m_stream; -} - -void StreamOutIntruction::do_print(std::ostream& os) const -{ - os << "WRITE STREAM(" << m_stream << ") " << gpr() - << " ES:" << m_element_size - << " BC:" << m_burst_count - << " BUF:" << m_output_buffer - << " ARRAY:" << m_array_base; - if (m_array_size != 0xfff) - os << "+" << m_array_size; -} - -MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type, - const GPRVector& value, - unsigned base_addr, unsigned ncomp, - PValue index): - WriteoutInstruction(Instruction::ring, value), - m_ring_op(ring), - m_type(type), - m_base_address(base_addr), - m_num_comp(ncomp), - m_index(index) -{ - add_remappable_src_value(&m_index); - - assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1|| - m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3); - assert(m_num_comp <= 4); -} - -unsigned MemRingOutIntruction::ncomp() const -{ - switch (m_num_comp) { - case 1: return 0; - case 2: return 1; - case 3: - case 4: return 3; - default: - assert(0); - } - return 3; -} - -bool MemRingOutIntruction::is_equal_to(const Instruction& 
lhs) const -{ - assert(lhs.type() == streamout); - const auto& oth = static_cast(lhs); - - bool equal = gpr() == oth.gpr() && - m_ring_op == oth.m_ring_op && - m_type == oth.m_type && - m_num_comp == oth.m_num_comp && - m_base_address == oth.m_base_address; - - if (m_type == mem_write_ind || m_type == mem_write_ind_ack) - equal &= (*m_index == *oth.m_index); - return equal; - -} - -static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" }; -void MemRingOutIntruction::do_print(std::ostream& os) const -{ - os << "MEM_RING " << m_ring_op; - os << " " << write_type_str[m_type] << " " << m_base_address; - os << " " << gpr(); - if (m_type == mem_write_ind || m_type == mem_write_ind_ack) - os << " @" << *m_index; - os << " ES:" << m_num_comp; -} - - -void MemRingOutIntruction::replace_values_child(const ValueSet& candidates, - PValue new_value) -{ - if (!m_index) - return; - - for (auto c: candidates) { - if (*c == *m_index) - m_index = new_value; - } -} - -void MemRingOutIntruction::remap_registers_child(std::vector& map, - ValueMap& values) -{ - if (!m_index) - return; - - assert(m_index->type() == Value::gpr); - auto new_index = map[m_index->sel()]; - if (new_index.valid) - m_index = values.get_or_inject(new_index.new_reg, m_index->chan()); - map[m_index->sel()].used = true; -} - -void MemRingOutIntruction::patch_ring(int stream, PValue index) -{ - const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3}; - - assert(stream < 4); - m_ring_op = ring_op[stream]; - m_index = index; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h deleted file mode 100644 index 6d01408..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h +++ /dev/null @@ -1,185 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person 
obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_EXPORTINSTRUCTION_H -#define SFN_EXPORTINSTRUCTION_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -class WriteoutInstruction: public Instruction { -public: - void replace_values(const ValueSet& candidates, PValue new_value) override; - const GPRVector& gpr() const {return m_value;} - const GPRVector *gpr_ptr() const {return &m_value;} -protected: - WriteoutInstruction(instr_type t, const GPRVector& value); -private: - virtual void replace_values_child(const ValueSet& candidates, PValue new_value); - virtual void remap_registers_child(std::vector& map, - ValueMap& values); - - GPRVector m_value; -}; - -class ExportInstruction : public WriteoutInstruction { -public: - enum ExportType { - et_pixel, - et_pos, - et_param - }; - - ExportInstruction(unsigned loc, const GPRVector& value, ExportType type); - void set_last(); - - ExportType export_type() const {return m_type;} - - unsigned location() const {return m_loc;} - bool is_last_export() const {return m_is_last;} - - void update_output_map(OutputRegisterMap& map) const; - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - ExportType m_type; - unsigned m_loc; - bool m_is_last; -}; - -class WriteScratchInstruction : public WriteoutInstruction { -public: - - WriteScratchInstruction(unsigned loc, const GPRVector& value, int align, - int align_offset, int writemask); - WriteScratchInstruction(const PValue& address, const GPRVector& value, - int align, int align_offset, int writemask, int array_size); - unsigned location() const {return m_loc;} - - int write_mask() const { return m_writemask;} - int address() const { assert(m_address); return m_address->sel();} - bool indirect() const { return !!m_address;} - int array_size() const { return 
m_array_size;} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - void replace_values_child(const ValueSet& candidates, PValue new_value) override; - void remap_registers_child(std::vector& map, - ValueMap& values)override; - - unsigned m_loc; - PValue m_address; - unsigned m_align; - unsigned m_align_offset; - unsigned m_writemask; - int m_array_size; -}; - - -class StreamOutIntruction: public WriteoutInstruction { -public: - StreamOutIntruction(const GPRVector& value, int num_components, - int array_base, int comp_mask, int out_buffer, - int stream); - int element_size() const { return m_element_size;} - int burst_count() const { return m_burst_count;} - int array_base() const { return m_array_base;} - int array_size() const { return m_array_size;} - int comp_mask() const { return m_writemask;} - unsigned op() const; - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - int m_element_size; - int m_burst_count; - int m_array_base; - int m_array_size; - int m_writemask; - int m_output_buffer; - int m_stream; -}; - -enum EMemWriteType { - mem_write = 0, - mem_write_ind = 1, - mem_write_ack = 2, - mem_write_ind_ack = 3, -}; - -class MemRingOutIntruction: public WriteoutInstruction { -public: - - MemRingOutIntruction(ECFOpCode ring, EMemWriteType type, - const GPRVector& value, unsigned base_addr, - unsigned ncomp, PValue m_index); - - unsigned op() const{return m_ring_op;} - unsigned ncomp() const; - unsigned addr() const {return m_base_address;} - EMemWriteType type() 
const {return m_type;} - unsigned index_reg() const {return m_index->sel();} - unsigned array_base() const {return m_base_address; } - void replace_values_child(const ValueSet& candidates, PValue new_value) override; - void remap_registers_child(std::vector& map, - ValueMap& values) override; - void patch_ring(int stream, PValue index); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - ECFOpCode m_ring_op; - EMemWriteType m_type; - unsigned m_base_address; - unsigned m_num_comp; - PValue m_index; - -}; - -} - - -#endif // SFN_EXPORTINSTRUCTION_H \ No newline at end of file diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp deleted file mode 100644 index ec1a488..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp +++ /dev/null @@ -1,480 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_instruction_fetch.h" - -#include "gallium/drivers/r600/r600_pipe.h" - -namespace r600 { - -/* refactor this to add status create methods for specific tasks */ -FetchInstruction::FetchInstruction(EVFetchInstr op, - EVFetchType type, - GPRVector dst, - PValue src, int offset, - int buffer_id, PValue buffer_offset, - EBufferIndexMode cp_rel, - bool use_const_field): - Instruction(vtx), - m_vc_opcode(op), - m_fetch_type(type), - m_endian_swap(vtx_es_none), - m_src(src), - m_dst(dst), - m_offset(offset), - m_is_mega_fetch(1), - m_mega_fetch_count(16), - m_buffer_id(buffer_id), - m_semantic_id(0), - m_buffer_index_mode(cp_rel), - m_flags(0), - m_uncached(false), - m_indexed(false), - m_array_base(0), - m_array_size(0), - m_elm_size(0), - m_buffer_offset(buffer_offset), - m_dest_swizzle({0,1,2,3}) -{ - if (use_const_field) { - m_flags.set(vtx_use_const_field); - m_data_format = fmt_invalid; - m_num_format = vtx_nf_norm; - } else { - m_flags.set(vtx_format_comp_signed); - m_data_format = fmt_32_32_32_32_float; - m_num_format = vtx_nf_scaled; - } - - add_remappable_src_value(&m_src); - add_remappable_src_value(&m_buffer_offset); - - add_remappable_dst_value(&m_dst); -} - -/* Resource query */ -FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode, - EVFetchType fetch_type, - EVTXDataFormat data_format, - EVFetchNumFormat num_format, - EVFetchEndianSwap endian_swap, - const PValue src, - const GPRVector dst, - uint32_t offset, - bool is_mega_fetch, - uint32_t 
mega_fetch_count, - uint32_t buffer_id, - uint32_t semantic_id, - - EBufferIndexMode buffer_index_mode, - bool uncached, - bool indexed, - int array_base, - int array_size, - int elm_size, - PValue buffer_offset, - const std::array& dest_swizzle): - Instruction(vtx), - m_vc_opcode(vc_opcode), - m_fetch_type(fetch_type), - m_data_format(data_format), - m_num_format(num_format), - m_endian_swap(endian_swap), - m_src(src), - m_dst(dst), - m_offset(offset), - m_is_mega_fetch(is_mega_fetch), - m_mega_fetch_count(mega_fetch_count), - m_buffer_id(buffer_id), - m_semantic_id(semantic_id), - m_buffer_index_mode(buffer_index_mode), - m_uncached(uncached), - m_indexed(indexed), - m_array_base(array_base), - m_array_size(array_size), - m_elm_size(elm_size), - m_buffer_offset(buffer_offset), - m_dest_swizzle(dest_swizzle) -{ - add_remappable_src_value(&m_src); - add_remappable_dst_value(&m_dst); - add_remappable_src_value(&m_buffer_offset); -} - -FetchInstruction::FetchInstruction(GPRVector dst, - PValue src, - int buffer_id, PValue buffer_offset, - EVTXDataFormat format, - EVFetchNumFormat num_format): - Instruction(vtx), - m_vc_opcode(vc_fetch), - m_fetch_type(no_index_offset), - m_data_format(format), - m_num_format(num_format), - m_endian_swap(vtx_es_none), - m_src(src), - m_dst(dst), - m_offset(0), - m_is_mega_fetch(0), - m_mega_fetch_count(0), - m_buffer_id(buffer_id), - m_semantic_id(0), - m_buffer_index_mode(bim_none), - m_flags(0), - m_uncached(false), - m_indexed(false), - m_array_base(0), - m_array_size(0), - m_elm_size(1), - m_buffer_offset(buffer_offset), - m_dest_swizzle({0,1,2,3}) -{ - m_flags.set(vtx_format_comp_signed); - - add_remappable_src_value(&m_src); - add_remappable_dst_value(&m_dst); - add_remappable_src_value(&m_buffer_offset); -} - - -/* Resource query */ -FetchInstruction::FetchInstruction(GPRVector dst, - PValue src, - int buffer_id, - EBufferIndexMode cp_rel): - Instruction(vtx), - m_vc_opcode(vc_get_buf_resinfo), - m_fetch_type(no_index_offset), 
- m_data_format(fmt_32_32_32_32), - m_num_format(vtx_nf_norm), - m_endian_swap(vtx_es_none), - m_src(src), - m_dst(dst), - m_offset(0), - m_is_mega_fetch(0), - m_mega_fetch_count(16), - m_buffer_id(buffer_id), - m_semantic_id(0), - m_buffer_index_mode(cp_rel), - m_flags(0), - m_uncached(false), - m_indexed(false), - m_array_base(0), - m_array_size(0), - m_elm_size(0), - m_dest_swizzle({0,1,2,3}) -{ - m_flags.set(vtx_format_comp_signed); - add_remappable_src_value(&m_src); - add_remappable_dst_value(&m_dst); - add_remappable_src_value(&m_buffer_offset); -} - -FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size): - Instruction(vtx), - m_vc_opcode(vc_read_scratch), - m_fetch_type(vertex_data), - m_data_format(fmt_32_32_32_32), - m_num_format(vtx_nf_int), - m_endian_swap(vtx_es_none), - m_dst(dst), - m_offset(0), - m_is_mega_fetch(0), - m_mega_fetch_count(16), - m_buffer_id(0), - m_semantic_id(0), - m_buffer_index_mode(bim_none), - m_flags(0), - m_uncached(true), - m_array_base(0), - m_array_size(0), - m_elm_size(3), - m_dest_swizzle({0,1,2,3}) -{ - if (src->type() == Value::literal) { - const auto& lv = static_cast(*src); - m_array_base = lv.value(); - m_indexed = false; - m_src.reset(new GPRValue(0,0)); - m_array_size = 0; - } else { - m_array_base = 0; - m_src = src; - m_indexed = true; - m_array_size = scratch_size - 1; - } - add_remappable_src_value(&m_src); - add_remappable_dst_value(&m_dst); - add_remappable_src_value(&m_buffer_offset); -} - -void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - if (!m_src) - return; - for (auto c: candidates) { - for (int i = 0; i < 4; ++i) { - if (*c == *m_dst.reg_i(i)) - m_dst.set_reg_i(i, new_value); - } - if (*m_src == *c) - m_src = new_value; - } -} - - -bool FetchInstruction::is_equal_to(const Instruction& lhs) const -{ - auto& l = static_cast(lhs); - if (m_src) { - if (!l.m_src) - return false; - if (*m_src != *l.m_src) - return false; - } else { - if 
(l.m_src) - return false; - } - - return m_vc_opcode == l.m_vc_opcode && - m_fetch_type == l.m_fetch_type && - m_data_format == l.m_data_format && - m_num_format == l.m_num_format && - m_endian_swap == l.m_endian_swap && - m_dst == l.m_dst && - m_offset == l.m_offset && - m_buffer_id == l.m_buffer_id && - m_semantic_id == l.m_semantic_id && - m_buffer_index_mode == l.m_buffer_index_mode && - m_flags == l.m_flags && - m_indexed == l.m_indexed && - m_uncached == l.m_uncached; -} - -void FetchInstruction::set_format(EVTXDataFormat fmt) -{ - m_data_format = fmt; -} - - -void FetchInstruction::set_dest_swizzle(const std::array& swz) -{ - m_dest_swizzle = swz; -} - -void FetchInstruction::prelude_append(Instruction *instr) -{ - assert(instr); - m_prelude.push_back(PInstruction(instr)); -} - -const std::vector& FetchInstruction::prelude() const -{ - return m_prelude; -} - -LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size): - FetchInstruction(dst, src, scratch_size) -{ -} - -FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src): - FetchInstruction(vc_fetch, - no_index_offset, - fmt_32, - vtx_nf_int, - vtx_es_none, - src, - dst, - 0, - false, - 0xf, - R600_IMAGE_IMMED_RESOURCE_OFFSET, - 0, - bim_none, - false, - false, - 0, - 0, - 0, - PValue(), - {0,7,7,7}) -{ - set_flag(vtx_srf_mode); - set_flag(vtx_vpm); -} - -FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset): - FetchInstruction(vc_fetch, - no_index_offset, - fmt_32_32_32_32, - vtx_nf_scaled, - vtx_es_none, - src, - dst, - offset, - false, - 16, - R600_LDS_INFO_CONST_BUFFER, - 0, - bim_none, - false, - false, - 0, - 0, - 0, - PValue(), - {0,1,2,3}) -{ - set_flag(vtx_srf_mode); - set_flag(vtx_format_comp_signed); -} - - -static const char *fmt_descr[64] = { - "INVALID", - "8", - "4_4", - "3_3_2", - "RESERVED_4", - "16", - "16F", - "8_8", - "5_6_5", - "6_5_5", - "1_5_5_5", - "4_4_4_4", - "5_5_5_1", - "32", - "32F", - "16_16", - "16_16F", - "8_24", - 
"8_24F", - "24_8", - "24_8F", - "10_11_11", - "10_11_11F", - "11_11_10", - "11_11_10F", - "2_10_10_10", - "8_8_8_8", - "10_10_10_2", - "X24_8_32F", - "32_32", - "32_32F", - "16_16_16_16", - "16_16_16_16F", - "RESERVED_33", - "32_32_32_32", - "32_32_32_32F", - "RESERVED_36", - "1", - "1_REVERSED", - "GB_GR", - "BG_RG", - "32_AS_8", - "32_AS_8_8", - "5_9_9_9_SHAREDEXP", - "8_8_8", - "16_16_16", - "16_16_16F", - "32_32_32", - "32_32_32F", - "BC1", - "BC2", - "BC3", - "BC4", - "BC5", - "APC0", - "APC1", - "APC2", - "APC3", - "APC4", - "APC5", - "APC6", - "APC7", - "CTX1", - "RESERVED_63" -}; - - -void FetchInstruction::do_print(std::ostream& os) const -{ - static const std::string num_format_char[] = {"norm", "int", "scaled"}; - static const std::string endian_swap_code[] = { - "noswap", "8in16", "8in32" - }; - static const char buffer_index_mode_char[] = "_01E"; - static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero", - "nostride", "AC", "TC", "VPM"}; - switch (m_vc_opcode) { - case vc_fetch: - os << "Fetch " << m_dst; - break; - case vc_semantic: - os << "Fetch Semantic ID:" << m_semantic_id; - break; - case vc_get_buf_resinfo: - os << "Fetch BufResinfo:" << m_dst; - break; - case vc_read_scratch: - os << "MEM_READ_SCRATCH:" << m_dst; - break; - default: - os << "Fetch ERROR"; - return; - } - - os << ", " << *m_src; - - if (m_offset) - os << "+" << m_offset; - - os << " BUFID:" << m_buffer_id - << " FMT:(" << fmt_descr[m_data_format] - << " " << num_format_char[m_num_format] - << " " << endian_swap_code[m_endian_swap] - << ")"; - if (m_buffer_index_mode > 0) - os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode]; - - - if (m_is_mega_fetch) - os << " MFC:" << m_mega_fetch_count; - else - os << " mfc*:" << m_mega_fetch_count; - - if (m_flags.any()) { - os << " Flags:"; - for( int i = 0; i < vtx_unknown; ++i) { - if (m_flags.test(i)) - os << ' ' << flag_string[i]; - } - } -} - -} diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h deleted file mode 100644 index 71a3f69..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h +++ /dev/null @@ -1,187 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_INSTRUCTION_FETCH_H -#define SFN_INSTRUCTION_FETCH_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -class FetchInstruction : public Instruction { -public: - - FetchInstruction(EVFetchInstr vc_opcode, - EVFetchType fetch_type, - EVTXDataFormat data_format, - EVFetchNumFormat num_format, - EVFetchEndianSwap endian_swap, - const PValue src, - const GPRVector dst, - uint32_t offset, - bool is_mega_fetch, - uint32_t mega_fetch_count, - uint32_t buffer_id, - uint32_t semantic_id, - - EBufferIndexMode buffer_index_mode, - bool uncached, - bool indexed, - int array_base, - int array_size, - int elm_size, - PValue buffer_offset, - const std::array& dest_swizzle); - - FetchInstruction(EVFetchInstr op, - EVFetchType type, - GPRVector dst, - PValue src, int offset, - int buffer_id, PValue buffer_offset, - EBufferIndexMode cp_rel, - bool use_const_field = false); - - FetchInstruction(GPRVector dst, - PValue src, - int buffer_id, - PValue buffer_offset, - EVTXDataFormat format, - EVFetchNumFormat num_format); - - FetchInstruction(GPRVector dst, - PValue src, - int buffer_id, - EBufferIndexMode cp_rel); - - FetchInstruction(GPRVector dst, PValue src, int scratch_size); - - void replace_values(const ValueSet& candidates, PValue new_value) override; - EVFetchInstr vc_opcode() const { return m_vc_opcode;} - EVFetchType fetch_type() const { return m_fetch_type;} - - EVTXDataFormat data_format() const { return m_data_format;} - EVFetchNumFormat num_format() const { return m_num_format;} - EVFetchEndianSwap endian_swap() const { return m_endian_swap;} - - const Value& src() const { return *m_src;} - const GPRVector& dst() const { return m_dst;} - uint32_t offset() const { return m_offset;} - - bool is_mega_fetchconst() { return m_is_mega_fetch;} - uint32_t mega_fetch_count() const { return m_mega_fetch_count;} - - uint32_t buffer_id() const { return m_buffer_id;} - uint32_t semantic_id() const { return m_semantic_id;} - EBufferIndexMode 
buffer_index_mode() const{ return m_buffer_index_mode;} - - bool is_signed() const { return m_flags.test(vtx_format_comp_signed);} - bool use_const_fields() const { return m_flags.test(vtx_use_const_field);} - - bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);} - - void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);} - - bool uncached() const {return m_uncached; } - bool indexed() const {return m_indexed; } - int array_base()const {return m_array_base; } - int array_size() const {return m_array_size; } - int elm_size() const {return m_elm_size; } - - void set_buffer_offset(PValue buffer_offset) { - m_buffer_offset = buffer_offset; - add_remappable_src_value(&m_buffer_offset); - } - PValue buffer_offset() const { return m_buffer_offset; } - - void set_dest_swizzle(const std::array& swz); - void set_format(EVTXDataFormat fmt); - - int swz(int idx) const { return m_dest_swizzle[idx];} - - bool use_tc() const {return m_flags.test(vtx_use_tc);} - - bool use_vpm() const {return m_flags.test(vtx_vpm);} - - void prelude_append(Instruction *instr); - - const std::vector& prelude() const; - - bool has_prelude() const {return !m_prelude.empty();} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - EVFetchInstr m_vc_opcode; - EVFetchType m_fetch_type; - - EVTXDataFormat m_data_format; - EVFetchNumFormat m_num_format; - EVFetchEndianSwap m_endian_swap; - - PValue m_src; - GPRVector m_dst; - uint32_t m_offset; - - bool m_is_mega_fetch; - uint32_t m_mega_fetch_count; - - uint32_t m_buffer_id; - uint32_t m_semantic_id; - - EBufferIndexMode m_buffer_index_mode; - std::bitset<16> m_flags; - bool m_uncached; - bool m_indexed; - int m_array_base; - int m_array_size; - int m_elm_size; - PValue m_buffer_offset; - 
std::array m_dest_swizzle; - std::vector m_prelude; -}; - -class LoadFromScratch: public FetchInstruction { -public: - LoadFromScratch(GPRVector dst, PValue src, int scratch_size); -}; - -class FetchGDSOpResult : public FetchInstruction { -public: - FetchGDSOpResult(const GPRVector dst, const PValue src); -}; - -class FetchTCSIOParam : public FetchInstruction { -public: - FetchTCSIOParam(GPRVector dst, PValue src, int offset); -}; - -} - -#endif // SFN_INSTRUCTION_FETCH_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp deleted file mode 100644 index 095cd40..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_instruction_gds.h" -#include "sfn_liverange.h" - -namespace r600 { - -GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, - const PValue& value2, const PValue& uav_id, int uav_base): - Instruction(gds), - m_op(op), - m_src(value), - m_src2(value2), - m_dest(dest), - m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}), - m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}), - m_buffer_index_mode(bim_none), - m_uav_id(uav_id), - m_uav_base(uav_base), - m_flags(0) -{ - add_remappable_src_value(&m_src); - add_remappable_src_value(&m_src2); - add_remappable_src_value(&m_uav_id); - add_remappable_dst_value(&m_dest); - m_dest_swizzle[0] = m_dest.chan_i(0); -} - -GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, - const PValue& uav_id, int uav_base): - GDSInstr(op, dest, value, PValue(), uav_id, uav_base) -{ - assert(value); - m_src_swizzle[1] = value->chan(); - m_src_swizzle[2] = PIPE_SWIZZLE_0; -} - -GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, - const PValue& uav_id, int uav_base): - GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base) -{ - m_src_swizzle[1] = PIPE_SWIZZLE_0; -} - -bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const -{ - return false; -} - -void GDSInstr::do_print(std::ostream& os) const -{ - const char *swz = "xyzw01?_"; - os << lds_ops.at(m_op).name << " R" << m_dest.sel() << "."; - for (int i = 0; i < 4; ++i) { - os << swz[m_dest_swizzle[i]]; - } - if (m_src) - os << " " << *m_src; - - os << " UAV:" << *m_uav_id; -} - -RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op, - const GPRVector& data, const GPRVector& index, - int rat_id, const PValue& rat_id_offset, - int burst_count, int comp_mask, int element_size, bool ack): - Instruction(rat), - m_cf_opcode(cf_opcode), - m_rat_op(rat_op), - m_data(data), - m_index(index), - m_rat_id(rat_id), - m_rat_id_offset(rat_id_offset), - m_burst_count(burst_count), - m_comp_mask(comp_mask), - 
m_element_size(element_size), - m_need_ack(ack) -{ - add_remappable_src_value(&m_data); - add_remappable_src_value(&m_rat_id_offset); - add_remappable_src_value(&m_index); -} - -bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const -{ - return false; -} - -void RatInstruction::do_print(std::ostream& os) const -{ - os << "MEM_RAT RAT(" << m_rat_id; - if (m_rat_id_offset) - os << "+" << *m_rat_id_offset; - os << ") @" << m_index; - os << " OP:" << m_rat_op << " " << m_data; - os << " BC:" << m_burst_count - << " MASK:" << m_comp_mask - << " ES:" << m_element_size; - if (m_need_ack) - os << " ACK"; -} - -RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode) -{ - switch (opcode) { - case nir_intrinsic_ssbo_atomic_add: - return ADD_RTN; - case nir_intrinsic_ssbo_atomic_and: - return AND_RTN; - case nir_intrinsic_ssbo_atomic_exchange: - return XCHG_RTN; - case nir_intrinsic_ssbo_atomic_umax: - return MAX_UINT_RTN; - case nir_intrinsic_ssbo_atomic_umin: - return MIN_UINT_RTN; - case nir_intrinsic_ssbo_atomic_imax: - return MAX_INT_RTN; - case nir_intrinsic_ssbo_atomic_imin: - return MIN_INT_RTN; - case nir_intrinsic_ssbo_atomic_xor: - return XOR_RTN; - default: - return UNSUPPORTED; - } -} - -GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value): - Instruction(tf_write), - m_value(value) -{ - add_remappable_src_value(&m_value); -} - -void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value) -{ - for (auto& c: candidates) { - for (int i = 0; i < 4; ++i) { - if (*c == *m_value[i]) - m_value[i] = new_value; - } - } -} - - -bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const -{ - auto& other = static_cast(lhs); - return m_value == other.m_value; -} - -void GDSStoreTessFactor::do_print(std::ostream& os) const -{ - os << "TF_WRITE " << m_value; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h deleted file mode 100644 index 
6f8e0f2..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h +++ /dev/null @@ -1,225 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_GDSINSTR_H -#define SFN_GDSINSTR_H - -#include "sfn_instruction_base.h" - -#include - -namespace r600 { - -class GDSInstr : public Instruction -{ -public: - GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, - const PValue &uav_id, int uav_base); - GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, - const PValue& value2, const PValue &uav_id, int uav_base); - GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base); - - ESDOp op() const {return m_op;} - - int src_sel() const { - if (!m_src) - return 0; - - assert(m_src->type() == Value::gpr); - return m_src->sel(); - } - - int src2_chan() const { - if (!m_src2) - return 0; - - assert(m_src->type() == Value::gpr); - return m_src->chan(); - } - - int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];} - - int dest_sel() const { - return m_dest.sel(); - } - - int dest_swizzle(int i) const { - if (i < 4) - return m_dest_swizzle[i]; - return 7; - } - - void set_dest_swizzle(const std::array& swz) { - m_dest_swizzle = swz; - } - - PValue uav_id() const {return m_uav_id;} - int uav_base() const {return m_uav_base;} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - ESDOp m_op; - - PValue m_src; - PValue m_src2; - GPRVector m_dest; - std::array m_dest_swizzle; - std::array m_src_swizzle; - - EBufferIndexMode m_buffer_index_mode; - PValue m_uav_id; - int m_uav_base; - std::bitset<8> m_flags; - -}; - -class RatInstruction : public Instruction { - -public: - enum ERatOp { - NOP, - STORE_TYPED, - STORE_RAW, - STORE_RAW_FDENORM, - CMPXCHG_INT, - CMPXCHG_FLT, - CMPXCHG_FDENORM, - ADD, - SUB, - RSUB, - MIN_INT, - MIN_UINT, - MAX_INT, - MAX_UINT, - AND, - OR, - XOR, - MSKOR, - INC_UINT, - 
DEC_UINT, - NOP_RTN = 32, - XCHG_RTN = 34, - XCHG_FDENORM_RTN, - CMPXCHG_INT_RTN, - CMPXCHG_FLT_RTN, - CMPXCHG_FDENORM_RTN, - ADD_RTN, - SUB_RTN, - RSUB_RTN, - MIN_INT_RTN, - MIN_UINT_RTN, - MAX_INT_RTN, - MAX_UINT_RTN, - AND_RTN, - OR_RTN, - XOR_RTN, - MSKOR_RTN, - UINT_RTN, - UNSUPPORTED - }; - - RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op, - const GPRVector& data, const GPRVector& index, - int rat_id, const PValue& rat_id_offset, - int burst_count, int comp_mask, int element_size, - bool ack); - - PValue rat_id_offset() const { return m_rat_id_offset;} - int rat_id() const { return m_rat_id;} - - ERatOp rat_op() const {return m_rat_op;} - - int data_gpr() const {return m_data.sel();} - int index_gpr() const {return m_index.sel();} - int elm_size() const {return m_element_size;} - - int comp_mask() const {return m_comp_mask;} - - bool need_ack() const {return m_need_ack;} - int burst_count() const {return m_burst_count;} - - static ERatOp opcode(nir_intrinsic_op opcode); - - int data_swz(int chan) const {return m_data.chan_i(chan);} - - ECFOpCode cf_opcode() const { return m_cf_opcode;} - - void set_ack() {m_need_ack = true; } - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - - -private: - - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - ECFOpCode m_cf_opcode; - ERatOp m_rat_op; - - GPRVector m_data; - GPRVector m_index; - - int m_rat_id; - PValue m_rat_id_offset; - int m_burst_count; - int m_comp_mask; - int m_element_size; - - std::bitset<8> m_flags; - - bool m_need_ack; - -}; - -class GDSStoreTessFactor : public Instruction { -public: - GDSStoreTessFactor(GPRVector& value); - int sel() const {return m_value.sel();} - int chan(int i ) const {return m_value.chan_i(i);} - - void replace_values(const ValueSet& candiates, PValue new_value) override; - - bool 
accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - GPRVector m_value; -}; - -} - -#endif // SFN_GDSINSTR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp deleted file mode 100644 index b77461a..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp +++ /dev/null @@ -1,151 +0,0 @@ -#include "sfn_instruction_lds.h" - -namespace r600 { - -void LDSReadInstruction::do_print(std::ostream& os) const -{ - os << "LDS Read ["; - for (auto& v : m_dest_value) - os << *v << " "; - os << "], "; - for (auto& a : m_address) - os << *a << " "; -} - -LDSReadInstruction::LDSReadInstruction(std::vector& address, std::vector& value): - Instruction(lds_read), - m_address(address), - m_dest_value(value) -{ - assert(address.size() == value.size()); - - for (unsigned i = 0; i < address.size(); ++i) { - add_remappable_src_value(&m_address[i]); - add_remappable_dst_value(&m_dest_value[i]); - } -} - -void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - for (auto& c : candidates) { - for (auto& d: m_dest_value) { - if (*c == *d) - d = new_value; - } - - for (auto& a: m_address) { - if (*c == *a) - a = new_value; - } - } -} - -bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const -{ - auto& other = static_cast(lhs); - return m_address == other.m_address && - m_dest_value == other.m_dest_value; -} - -LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op): - Instruction(lds_atomic), - m_address(address), - m_dest_value(dest), - m_src0_value(src0), - m_src1_value(src1), - m_opcode(op) -{ - add_remappable_src_value(&m_src0_value); - 
add_remappable_src_value(&m_src1_value); - add_remappable_src_value(&m_address); - add_remappable_dst_value(&m_dest_value); -} - -LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op): - LDSAtomicInstruction(dest, src0, PValue(), address, op) -{ - -} - - -void LDSAtomicInstruction::do_print(std::ostream& os) const -{ - os << "LDS " << m_opcode << " " << *m_dest_value << " "; - os << "[" << *m_address << "] " << *m_src0_value; - if (m_src1_value) - os << ", " << *m_src1_value; -} - -bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const -{ - auto& other = static_cast(lhs); - - return m_opcode == other.m_opcode && - *m_dest_value == *other.m_dest_value && - *m_src0_value == *other.m_src0_value && - *m_address == *other.m_address && - ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) || - (!m_src1_value && !other.m_src1_value)); -} - -LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0): - LDSWriteInstruction::LDSWriteInstruction(address, idx_offset, value0, PValue()) - -{ -} - -LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1): - Instruction(lds_write), - m_address(address), - m_value0(value0), - m_value1(value1), - m_idx_offset(idx_offset) -{ - add_remappable_src_value(&m_address); - add_remappable_src_value(&m_value0); - if (m_value1) - add_remappable_src_value(&m_value1); -} - - -void LDSWriteInstruction::do_print(std::ostream& os) const -{ - os << "LDS Write" << num_components() - << " " << address() << ", " << value0(); - if (num_components() > 1) - os << ", " << value1(); -} - -void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - for (auto c: candidates) { - if (*c == *m_address) - m_address = new_value; - - if (*c == *m_value0) - m_value0 = new_value; - - if (*c == *m_value1) - m_value1 = new_value; - } -} - -bool 
LDSWriteInstruction::is_equal_to(const Instruction& lhs) const -{ - auto& other = static_cast(lhs); - - if (m_value1) { - if (!other.m_value1) - return false; - if (*m_value1 != *other.m_value1) - return false; - } else { - if (other.m_value1) - return false; - } - - return (m_value0 != other.m_value0 && - *m_address != *other.m_address); -} - -} // namespace r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h deleted file mode 100644 index 96439a7..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef LDSINSTRUCTION_H -#define LDSINSTRUCTION_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -class LDSReadInstruction : public Instruction { -public: - LDSReadInstruction(std::vector& value, std::vector& address); - void replace_values(const ValueSet& candidates, PValue new_value) override; - - unsigned num_values() const { return m_dest_value.size();} - const Value& address(unsigned i) const { return *m_address[i];} - const Value& dest(unsigned i) const { return *m_dest_value[i];} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Instruction& lhs) const override; - - std::vector m_address; - std::vector m_dest_value; -}; - -class LDSAtomicInstruction : public Instruction { -public: - LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op); - LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op); - - const Value& address() const { return *m_address;} - const Value& dest() const { return *m_dest_value;} - const Value& src0() const { return *m_src0_value;} - const PValue& src1() const { return m_src1_value;} - unsigned op() const {return m_opcode;} - - bool 
accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Instruction& lhs) const override; - - PValue m_address; - PValue m_dest_value; - PValue m_src0_value; - PValue m_src1_value; - unsigned m_opcode; -}; - -class LDSWriteInstruction : public Instruction { -public: - LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0); - LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1); - - const Value& address() const {return *m_address;}; - const Value& value0() const { return *m_value0;} - const Value& value1() const { return *m_value1;} - unsigned num_components() const { return m_value1 ? 2 : 1;} - unsigned idx_offset() const {return m_idx_offset;}; - - void replace_values(const ValueSet& candidates, PValue new_value) override; - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Instruction& lhs) const override; - - PValue m_address; - PValue m_value0; - PValue m_value1; - unsigned m_idx_offset; - -}; - -} - -#endif // LDSINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp deleted file mode 100644 index 1c1a98c..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without 
limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_instruction_misc.h" - -namespace r600 { -EmitVertex::EmitVertex(int stream, bool cut): - Instruction (emit_vtx), - m_stream(stream), - m_cut(cut) -{ - -} - -bool EmitVertex::is_equal_to(const Instruction& lhs) const -{ - auto& oth = static_cast(lhs); - return oth.m_stream == m_stream && - oth.m_cut == m_cut; -} - -void EmitVertex::do_print(std::ostream& os) const -{ - os << (m_cut ? 
"EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream; -} - -WaitAck::WaitAck(int nack): - Instruction (wait_ack), - m_nack(nack) -{ - -} - -bool WaitAck::is_equal_to(const Instruction& lhs) const -{ - const auto& l = static_cast(lhs); - return m_nack == l.m_nack; -} - -void WaitAck::do_print(std::ostream& os) const -{ - os << "WAIT_ACK @" << m_nack; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h deleted file mode 100644 index d322b4a..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_INSTRUCTION_MISC_H -#define SFN_INSTRUCTION_MISC_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -class EmitVertex : public Instruction { -public: - EmitVertex(int stream, bool cut); - ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;} - int stream() const { return m_stream;} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - int m_stream; - bool m_cut; -}; - -class WaitAck : public Instruction { -public: - WaitAck(int nack); - ECFOpCode op() const {return cf_wait_ack;} - int n_ack() const {return m_nack;} - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - int m_nack; -}; - -} - -#endif // SFN_INSTRUCTION_MISC_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h deleted file mode 100644 index 2fe7cba..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h +++ /dev/null @@ -1,143 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The 
above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef INSTRUCTION_TEX_H -#define INSTRUCTION_TEX_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -class TexInstruction : public Instruction { -public: - enum Opcode { - ld = FETCH_OP_LD, - get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO, - get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES, - get_tex_lod = FETCH_OP_GET_LOD, - get_gradient_h = FETCH_OP_GET_GRADIENTS_H, - get_gradient_v = FETCH_OP_GET_GRADIENTS_V, - set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS, - keep_gradients = FETCH_OP_KEEP_GRADIENTS, - set_gradient_h = FETCH_OP_SET_GRADIENTS_H, - set_gradient_v = FETCH_OP_SET_GRADIENTS_V, - sample = FETCH_OP_SAMPLE, - sample_l = FETCH_OP_SAMPLE_L, - sample_lb = FETCH_OP_SAMPLE_LB, - sample_lz = FETCH_OP_SAMPLE_LZ, - sample_g = FETCH_OP_SAMPLE_G, - sample_g_lb = FETCH_OP_SAMPLE_G_L, - gather4 = FETCH_OP_GATHER4, - gather4_o = FETCH_OP_GATHER4_O, - - sample_c = FETCH_OP_SAMPLE_C, - sample_c_l = FETCH_OP_SAMPLE_C_L, - sample_c_lb = FETCH_OP_SAMPLE_C_LB, - sample_c_lz = FETCH_OP_SAMPLE_C_LZ, - sample_c_g = FETCH_OP_SAMPLE_C_G, - sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L, - gather4_c = FETCH_OP_GATHER4_C, - gather4_c_o = FETCH_OP_GATHER4_C_O, - - }; - - enum Flags { - x_unnormalized, - y_unnormalized, - z_unnormalized, - w_unnormalized, - grad_fine - }; - - TexInstruction(Opcode op, const GPRVector& dest, const 
GPRVector& src, unsigned sid, - unsigned rid, PValue sampler_offset); - - const GPRVector& src() const {return m_src;} - const GPRVector& dst() const {return m_dst;} - unsigned opcode() const {return m_opcode;} - unsigned sampler_id() const {return m_sampler_id;} - unsigned resource_id() const {return m_resource_id;} - - void replace_values(const ValueSet& candidates, PValue new_value) override; - - void set_offset(unsigned index, int32_t val); - int get_offset(unsigned index) const; - - void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;} - - int inst_mode() const { return m_inst_mode;} - - void set_flag(Flags flag) { - m_flags.set(flag); - } - - PValue sampler_offset() const { - return m_sampler_offset; - } - - bool has_flag(Flags flag) const { - return m_flags.test(flag); - } - - int dest_swizzle(int i) const { - assert(i < 4); - return m_dest_swizzle[i]; - } - - void set_dest_swizzle(const std::array& swz) { - m_dest_swizzle = swz; - } - - void set_gather_comp(int cmp); - - bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} - bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} - -private: - bool is_equal_to(const Instruction& lhs) const override; - void do_print(std::ostream& os) const override; - - static const char *opname(Opcode code); - - Opcode m_opcode; - GPRVector m_dst; - GPRVector m_src; - unsigned m_sampler_id; - unsigned m_resource_id; - std::bitset<8> m_flags; - int m_offset[3]; - int m_inst_mode; - std::array m_dest_swizzle; - PValue m_sampler_offset; -}; - -bool r600_nir_lower_int_tg4(nir_shader *nir); -bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader); -bool r600_nir_lower_cube_to_2darray(nir_shader *shader); - -} - -#endif // INSTRUCTION_TEX_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp deleted file mode 100644 index e69de29..0000000 diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h deleted file mode 100644 index 9b34fcd..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef INSTRUCTIONVISITOR_H -#define INSTRUCTIONVISITOR_H - -namespace r600 { - - -class AluInstruction; -class ExportInstruction; -class TexInstruction; -class FetchInstruction; -class IfInstruction; -class ElseInstruction; -class IfElseEndInstruction; -class LoopBeginInstruction; -class LoopEndInstruction; -class LoopBreakInstruction; -class LoopContInstruction; -class StreamOutIntruction; -class MemRingOutIntruction; -class EmitVertex; -class WaitAck; -class WriteScratchInstruction; -class GDSInstr; -class RatInstruction; -class LDSWriteInstruction; -class LDSReadInstruction; -class LDSAtomicInstruction; -class GDSStoreTessFactor; -class InstructionBlock; - -class InstructionVisitor -{ -public: - virtual ~InstructionVisitor() {}; - virtual bool visit(AluInstruction& i) = 0; - virtual bool visit(ExportInstruction& i) = 0; - virtual bool visit(TexInstruction& i) = 0; - virtual bool visit(FetchInstruction& i) = 0; - virtual bool visit(IfInstruction& i) = 0; - virtual bool visit(ElseInstruction& i) = 0; - virtual bool visit(IfElseEndInstruction& i) = 0; - virtual bool visit(LoopBeginInstruction& i) = 0; - virtual bool visit(LoopEndInstruction& i) = 0; - virtual bool visit(LoopBreakInstruction& i) = 0; - virtual bool visit(LoopContInstruction& i) = 0; - virtual bool visit(StreamOutIntruction& i) = 0; - virtual bool visit(MemRingOutIntruction& i) = 0; - virtual bool visit(EmitVertex& i) = 0; - virtual bool visit(WaitAck& i) = 0; - virtual bool visit(WriteScratchInstruction& i) = 0; - virtual bool visit(GDSInstr& i) = 0; - virtual bool visit(RatInstruction& i) = 0; - virtual bool visit(LDSWriteInstruction& i) = 0; - virtual bool visit(LDSReadInstruction& i) = 0; - virtual bool visit(LDSAtomicInstruction& i) = 0; - 
virtual bool visit(GDSStoreTessFactor& i) = 0; - virtual bool visit(InstructionBlock& i) = 0; -}; - -class ConstInstructionVisitor -{ -public: - virtual ~ConstInstructionVisitor() {}; - virtual bool visit(const AluInstruction& i) = 0; - virtual bool visit(const ExportInstruction& i) = 0; - virtual bool visit(const TexInstruction& i) = 0; - virtual bool visit(const FetchInstruction& i) = 0; - virtual bool visit(const IfInstruction& i) = 0; - virtual bool visit(const ElseInstruction& i) = 0; - virtual bool visit(const IfElseEndInstruction& i) = 0; - virtual bool visit(const LoopBeginInstruction& i) = 0; - virtual bool visit(const LoopEndInstruction& i) = 0; - virtual bool visit(const LoopBreakInstruction& i) = 0; - virtual bool visit(const LoopContInstruction& i) = 0; - virtual bool visit(const StreamOutIntruction& i) = 0; - virtual bool visit(const MemRingOutIntruction& i) = 0; - virtual bool visit(const EmitVertex& i) = 0; - virtual bool visit(const WaitAck& i) = 0; - virtual bool visit(const WriteScratchInstruction& i) = 0; - virtual bool visit(const GDSInstr& i) = 0; - virtual bool visit(const RatInstruction& i) = 0; - virtual bool visit(const LDSWriteInstruction& i) = 0; - virtual bool visit(const LDSReadInstruction& i) = 0; - virtual bool visit(const LDSAtomicInstruction& i) = 0; - virtual bool visit(const GDSStoreTessFactor& i) = 0; - virtual bool visit(const InstructionBlock& i) = 0; -}; - -} - -#endif // INSTRUCTIONVISITOR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h deleted file mode 100644 index 0c82032..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "sfn_nir.h" - -struct r600_shader; -union r600_shader_key; - -namespace r600 { - -class AssemblyFromShaderLegacy : public AssemblyFromShader { -public: - AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key); - ~AssemblyFromShaderLegacy() override; -private: - bool do_lower(const std::vector &ir) override ; - - struct AssemblyFromShaderLegacyImpl *impl; -}; - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp deleted file mode 100644 index 28eef05..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * Copyright (c) 2017-2019 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit 
persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_liverange.h" -#include "sfn_debug.h" -#include "sfn_value.h" -#include "sfn_value_gpr.h" - -#include "program/prog_instruction.h" -#include "util/bitscan.h" -#include "util/u_math.h" - -#include -#include -#include - -/* std::sort is significantly faster than qsort */ -#include - -/* If is included this is defined and clashes with - * std::numeric_limits<>::max() - */ -#ifdef max -#undef max -#endif - - -namespace r600 { - -using std::numeric_limits; -using std::unique_ptr; -using std::setw; - -prog_scope_storage::prog_scope_storage(int n): - current_slot(0), - storage(n) -{ -} - -prog_scope_storage::~prog_scope_storage() -{ -} - -prog_scope* -prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, - int lvl, int s_begin) -{ - storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); - return &storage[current_slot++]; -} - -prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, - int depth, int scope_begin): - scope_type(type), - scope_id(id), - scope_nesting_depth(depth), - scope_begin(scope_begin), - scope_end(-1), - break_loop_line(numeric_limits::max()), - parent_scope(parent) -{ -} - -prog_scope::prog_scope(): - prog_scope(nullptr, undefined_scope, -1, 
-1, -1) -{ -} - -prog_scope_type prog_scope::type() const -{ - return scope_type; -} - -prog_scope *prog_scope::parent() const -{ - return parent_scope; -} - -int prog_scope::nesting_depth() const -{ - return scope_nesting_depth; -} - -bool prog_scope::is_loop() const -{ - return (scope_type == loop_body); -} - -bool prog_scope::is_in_loop() const -{ - if (scope_type == loop_body) - return true; - - if (parent_scope) - return parent_scope->is_in_loop(); - - return false; -} - -const prog_scope *prog_scope::innermost_loop() const -{ - if (scope_type == loop_body) - return this; - - if (parent_scope) - return parent_scope->innermost_loop(); - - return nullptr; -} - -const prog_scope *prog_scope::outermost_loop() const -{ - const prog_scope *loop = nullptr; - const prog_scope *p = this; - - do { - if (p->type() == loop_body) - loop = p; - p = p->parent(); - } while (p); - - return loop; -} - -bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const -{ - const prog_scope *my_parent = in_parent_ifelse_scope(); - while (my_parent) { - /* is a direct child? */ - if (my_parent == scope) - return false; - /* is a child of the conditions sibling? 
*/ - if (my_parent->id() == scope->id()) - return true; - my_parent = my_parent->in_parent_ifelse_scope(); - } - return false; -} - -bool prog_scope::is_child_of(const prog_scope *scope) const -{ - const prog_scope *my_parent = parent(); - while (my_parent) { - if (my_parent == scope) - return true; - my_parent = my_parent->parent(); - } - return false; -} - -const prog_scope *prog_scope::enclosing_conditional() const -{ - if (is_conditional()) - return this; - - if (parent_scope) - return parent_scope->enclosing_conditional(); - - return nullptr; -} - -bool prog_scope::contains_range_of(const prog_scope& other) const -{ - return (begin() <= other.begin()) && (end() >= other.end()); -} - -bool prog_scope::is_conditional() const -{ - return scope_type == if_branch || - scope_type == else_branch || - scope_type == switch_case_branch || - scope_type == switch_default_branch; -} - -const prog_scope *prog_scope::in_else_scope() const -{ - if (scope_type == else_branch) - return this; - - if (parent_scope) - return parent_scope->in_else_scope(); - - return nullptr; -} - -const prog_scope *prog_scope::in_parent_ifelse_scope() const -{ - if (parent_scope) - return parent_scope->in_ifelse_scope(); - else - return nullptr; -} - -const prog_scope *prog_scope::in_ifelse_scope() const -{ - if (scope_type == if_branch || - scope_type == else_branch) - return this; - - if (parent_scope) - return parent_scope->in_ifelse_scope(); - - return nullptr; -} - -bool prog_scope::is_switchcase_scope_in_loop() const -{ - return (scope_type == switch_case_branch || - scope_type == switch_default_branch) && - is_in_loop(); -} - -bool prog_scope::break_is_for_switchcase() const -{ - if (scope_type == loop_body) - return false; - - if (scope_type == switch_case_branch || - scope_type == switch_default_branch || - scope_type == switch_body) - return true; - - if (parent_scope) - return parent_scope->break_is_for_switchcase(); - - return false; -} - -int prog_scope::id() const -{ - return 
scope_id; -} - -int prog_scope::begin() const -{ - return scope_begin; -} - -int prog_scope::end() const -{ - return scope_end; -} - -void prog_scope::set_end(int end) -{ - if (scope_end == -1) - scope_end = end; -} - -void prog_scope::set_loop_break_line(int line) -{ - if (scope_type == loop_body) { - break_loop_line = MIN2(break_loop_line, line); - } else { - if (parent_scope) - parent()->set_loop_break_line(line); - } -} - -int prog_scope::loop_break_line() const -{ - return break_loop_line; -} - -temp_access::temp_access(): - access_mask(0), - needs_component_tracking(false), - is_array_element(false) -{ -} - -void temp_access::update_access_mask(int mask) -{ - if (access_mask && access_mask != mask) - needs_component_tracking = true; - access_mask |= mask; -} - -void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm) -{ - - - update_access_mask(writemask); - is_array_element |= is_array_elm; - - if (writemask & WRITEMASK_X) - comp[0].record_write(line, scope); - if (writemask & WRITEMASK_Y) - comp[1].record_write(line, scope); - if (writemask & WRITEMASK_Z) - comp[2].record_write(line, scope); - if (writemask & WRITEMASK_W) - comp[3].record_write(line, scope); -} - -void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm) -{ - update_access_mask(readmask); - is_array_element |= is_array_elm; - - if (readmask & WRITEMASK_X) - comp[0].record_read(line, scope); - if (readmask & WRITEMASK_Y) - comp[1].record_read(line, scope); - if (readmask & WRITEMASK_Z) - comp[2].record_read(line, scope); - if (readmask & WRITEMASK_W) - comp[3].record_read(line, scope); -} - -inline static register_live_range make_live_range(int b, int e) -{ - register_live_range lt; - lt.begin = b; - lt.end = e; - lt.is_array_elm = false; - return lt; -} - -register_live_range temp_access::get_required_live_range() -{ - register_live_range result = make_live_range(-1, -1); - - unsigned mask = access_mask; - while (mask) { 
- unsigned chan = u_bit_scan(&mask); - register_live_range lt = comp[chan].get_required_live_range(); - - if (lt.begin >= 0) { - if ((result.begin < 0) || (result.begin > lt.begin)) - result.begin = lt.begin; - } - - if (lt.end > result.end) - result.end = lt.end; - - if (!needs_component_tracking) - break; - } - result.is_array_elm = is_array_element; - - return result; -} - -const int -temp_comp_access::conditionality_untouched = std::numeric_limits::max(); - -const int -temp_comp_access::write_is_unconditional = std::numeric_limits::max() - 1; - - -temp_comp_access::temp_comp_access(): - last_read_scope(nullptr), - first_read_scope(nullptr), - first_write_scope(nullptr), - first_write(-1), - last_read(-1), - last_write(-1), - first_read(numeric_limits::max()), - conditionality_in_loop_id(conditionality_untouched), - if_scope_write_flags(0), - next_ifelse_nesting_depth(0), - current_unpaired_if_write_scope(nullptr), - was_written_in_current_else_scope(false) -{ -} - -void temp_comp_access::record_read(int line, prog_scope *scope) -{ - last_read_scope = scope; - if (last_read < line) - last_read = line; - - if (first_read > line) { - first_read = line; - first_read_scope = scope; - } - - /* If the conditionality of the first write is already resolved then - * no further checks are required. - */ - if (conditionality_in_loop_id == write_is_unconditional || - conditionality_in_loop_id == write_is_conditional) - return; - - /* Check whether we are in a condition within a loop */ - const prog_scope *ifelse_scope = scope->in_ifelse_scope(); - const prog_scope *enclosing_loop; - if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) { - - /* If we have either not yet written to this register nor writes are - * resolved as unconditional in the enclosing loop then check whether - * we read before write in an IF/ELSE branch. 
- */ - if ((conditionality_in_loop_id != write_is_conditional) && - (conditionality_in_loop_id != enclosing_loop->id())) { - - if (current_unpaired_if_write_scope) { - - /* Has been written in this or a parent scope? - this makes the temporary - * unconditionally set at this point. - */ - if (scope->is_child_of(current_unpaired_if_write_scope)) - return; - - /* Has been written in the same scope before it was read? */ - if (ifelse_scope->type() == if_branch) { - if (current_unpaired_if_write_scope->id() == scope->id()) - return; - } else { - if (was_written_in_current_else_scope) - return; - } - } - - /* The temporary was read (conditionally) before it is written, hence - * it should survive a loop. This can be signaled like if it were - * conditionally written. - */ - conditionality_in_loop_id = write_is_conditional; - } - } -} - -void temp_comp_access::record_write(int line, prog_scope *scope) -{ - last_write = line; - - if (first_write < 0) { - first_write = line; - first_write_scope = scope; - - /* If the first write we encounter is not in a conditional branch, or - * the conditional write is not within a loop, then this is to be - * considered an unconditional dominant write. - */ - const prog_scope *conditional = scope->enclosing_conditional(); - if (!conditional || !conditional->innermost_loop()) { - conditionality_in_loop_id = write_is_unconditional; - } - } - - /* The conditionality of the first write is already resolved. */ - if (conditionality_in_loop_id == write_is_unconditional || - conditionality_in_loop_id == write_is_conditional) - return; - - /* If the nesting depth is larger than the supported level, - * then we assume conditional writes. - */ - if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) { - conditionality_in_loop_id = write_is_conditional; - return; - } - - /* If we are in an IF/ELSE scope within a loop and the loop has not - * been resolved already, then record this write. 
- */ - const prog_scope *ifelse_scope = scope->in_ifelse_scope(); - if (ifelse_scope && ifelse_scope->innermost_loop() && - ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id) - record_ifelse_write(*ifelse_scope); -} - -void temp_comp_access::record_ifelse_write(const prog_scope& scope) -{ - if (scope.type() == if_branch) { - /* The first write in an IF branch within a loop implies unresolved - * conditionality (if it was untouched or unconditional before). - */ - conditionality_in_loop_id = conditionality_unresolved; - was_written_in_current_else_scope = false; - record_if_write(scope); - } else { - was_written_in_current_else_scope = true; - record_else_write(scope); - } -} - -void temp_comp_access::record_if_write(const prog_scope& scope) -{ - /* Don't record write if this IF scope if it ... - * - is not the first write in this IF scope, - * - has already been written in a parent IF scope. - * In both cases this write is a secondary write that doesn't contribute - * to resolve conditionality. - * - * Record the write if it - * - is the first one (obviously), - * - happens in an IF branch that is a child of the ELSE branch of the - * last active IF/ELSE pair. In this case recording this write is used to - * established whether the write is (un-)conditional in the scope enclosing - * this outer IF/ELSE pair. 
- */ - if (!current_unpaired_if_write_scope || - (current_unpaired_if_write_scope->id() != scope.id() && - scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) { - if_scope_write_flags |= 1 << next_ifelse_nesting_depth; - current_unpaired_if_write_scope = &scope; - next_ifelse_nesting_depth++; - } -} - -void temp_comp_access::record_else_write(const prog_scope& scope) -{ - int mask = 1 << (next_ifelse_nesting_depth - 1); - - /* If the temporary was written in an IF branch on the same scope level - * and this branch is the sibling of this ELSE branch, then we have a - * pair of writes that makes write access to this temporary unconditional - * in the enclosing scope. - */ - - if ((if_scope_write_flags & mask) && - (scope.id() == current_unpaired_if_write_scope->id())) { - --next_ifelse_nesting_depth; - if_scope_write_flags &= ~mask; - - /* The following code deals with propagating unconditionality from - * inner levels of nested IF/ELSE to the outer levels like in - * - * 1: var t; - * 2: if (a) { <- start scope A - * 3: if (b) - * 4: t = ... - * 5: else - * 6: t = ... - * 7: } else { <- start scope B - * 8: if (c) - * 9: t = ... - * A: else <- start scope C - * B: t = ... - * C: } - * - */ - - const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope(); - - if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { - /* We are at the end of scope C and already recorded a write - * within an IF scope (A), the sibling of the parent ELSE scope B, - * and it is not yet resolved. Mark that as the last relevant - * IF scope. Below the write will be resolved for the A/B - * scope pair. - */ - current_unpaired_if_write_scope = parent_ifelse; - } else { - current_unpaired_if_write_scope = nullptr; - } - /* Promote the first write scope to the enclosing scope because - * the current IF/ELSE pair is now irrelevant for the analysis. - * This is also required to evaluate the minimum life time for t in - * { - * var t; - * if (a) - * t = ... 
- * else - * t = ... - * x = t; - * ... - * } - */ - first_write_scope = scope.parent(); - - /* If some parent is IF/ELSE and in a loop then propagate the - * write to that scope. Otherwise the write is unconditional - * because it happens in both corresponding IF/ELSE branches - * in this loop, and hence, record the loop id to signal the - * resolution. - */ - if (parent_ifelse && parent_ifelse->is_in_loop()) { - record_ifelse_write(*parent_ifelse); - } else { - conditionality_in_loop_id = scope.innermost_loop()->id(); - } - } else { - /* The temporary was not written in the IF branch corresponding - * to this ELSE branch, hence the write is conditional. - */ - conditionality_in_loop_id = write_is_conditional; - } -} - -bool temp_comp_access::conditional_ifelse_write_in_loop() const -{ - return conditionality_in_loop_id <= conditionality_unresolved; -} - -void temp_comp_access::propagate_live_range_to_dominant_write_scope() -{ - first_write = first_write_scope->begin(); - int lr = first_write_scope->end(); - - if (last_read < lr) - last_read = lr; -} - -register_live_range temp_comp_access::get_required_live_range() -{ - bool keep_for_full_loop = false; - - /* This register component is not used at all, or only read, - * mark it as unused and ignore it when renaming. - * glsl_to_tgsi_visitor::renumber_registers will take care of - * eliminating registers that are not written to. 
- */ - if (last_write < 0) - return make_live_range(-1, -1); - - assert(first_write_scope); - - /* Only written to, just make sure the register component is not - * reused in the range it is used to write to - */ - if (!last_read_scope) - return make_live_range(first_write, last_write + 1); - - const prog_scope *enclosing_scope_first_read = first_read_scope; - const prog_scope *enclosing_scope_first_write = first_write_scope; - - /* We read before writing in a loop - * hence the value must survive the loops - */ - if ((first_read <= first_write) && - first_read_scope->is_in_loop()) { - keep_for_full_loop = true; - enclosing_scope_first_read = first_read_scope->outermost_loop(); - } - - /* A conditional write within a (nested) loop must survive the outermost - * loop if the last read was not within the same scope. - */ - const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); - if (conditional && !conditional->contains_range_of(*last_read_scope) && - (conditional->is_switchcase_scope_in_loop() || - conditional_ifelse_write_in_loop())) { - keep_for_full_loop = true; - enclosing_scope_first_write = conditional->outermost_loop(); - } - - /* Evaluate the scope that is shared by all: required first write scope, - * required first read before write scope, and last read scope. 
- */ - const prog_scope *enclosing_scope = enclosing_scope_first_read; - if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) - enclosing_scope = enclosing_scope_first_write; - - if (last_read_scope->contains_range_of(*enclosing_scope)) - enclosing_scope = last_read_scope; - - while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || - !enclosing_scope->contains_range_of(*last_read_scope)) { - enclosing_scope = enclosing_scope->parent(); - assert(enclosing_scope); - } - - /* Propagate the last read scope to the target scope */ - while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { - /* If the read is in a loop and we have to move up the scope we need to - * extend the live range to the end of this current loop because at this - * point we don't know whether the component was written before - * un-conditionally in the same loop. - */ - if (last_read_scope->is_loop()) - last_read = last_read_scope->end(); - - last_read_scope = last_read_scope->parent(); - } - - /* If the variable has to be kept for the whole loop, and we - * are currently in a loop, then propagate the live range. - */ - if (keep_for_full_loop && first_write_scope->is_loop()) - propagate_live_range_to_dominant_write_scope(); - - /* Propagate the first_dominant_write scope to the target scope */ - while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { - /* Propagate live_range if there was a break in a loop and the write was - * after the break inside that loop. Note, that this is only needed if - * we move up in the scopes. 
- */ - if (first_write_scope->loop_break_line() < first_write) { - keep_for_full_loop = true; - propagate_live_range_to_dominant_write_scope(); - } - - first_write_scope = first_write_scope->parent(); - - /* Propagate live_range if we are now in a loop */ - if (keep_for_full_loop && first_write_scope->is_loop()) - propagate_live_range_to_dominant_write_scope(); - } - - /* The last write past the last read is dead code, but we have to - * ensure that the component is not reused too early, hence extend the - * live_range past the last write. - */ - if (last_write >= last_read) - last_read = last_write + 1; - - /* Here we are at the same scope, all is resolved */ - return make_live_range(first_write, last_read); -} - -/* Helper class for sorting and searching the registers based - * on live ranges. */ -class register_merge_record { -public: - int begin; - int end; - int reg; - bool erase; - bool is_array_elm; - - bool operator < (const register_merge_record& rhs) const { - return begin < rhs.begin; - } -}; - -LiverangeEvaluator::LiverangeEvaluator(): - line(0), - loop_id(1), - if_id(1), - switch_id(0), - is_at_end(false), - n_scopes(1), - cur_scope(nullptr) -{ -} - -void LiverangeEvaluator::run(const Shader& shader, - std::vector& register_live_ranges) -{ - temp_acc.resize(register_live_ranges.size()); - fill(temp_acc.begin(), temp_acc.end(), temp_access()); - - sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n"; - - for (const auto& block: shader.m_ir) { - for (const auto& ir: block) { - switch (ir->type()) { - case Instruction::cond_if: - case Instruction::cond_else: - case Instruction::loop_begin: - ++n_scopes; - default: - ; - } - } - } - - scopes.reset(new prog_scope_storage(n_scopes)); - - cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line); - - line = 0; - - for (auto& v: shader.m_temp) { - if (v.second->type() == Value::gpr) { - sfn_log << SfnLog::merge << "Record " << *v.second << "\n"; - const auto& g = static_cast(*v.second); - if 
(g.is_input()) { - sfn_log << SfnLog::merge << "Record INPUT write for " - << g << " in " << temp_acc.size() << " temps\n"; - temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false); - temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false); - } - if (g.keep_alive()) { - sfn_log << SfnLog::merge << "Record KEEP ALIVE for " - << g << " in " << temp_acc.size() << " temps\n"; - temp_acc[g.sel()].record_read(0x7fffff, cur_scope, 1 << g.chan(), false); - } - } - } - - for (const auto& block: shader.m_ir) - for (const auto& ir: block) { - ir->evalue_liveness(*this); - if (ir->type() != Instruction::alu || - static_cast(*ir).flag(alu_last_instr)) - ++line; - } - - assert(cur_scope->type() == outer_scope); - cur_scope->set_end(line); - is_at_end = true; - - get_required_live_ranges(register_live_ranges); -} - - -void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm) -{ - sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n"; - if (src.type() == Value::gpr) { - const GPRValue& v = static_cast(src); - if (v.chan() < 4) - temp_acc[v.sel()].record_read(v.keep_alive() ? 
0x7fffff: line, cur_scope, 1 << v.chan(), is_array_elm); - return; - } else if (src.type() == Value::gpr_array_value) { - const GPRArrayValue& v = static_cast(src); - v.record_read(*this); - } else if (src.type() == Value::kconst) { - const UniformValue& v = static_cast(src); - if (v.addr()) - record_read(*v.addr(),is_array_elm); - } -} - -void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm) -{ - sfn_log << SfnLog::merge << "Record write for " - << src << " in " << temp_acc.size() << " temps\n"; - - if (src.type() == Value::gpr) { - const GPRValue& v = static_cast(src); - assert(v.sel() < temp_acc.size()); - if (v.chan() < 4) - temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm); - return; - } else if (src.type() == Value::gpr_array_value) { - const GPRArrayValue& v = static_cast(src); - v.record_write(*this); - } else if (src.type() == Value::kconst) { - const UniformValue& v = static_cast(src); - if (v.addr()) - record_write(*v.addr(),is_array_elm); - } -} - -void LiverangeEvaluator::record_read(const GPRVector& src) -{ - for (int i = 0; i < 4; ++i) - if (src.reg_i(i)) - record_read(*src.reg_i(i)); -} - -void LiverangeEvaluator::record_write(const GPRVector& dst) -{ - for (int i = 0; i < 4; ++i) - if (dst.reg_i(i)) - record_write(*dst.reg_i(i)); -} - -void LiverangeEvaluator::get_required_live_ranges(std::vector& register_live_ranges) -{ - sfn_log << SfnLog::merge << "== register live ranges ==========\n"; - for(unsigned i = 0; i < register_live_ranges.size(); ++i) { - sfn_log << SfnLog::merge << setw(4) << i; - register_live_ranges[i] = temp_acc[i].get_required_live_range(); - sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", " - << register_live_ranges[i].end << "]\n"; - } - sfn_log << SfnLog::merge << "==================================\n\n"; -} - -void LiverangeEvaluator::scope_if() -{ - cur_scope = scopes->create(cur_scope, if_branch, if_id++, - cur_scope->nesting_depth() + 1, line + 1); 
-} - -void LiverangeEvaluator::scope_else() -{ - assert(cur_scope->type() == if_branch); - cur_scope->set_end(line - 1); - cur_scope = scopes->create(cur_scope->parent(), else_branch, - cur_scope->id(), cur_scope->nesting_depth(), - line + 1); -} - -void LiverangeEvaluator::scope_endif() -{ - cur_scope->set_end(line - 1); - cur_scope = cur_scope->parent(); - assert(cur_scope); -} - -void LiverangeEvaluator::scope_loop_begin() -{ - cur_scope = scopes->create(cur_scope, loop_body, loop_id++, - cur_scope->nesting_depth() + 1, line); -} - -void LiverangeEvaluator::scope_loop_end() -{ - assert(cur_scope->type() == loop_body); - cur_scope->set_end(line); - cur_scope = cur_scope->parent(); - assert(cur_scope); -} - -void LiverangeEvaluator::scope_loop_break() -{ - cur_scope->set_loop_break_line(line); -} - -/* This functions evaluates the register merges by using a binary - * search to find suitable merge candidates. */ - -std::vector -get_temp_registers_remapping(const std::vector& live_ranges) -{ - - std::vector result(live_ranges.size(), rename_reg_pair{false, false, 0}); - std::vector reg_access; - - for (unsigned i = 0; i < live_ranges.size(); ++i) { - if (live_ranges[i].begin >= 0) { - register_merge_record r; - r.begin = live_ranges[i].begin; - r.end = live_ranges[i].end; - r.is_array_elm = live_ranges[i].is_array_elm; - r.reg = i; - r.erase = false; - reg_access.push_back(r); - } - } - - std::sort(reg_access.begin(), reg_access.end()); - - for (auto& r : reg_access) - sfn_log << SfnLog::merge << "Use Range " <reg << "[" << trgt->begin << ", " << trgt->end << "]\n"; - - - auto src = upper_bound(search_start, reg_access_end, trgt->end, - [](int bound, const register_merge_record& m){ - return bound < m.begin && !m.is_array_elm;} - ); - - if (src != reg_access_end) { - result[src->reg].new_reg = trgt->reg; - result[src->reg].valid = true; - - sfn_log << SfnLog::merge << "Map " - << src->reg << "[" << src->begin << ", " << src->end << "] to " - << trgt->reg << "[" << 
trgt->begin << ", " << trgt->end << ":"; - trgt->end = src->end; - sfn_log << SfnLog::merge << trgt->end << "]\n"; - - /* Since we only search forward, don't remove the renamed - * register just now, only mark it. */ - src->erase = true; - - if (first_erase == reg_access_end) - first_erase = src; - - search_start = src + 1; - } else { - /* Moving to the next target register it is time to remove - * the already merged registers from the search range */ - if (first_erase != reg_access_end) { - auto outp = first_erase; - auto inp = first_erase + 1; - - while (inp != reg_access_end) { - if (!inp->erase) - *outp++ = *inp; - ++inp; - } - - reg_access_end = outp; - first_erase = reg_access_end; - } - ++trgt; - search_start = trgt + 1; - } - } - return result; -} - -} // end ns r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.h b/src/gallium/drivers/r600/sfn/sfn_liverange.h deleted file mode 100644 index 8b9ed2e..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_liverange.h +++ /dev/null @@ -1,314 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SFN_LIVERANGE_H -#define SFN_LIVERANGE_H - -#include -#include -#include -#include - -#include "sfn_instruction_base.h" -#include "sfn_nir.h" - -namespace r600 { - -/** Storage to record the required live range of a temporary register - * begin == end == -1 indicates that the register can be reused without - * limitations. Otherwise, "begin" indicates the first instruction in which - * a write operation may target this temporary, and end indicates the - * last instruction in which a value can be read from this temporary. - * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin. - */ -struct register_live_range { - int begin; - int end; - bool is_array_elm; -}; - -enum prog_scope_type { - outer_scope, /* Outer program scope */ - loop_body, /* Inside a loop */ - if_branch, /* Inside if branch */ - else_branch, /* Inside else branch */ - switch_body, /* Inside switch statement */ - switch_case_branch, /* Inside switch case statement */ - switch_default_branch, /* Inside switch default statement */ - undefined_scope -}; - -class prog_scope { -public: - prog_scope(); - prog_scope(prog_scope *parent, prog_scope_type type, int id, - int depth, int begin); - - prog_scope_type type() const; - prog_scope *parent() const; - int nesting_depth() const; - int id() const; - int end() const; - int begin() const; - int loop_break_line() const; - - const prog_scope *in_else_scope() const; - const prog_scope *in_ifelse_scope() const; - const prog_scope *in_parent_ifelse_scope() const; - const prog_scope *innermost_loop() const; - const prog_scope *outermost_loop() const; - const prog_scope *enclosing_conditional() const; - - bool is_loop() const; - bool is_in_loop() 
const; - bool is_switchcase_scope_in_loop() const; - bool is_conditional() const; - bool is_child_of(const prog_scope *scope) const; - bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const; - - bool break_is_for_switchcase() const; - bool contains_range_of(const prog_scope& other) const; - - void set_end(int end); - void set_loop_break_line(int line); - -private: - prog_scope_type scope_type; - int scope_id; - int scope_nesting_depth; - int scope_begin; - int scope_end; - int break_loop_line; - prog_scope *parent_scope; -}; - -/* Some storage class to encapsulate the prog_scope (de-)allocations */ -class prog_scope_storage { -public: - prog_scope_storage(int n); - ~prog_scope_storage(); - prog_scope * create(prog_scope *p, prog_scope_type type, int id, - int lvl, int s_begin); -private: - int current_slot; - std::vector storage; -}; - -/* Class to track the access to a component of a temporary register. */ - -class temp_comp_access { -public: - temp_comp_access(); - - void record_read(int line, prog_scope *scope); - void record_write(int line, prog_scope *scope); - register_live_range get_required_live_range(); -private: - void propagate_live_range_to_dominant_write_scope(); - bool conditional_ifelse_write_in_loop() const; - - void record_ifelse_write(const prog_scope& scope); - void record_if_write(const prog_scope& scope); - void record_else_write(const prog_scope& scope); - - prog_scope *last_read_scope; - prog_scope *first_read_scope; - prog_scope *first_write_scope; - - int first_write; - int last_read; - int last_write; - int first_read; - - /* This member variable tracks the current resolution of conditional writing - * to this temporary in IF/ELSE clauses. - * - * The initial value "conditionality_untouched" indicates that this - * temporary has not yet been written to within an if clause. - * - * A positive (other than "conditionality_untouched") number refers to the - * last loop id for which the write was resolved as unconditional. 
With each - * new loop this value will be overwitten by "conditionality_unresolved" - * on entering the first IF clause writing this temporary. - * - * The value "conditionality_unresolved" indicates that no resolution has - * been achieved so far. If the variable is set to this value at the end of - * the processing of the whole shader it also indicates a conditional write. - * - * The value "write_is_conditional" marks that the variable is written - * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at - * least one loop. - */ - int conditionality_in_loop_id; - - /* Helper constants to make the tracking code more readable. */ - static const int write_is_conditional = -1; - static const int conditionality_unresolved = 0; - static const int conditionality_untouched; - static const int write_is_unconditional; - - /* A bit field tracking the nexting levels of if-else clauses where the - * temporary has (so far) been written to in the if branch, but not in the - * else branch. - */ - unsigned int if_scope_write_flags; - - int next_ifelse_nesting_depth; - static const int supported_ifelse_nesting_depth = 32; - - /* Tracks the last if scope in which the temporary was written to - * without a write in the corresponding else branch. Is also used - * to track read-before-write in the according scope. - */ - const prog_scope *current_unpaired_if_write_scope; - - /* Flag to resolve read-before-write in the else scope. */ - bool was_written_in_current_else_scope; -}; - -/* Class to track the access to all components of a temporary register. 
*/ -class temp_access { -public: - temp_access(); - void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm); - void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm); - register_live_range get_required_live_range(); -private: - void update_access_mask(int mask); - - temp_comp_access comp[4]; - int access_mask; - bool needs_component_tracking; - bool is_array_element; -}; - -/* Helper class to merge the live ranges of an arrays. - * - * For arrays the array length, live range, and component access needs to - * be kept, because when live ranges are merged or arrays are interleaved - * one can only merge or interleave an array into another with equal or more - * elements. For interleaving it is also required that the sum of used swizzles - * is at most four. - */ - -class array_live_range { -public: - array_live_range(); - array_live_range(unsigned aid, unsigned alength); - array_live_range(unsigned aid, unsigned alength, int first_access, - int last_access, int mask); - - void set_live_range(int first_access, int last_access); - void set_begin(int _begin){first_access = _begin;} - void set_end(int _end){last_access = _end;} - void set_access_mask(int s); - - static void merge(array_live_range *a, array_live_range *b); - static void interleave(array_live_range *a, array_live_range *b); - - int array_id() const {return id;} - int target_array_id() const {return target_array ? target_array->id : 0;} - const array_live_range *final_target() const {return target_array ? 
- target_array->final_target() : this;} - unsigned array_length() const { return length;} - int begin() const { return first_access;} - int end() const { return last_access;} - int access_mask() const { return component_access_mask;} - int used_components() const {return used_component_count;} - - bool time_doesnt_overlap(const array_live_range& other) const; - - void print(std::ostream& os) const; - - bool is_mapped() const { return target_array != nullptr;} - - int8_t remap_one_swizzle(int8_t idx) const; - -private: - void init_swizzles(); - void set_target(array_live_range *target); - void merge_live_range_from(array_live_range *other); - void interleave_into(array_live_range *other); - - unsigned id; - unsigned length; - int first_access; - int last_access; - uint8_t component_access_mask; - uint8_t used_component_count; - array_live_range *target_array; - int8_t swizzle_map[4]; -}; - - - -class LiverangeEvaluator { -public: - LiverangeEvaluator(); - - void run(const Shader& shader, - std::vector ®ister_live_ranges); - - void scope_if(); - void scope_else(); - void scope_endif(); - void scope_loop_begin(); - void scope_loop_end(); - void scope_loop_break(); - - void record_read(const Value& src, bool is_array_elm = false); - void record_write(const Value& dst, bool is_array_elm = false); - - void record_read(const GPRVector& src); - void record_write(const GPRVector& dst); - -private: - - prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id, - int lvl, int s_begin); - - - void get_required_live_ranges(std::vector& register_live_ranges); - - int line; - int loop_id; - int if_id; - int switch_id; - bool is_at_end; - int n_scopes; - std::unique_ptr scopes; - prog_scope *cur_scope; - - std::vector temp_acc; - -}; - -std::vector -get_temp_registers_remapping(const std::vector& live_ranges); - -} // end namespace r600 - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp 
b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp new file mode 100644 index 0000000..b02a51ed --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp @@ -0,0 +1,438 @@ +#include "sfn_liverangeevaluator.h" +#include "sfn_liverangeevaluator_helpers.h" + +#include "sfn_instr_alugroup.h" +#include "sfn_instr_controlflow.h" +#include "sfn_instr_export.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_tex.h" +#include "sfn_shader.h" +#include "sfn_debug.h" + +#include +#include + +namespace r600 { + +class LiveRangeInstrVisitor : public InstrVisitor { +public: + LiveRangeInstrVisitor(LiveRangeMap& live_range_map); + + void visit(AluInstr *instr) override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override; + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + void visit(ControlFlowInstr *instr) override; + void visit(IfInstr *instr) override; + void visit(WriteScratchInstr *instr) override; + void visit(StreamOutInstr *instr) override; + void visit(MemRingOutInstr *instr) override; + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override; + void visit(WriteTFInstr *instr) override; + void visit(LDSAtomicInstr *instr) override; + void visit(LDSReadInstr *instr) override; + void visit(RatInstr *instr) override; + + void finalize(); +private: + + void record_write(const Register *reg); + void record_read(const Register *reg, LiveRangeEntry::EUse use); + + void record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle& swizzle); + void record_read(const RegisterVec4 ®, LiveRangeEntry::EUse use); + + void scope_if(); + void scope_else(); + void scope_endif(); + void scope_loop_begin(); + void scope_loop_end(); + void scope_loop_break(); + ProgramScope *create_scope(ProgramScope *parent, ProgramScopeType type, + int id, int nesting_depth, int line); + + std::vector> 
m_scopes; + ProgramScope *m_current_scope; + LiveRangeMap& m_live_range_map; + RegisterAccess m_register_access; + + int m_line{0}; + int m_if_id{1}; + int m_loop_id{1}; +}; + +LiveRangeEvaluator::LiveRangeEvaluator() +{ + +} + + +LiveRangeMap LiveRangeEvaluator::run(Shader& sh) +{ + + LiveRangeMap range_map = sh.prepare_live_range_map(); + + + LiveRangeInstrVisitor evaluator(range_map); + + for (auto& b : sh.func()) + b->accept(evaluator); + + evaluator.finalize(); + + return range_map; +} + +void LiveRangeInstrVisitor::finalize() +{ + m_current_scope->set_end(m_line); + + for (int i = 0; i < 4; ++i) { + + auto& live_ranges = m_live_range_map.component(i); + for(const auto& r : live_ranges) { + if (r.m_register->live_end_pinned()) + record_read(r.m_register, LiveRangeEntry::use_unspecified); + } + + auto& comp_access = m_register_access.component(i); + + for (size_t i = 0; i < comp_access.size(); ++i) { + sfn_log << SfnLog::merge << "Evaluate access for " << *live_ranges[i].m_register << "\n"; + + auto& rca = comp_access[i]; + rca.update_required_live_range(); + live_ranges[i].m_start = rca.range().start; + live_ranges[i].m_end = rca.range().end; + live_ranges[i].m_use = rca.use_type(); + } + } +} + +LiveRangeInstrVisitor::LiveRangeInstrVisitor(LiveRangeMap& live_range_map): + m_live_range_map(live_range_map), + m_register_access(live_range_map.sizes()) +{ + if (sfn_log.has_debug_flag(SfnLog::merge)) { + sfn_log << SfnLog::merge << "Have component register numbers: "; + for (auto n : live_range_map.sizes()) + sfn_log << n << " "; + sfn_log << "\n"; + } + + m_scopes.push_back(std::make_unique(nullptr, outer_scope, 0, 0, 0)); + m_current_scope = m_scopes[0].get(); + + for (int i = 0; i < 4; ++i) { + const auto& comp = live_range_map.component(i); + for(const auto& r : comp) { + if (r.m_register->live_start_pinned()) + record_write(r.m_register); + } + } + m_line = 1; +} + +void LiveRangeInstrVisitor::record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle 
&swizzle) +{ + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 6 && reg[i]->chan() < 4) + record_write(reg[i]); + } +} + +void LiveRangeInstrVisitor::record_read(const RegisterVec4& reg, LiveRangeEntry::EUse use) +{ + for (int i = 0; i < 4; ++i) { + if (reg[i]->chan() < 4) + record_read(reg[i], use); + } +} + +void LiveRangeInstrVisitor::scope_if() +{ + m_current_scope = create_scope(m_current_scope, if_branch, m_if_id++, + m_current_scope->nesting_depth() + 1, m_line + 1); +} + +void LiveRangeInstrVisitor::scope_else() +{ + assert(m_current_scope->type() == if_branch); + m_current_scope->set_end(m_line - 1); + + m_current_scope = create_scope(m_current_scope->parent(), else_branch, m_current_scope->id(), + m_current_scope->nesting_depth() + 1, m_line + 1); +} + +void LiveRangeInstrVisitor::scope_endif() +{ + m_current_scope->set_end(m_line - 1); + m_current_scope = m_current_scope->parent(); + assert(m_current_scope); +} + +void LiveRangeInstrVisitor::scope_loop_begin() +{ + m_current_scope = create_scope(m_current_scope, loop_body, m_loop_id++, + m_current_scope->nesting_depth() + 1, m_line); +} + +void LiveRangeInstrVisitor::scope_loop_end() +{ + m_current_scope->set_end(m_line); + m_current_scope = m_current_scope->parent(); + assert(m_current_scope); +} + +void LiveRangeInstrVisitor::scope_loop_break() +{ + m_current_scope->set_loop_break_line(m_line); +} + +ProgramScope *LiveRangeInstrVisitor::create_scope(ProgramScope *parent, ProgramScopeType type, + int id, int nesting_depth, int line) +{ + m_scopes.emplace_back(std::make_unique(parent, type, id, nesting_depth, line)); + return m_scopes[m_scopes.size() - 1].get(); +} + +void LiveRangeInstrVisitor::visit(AluInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + if (instr->has_alu_flag(alu_write)) + record_write(instr->dest()); + for (unsigned i = 0; i < instr->n_sources(); ++i) { + record_read(instr->src(i).as_register(), LiveRangeEntry::use_unspecified); + auto uniform = 
instr->src(i).as_uniform(); + if (uniform && uniform->buf_addr()) { + record_read(uniform->buf_addr()->as_register(), LiveRangeEntry::use_unspecified); + } + } +} + +void LiveRangeInstrVisitor::visit(AluGroup *group) +{ + for (auto i : *group) + if (i) + i->accept(*this); +} + +void LiveRangeInstrVisitor::visit(TexInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + record_write(instr->dst(), instr->all_dest_swizzle()); + + auto src = instr->src(); + record_read(src, LiveRangeEntry::use_unspecified); + + if (instr->sampler_offset() && instr->sampler_offset()->as_register()) + record_read(instr->sampler_offset()->as_register(), LiveRangeEntry::use_unspecified); + +} + +void LiveRangeInstrVisitor::visit(ExportInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + auto src = instr->value(); + record_read(src, LiveRangeEntry::use_export); +} + +void LiveRangeInstrVisitor::visit(FetchInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + record_write(instr->dst(), instr->all_dest_swizzle()); + auto& src = instr->src(); + if (src.chan() < 4) /* Channel can be 7 to disable source */ + record_read(&src, LiveRangeEntry::use_unspecified); +} + +void LiveRangeInstrVisitor::visit(Block *instr) +{ + sfn_log << SfnLog::merge << "Visit block\n"; + for (auto i : *instr) { + i->accept(*this); + if (i->end_group()) + ++m_line; + } + sfn_log << SfnLog::merge << "End block\n"; +} + +void LiveRangeInstrVisitor::visit(WriteScratchInstr *instr) +{ + auto& src = instr->value(); + for (int i = 0; i < 4; ++i) { + if ((1 << i) & instr->write_mask()) { + record_read(src[i], LiveRangeEntry::use_unspecified); + } + } + + auto addr = instr->address(); + if (addr) + record_read(addr, LiveRangeEntry::use_unspecified); +} + +void LiveRangeInstrVisitor::visit(StreamOutInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + auto src = instr->value(); + record_read(src, LiveRangeEntry::use_export); +} + +void 
LiveRangeInstrVisitor::visit(MemRingOutInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + auto src = instr->value(); + record_read(src, LiveRangeEntry::use_export); + + auto idx = instr->export_index(); + if (idx && idx->as_register()) + record_read(idx->as_register(), LiveRangeEntry::use_unspecified); +} + +void LiveRangeInstrVisitor::visit(ControlFlowInstr *instr) +{ + switch (instr->cf_type()) { + case ControlFlowInstr::cf_else: scope_else(); break; + case ControlFlowInstr::cf_endif: scope_endif(); break; + case ControlFlowInstr::cf_loop_begin: scope_loop_begin(); break; + case ControlFlowInstr::cf_loop_end: scope_loop_end(); break; + case ControlFlowInstr::cf_loop_break: scope_loop_break(); break; + case ControlFlowInstr::cf_loop_continue: break; + case ControlFlowInstr::cf_wait_ack: break; + default: + unreachable("Flow control unreachable"); + } +} + +void LiveRangeInstrVisitor::visit(IfInstr *instr) +{ + instr->predicate()->accept(*this); + scope_if(); +} + +void LiveRangeInstrVisitor::visit(GDSInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + record_read(instr->src(), LiveRangeEntry::use_unspecified); + if (instr->uav_id()) + record_read(instr->uav_id(), LiveRangeEntry::use_unspecified); + record_write(instr->dest()); +} + +void LiveRangeInstrVisitor::visit(RatInstr *instr) +{ + sfn_log << SfnLog::merge << "Visit " << *instr << "\n"; + record_read(instr->value(), LiveRangeEntry::use_unspecified); + record_read(instr->addr(), LiveRangeEntry::use_unspecified); + + auto idx = instr->rat_id_offset(); + if (idx) + record_read(idx, LiveRangeEntry::use_unspecified); +} + + +void LiveRangeInstrVisitor::visit(WriteTFInstr *instr) +{ + record_read(instr->value(), LiveRangeEntry::use_export); +} + +void LiveRangeInstrVisitor::visit(UNUSED LDSAtomicInstr *instr) +{ + unreachable("LDSAtomicInstr must be lowered before scheduling and live range evaluation"); +} + +void LiveRangeInstrVisitor::visit(UNUSED LDSReadInstr 
*instr) +{ + unreachable("LDSReadInstr must be lowered before scheduling and live range evaluation"); +} + +void LiveRangeInstrVisitor::record_write(const Register *reg) +{ + auto addr = reg->get_addr(); + if (addr && addr->as_register()) { + record_read(addr->as_register(), LiveRangeEntry::use_unspecified); + + const auto av = static_cast(reg); + auto& array = av->array(); + + sfn_log << SfnLog::merge << array << " write:" << m_line << "\n"; + + for (auto i = 0u; i < array.size(); ++i) { + auto& rav = m_register_access(array(i, reg->chan())); + rav.record_write(m_line, m_current_scope); + } + } else { + auto& ra = m_register_access(*reg); + sfn_log << SfnLog::merge << *reg << " write:" << m_line << "\n"; + ra.record_write(m_line, m_current_scope); + } +} + +void LiveRangeInstrVisitor::record_read(const Register *reg, LiveRangeEntry::EUse use) +{ + if (!reg) + return; + + auto addr = reg->get_addr(); + if (addr && addr->as_register()) { + sfn_log << SfnLog::merge << "Record reading address register " << *addr << "\n"; + + auto& ra = m_register_access(*addr->as_register()); + ra.record_read(m_line, m_current_scope, use); + + const auto av = static_cast(reg); + auto& array = av->array(); + sfn_log << SfnLog::merge << array << " read:" << m_line << "\n"; + + for (auto i = 0u; i < array.size(); ++i) { + auto& rav = m_register_access(array(i, reg->chan())); + rav.record_read(m_line, m_current_scope, use); + } + } else { + sfn_log << SfnLog::merge << *reg << " read:" << m_line << "\n"; + auto& ra = m_register_access(*reg); + ra.record_read(m_line, m_current_scope, use); + } +} + +std::ostream& operator << (std::ostream& os, const LiveRangeMap& lrm) +{ + os << "Live ranges\n"; + for (int i = 0; i < 4; ++i) { + const auto& comp = lrm.component(i); + for (auto& range : comp) + os << " " << range << "\n"; + } + return os; +} + +bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs) +{ + for (int i = 0; i < 4; ++i) { + const auto& lc = lhs.component(i); + const 
auto& rc = rhs.component(i); + if (lc.size() != rc.size()) + return false; + + for (auto j = 0u; j < lc.size(); ++j) { + const auto& lv = lc[j]; + const auto& rv = rc[j]; + + if (lv.m_start != rv.m_start || + lv.m_end != rv.m_end || + lv.m_color != rv.m_color || + !lv.m_register->equal_to(*rv.m_register)) + return false; + } + } + + return true; +} + + +} + diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h new file mode 100644 index 0000000..3518902 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h @@ -0,0 +1,23 @@ +#ifndef LIFERANGEEVALUATOR_H +#define LIFERANGEEVALUATOR_H + +#include "sfn_valuefactory.h" + +#include +#include + +namespace r600 { + +class Shader; + +class LiveRangeEvaluator { +public: + + LiveRangeEvaluator(); + + LiveRangeMap run(Shader &sh); +}; + +} + +#endif // LIFERANGEEVALUATOR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp new file mode 100644 index 0000000..20294ac --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp @@ -0,0 +1,623 @@ +#include "sfn_liverangeevaluator_helpers.h" + +#include "sfn_virtualvalues.h" + +#include "util/u_math.h" + +#include +#include +#include + +namespace r600 { + +ProgramScope::ProgramScope(ProgramScope *parent, ProgramScopeType type, int id, + int depth, int scope_begin): + scope_type(type), + scope_id(id), + scope_nesting_depth(depth), + scope_begin(scope_begin), + scope_end(-1), + break_loop_line(std::numeric_limits::max()), + parent_scope(parent) +{ +} + +ProgramScope::ProgramScope(): + ProgramScope(nullptr, undefined_scope, -1, -1, -1) +{ +} + +ProgramScopeType ProgramScope::type() const +{ + return scope_type; +} + +ProgramScope *ProgramScope::parent() const +{ + return parent_scope; +} + +int ProgramScope::nesting_depth() const +{ + return scope_nesting_depth; +} + +bool 
ProgramScope::is_loop() const +{ + return (scope_type == loop_body); +} + +bool ProgramScope::is_in_loop() const +{ + if (scope_type == loop_body) + return true; + + if (parent_scope) + return parent_scope->is_in_loop(); + + return false; +} + +const ProgramScope *ProgramScope::innermost_loop() const +{ + if (scope_type == loop_body) + return this; + + if (parent_scope) + return parent_scope->innermost_loop(); + + return nullptr; +} + +const ProgramScope *ProgramScope::outermost_loop() const +{ + const ProgramScope *loop = nullptr; + const ProgramScope *p = this; + + do { + if (p->type() == loop_body) + loop = p; + p = p->parent(); + } while (p); + + return loop; +} + +bool ProgramScope::is_child_of_ifelse_id_sibling(const ProgramScope *scope) const +{ + const ProgramScope *my_parent = in_parent_ifelse_scope(); + while (my_parent) { + /* is a direct child? */ + if (my_parent == scope) + return false; + /* is a child of the conditions sibling? */ + if (my_parent->id() == scope->id()) + return true; + my_parent = my_parent->in_parent_ifelse_scope(); + } + return false; +} + +bool ProgramScope::is_child_of(const ProgramScope *scope) const +{ + const ProgramScope *my_parent = parent(); + while (my_parent) { + if (my_parent == scope) + return true; + my_parent = my_parent->parent(); + } + return false; +} + +const ProgramScope *ProgramScope::enclosing_conditional() const +{ + if (is_conditional()) + return this; + + if (parent_scope) + return parent_scope->enclosing_conditional(); + + return nullptr; +} + +bool ProgramScope::contains_range_of(const ProgramScope& other) const +{ + return (begin() <= other.begin()) && (end() >= other.end()); +} + +bool ProgramScope::is_conditional() const +{ + return scope_type == if_branch || + scope_type == else_branch || + scope_type == switch_case_branch || + scope_type == switch_default_branch; +} + +const ProgramScope *ProgramScope::in_else_scope() const +{ + if (scope_type == else_branch) + return this; + + if (parent_scope) + 
return parent_scope->in_else_scope(); + + return nullptr; +} + +const ProgramScope *ProgramScope::in_parent_ifelse_scope() const +{ + if (parent_scope) + return parent_scope->in_ifelse_scope(); + else + return nullptr; +} + +const ProgramScope *ProgramScope::in_ifelse_scope() const +{ + if (scope_type == if_branch || + scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_ifelse_scope(); + + return nullptr; +} + +bool ProgramScope::is_switchcase_scope_in_loop() const +{ + return (scope_type == switch_case_branch || + scope_type == switch_default_branch) && + is_in_loop(); +} + +bool ProgramScope::break_is_for_switchcase() const +{ + if (scope_type == loop_body) + return false; + + if (scope_type == switch_case_branch || + scope_type == switch_default_branch || + scope_type == switch_body) + return true; + + if (parent_scope) + return parent_scope->break_is_for_switchcase(); + + return false; +} + +int ProgramScope::id() const +{ + return scope_id; +} + +int ProgramScope::begin() const +{ + return scope_begin; +} + +int ProgramScope::end() const +{ + return scope_end; +} + +void ProgramScope::set_end(int end) +{ + if (scope_end == -1) + scope_end = end; +} + +void ProgramScope::set_loop_break_line(int line) +{ + if (scope_type == loop_body) { + break_loop_line = MIN2(break_loop_line, line); + } else { + if (parent_scope) + parent()->set_loop_break_line(line); + } +} + +int ProgramScope::loop_break_line() const +{ + return break_loop_line; +} + +RegisterCompAccess::RegisterCompAccess(LiveRange range): + last_read_scope(nullptr), + first_read_scope(nullptr), + first_write_scope(nullptr), + first_write(range.start), + last_read(range.end), + last_write(range.start), + first_read(std::numeric_limits::max()), + conditionality_in_loop_id(conditionality_untouched), + if_scope_write_flags(0), + next_ifelse_nesting_depth(0), + current_unpaired_if_write_scope(nullptr), + was_written_in_current_else_scope(false), + m_range(range) +{ + +} + 
+RegisterCompAccess::RegisterCompAccess(): + RegisterCompAccess(LiveRange(-1,-1)) +{ +} + + +void RegisterCompAccess::record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use) +{ + last_read_scope = scope; + if (use != LiveRangeEntry::use_unspecified) + m_use_type.set(use); + if (last_read < line) + last_read = line; + + if (first_read > line) { + first_read = line; + first_read_scope = scope; + } + + /* If the conditionality of the first write is already resolved then + * no further checks are required. + */ + if (conditionality_in_loop_id == write_is_unconditional || + conditionality_in_loop_id == write_is_conditional) + return; + + /* Check whether we are in a condition within a loop */ + const ProgramScope *ifelse_scope = scope->in_ifelse_scope(); + const ProgramScope *enclosing_loop; + if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) { + + /* If we have either not yet written to this register nor writes are + * resolved as unconditional in the enclosing loop then check whether + * we read before write in an IF/ELSE branch. + */ + if ((conditionality_in_loop_id != write_is_conditional) && + (conditionality_in_loop_id != enclosing_loop->id())) { + + if (current_unpaired_if_write_scope) { + + /* Has been written in this or a parent scope? - this makes the temporary + * unconditionally set at this point. + */ + if (scope->is_child_of(current_unpaired_if_write_scope)) + return; + + /* Has been written in the same scope before it was read? */ + if (ifelse_scope->type() == if_branch) { + if (current_unpaired_if_write_scope->id() == scope->id()) + return; + } else { + if (was_written_in_current_else_scope) + return; + } + } + + /* The temporary was read (conditionally) before it is written, hence + * it should survive a loop. This can be signaled like if it were + * conditionally written. 
+ */ + conditionality_in_loop_id = write_is_conditional; + } + } +} + +void RegisterCompAccess::record_write(int line, ProgramScope *scope) +{ + last_write = line; + + if (first_write < 0) { + first_write = line; + first_write_scope = scope; + + /* If the first write we encounter is not in a conditional branch, or + * the conditional write is not within a loop, then this is to be + * considered an unconditional dominant write. + */ + const ProgramScope *conditional = scope->enclosing_conditional(); + if (!conditional || !conditional->innermost_loop()) { + conditionality_in_loop_id = write_is_unconditional; + } + } + + /* The conditionality of the first write is already resolved. */ + if (conditionality_in_loop_id == write_is_unconditional || + conditionality_in_loop_id == write_is_conditional) + return; + + /* If the nesting depth is larger than the supported level, + * then we assume conditional writes. + */ + if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) { + conditionality_in_loop_id = write_is_conditional; + return; + } + + /* If we are in an IF/ELSE scope within a loop and the loop has not + * been resolved already, then record this write. + */ + const ProgramScope *ifelse_scope = scope->in_ifelse_scope(); + if (ifelse_scope && ifelse_scope->innermost_loop() && + ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id) + record_ifelse_write(*ifelse_scope); +} + +void RegisterCompAccess::record_ifelse_write(const ProgramScope& scope) +{ + if (scope.type() == if_branch) { + /* The first write in an IF branch within a loop implies unresolved + * conditionality (if it was untouched or unconditional before). 
+ */ + conditionality_in_loop_id = conditionality_unresolved; + was_written_in_current_else_scope = false; + record_if_write(scope); + } else { + was_written_in_current_else_scope = true; + record_else_write(scope); + } +} + +void RegisterCompAccess::record_if_write(const ProgramScope& scope) +{ + /* Don't record write if this IF scope if it ... + * - is not the first write in this IF scope, + * - has already been written in a parent IF scope. + * In both cases this write is a secondary write that doesn't contribute + * to resolve conditionality. + * + * Record the write if it + * - is the first one (obviously), + * - happens in an IF branch that is a child of the ELSE branch of the + * last active IF/ELSE pair. In this case recording this write is used to + * established whether the write is (un-)conditional in the scope enclosing + * this outer IF/ELSE pair. + */ + if (!current_unpaired_if_write_scope || + (current_unpaired_if_write_scope->id() != scope.id() && + scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) { + if_scope_write_flags |= 1 << next_ifelse_nesting_depth; + current_unpaired_if_write_scope = &scope; + next_ifelse_nesting_depth++; + } +} + +void RegisterCompAccess::record_else_write(const ProgramScope& scope) +{ + int mask = 1 << (next_ifelse_nesting_depth - 1); + + /* If the temporary was written in an IF branch on the same scope level + * and this branch is the sibling of this ELSE branch, then we have a + * pair of writes that makes write access to this temporary unconditional + * in the enclosing scope. + */ + + if ((if_scope_write_flags & mask) && + (scope.id() == current_unpaired_if_write_scope->id())) { + --next_ifelse_nesting_depth; + if_scope_write_flags &= ~mask; + + /* The following code deals with propagating unconditionality from + * inner levels of nested IF/ELSE to the outer levels like in + * + * 1: var t; + * 2: if (a) { <- start scope A + * 3: if (b) + * 4: t = ... + * 5: else + * 6: t = ... 
+ * 7: } else { <- start scope B + * 8: if (c) + * 9: t = ... + * A: else <- start scope C + * B: t = ... + * C: } + * + */ + + const ProgramScope *parent_ifelse = scope.parent()->in_ifelse_scope(); + + if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { + /* We are at the end of scope C and already recorded a write + * within an IF scope (A), the sibling of the parent ELSE scope B, + * and it is not yet resolved. Mark that as the last relevant + * IF scope. Below the write will be resolved for the A/B + * scope pair. + */ + current_unpaired_if_write_scope = parent_ifelse; + } else { + current_unpaired_if_write_scope = nullptr; + } + /* Promote the first write scope to the enclosing scope because + * the current IF/ELSE pair is now irrelevant for the analysis. + * This is also required to evaluate the minimum life time for t in + * { + * var t; + * if (a) + * t = ... + * else + * t = ... + * x = t; + * ... + * } + */ + first_write_scope = scope.parent(); + + /* If some parent is IF/ELSE and in a loop then propagate the + * write to that scope. Otherwise the write is unconditional + * because it happens in both corresponding IF/ELSE branches + * in this loop, and hence, record the loop id to signal the + * resolution. + */ + if (parent_ifelse && parent_ifelse->is_in_loop()) { + record_ifelse_write(*parent_ifelse); + } else { + conditionality_in_loop_id = scope.innermost_loop()->id(); + } + } else { + /* The temporary was not written in the IF branch corresponding + * to this ELSE branch, hence the write is conditional. 
+ */ + conditionality_in_loop_id = write_is_conditional; + } +} + +bool RegisterCompAccess::conditional_ifelse_write_in_loop() const +{ + return conditionality_in_loop_id <= conditionality_unresolved; +} + +void RegisterCompAccess::propagate_live_range_to_dominant_write_scope() +{ + first_write = first_write_scope->begin(); + int lr = first_write_scope->end(); + + if (last_read < lr) + last_read = lr; +} + +void RegisterCompAccess::update_required_live_range() +{ + bool keep_for_full_loop = false; + + /* This register component is not used at all, or only read, + * mark it as unused and ignore it when renaming. + * glsl_to_tgsi_visitor::renumber_registers will take care of + * eliminating registers that are not written to. + */ + if (last_write < 0) { + m_range.start = -1; + m_range.end = -1; + return; + } + + /* Only written to, just make sure the register component is not + * reused in the range it is used to write to + */ + if (!last_read_scope) { + m_range.start = first_write; + m_range.end = last_write + 1; + return; + } + + assert(first_write_scope || m_range.start >= 0); + + /* The register was pre-defines, so th first write scope is the outerpost scopw */ + if (!first_write_scope) { + first_write_scope = first_read_scope; + while (first_write_scope->parent()) + first_write_scope = first_write_scope->parent(); + } + + const ProgramScope *enclosing_scope_first_read = first_read_scope; + const ProgramScope *enclosing_scope_first_write = first_write_scope; + + /* We read before writing in a loop + * hence the value must survive the loops + */ + if ((first_read <= first_write) && + first_read_scope->is_in_loop()) { + keep_for_full_loop = true; + enclosing_scope_first_read = first_read_scope->outermost_loop(); + } + + /* A conditional write within a (nested) loop must survive the outermost + * loop if the last read was not within the same scope. 
+ */ + const ProgramScope *conditional = enclosing_scope_first_write->enclosing_conditional(); + if (conditional && !conditional->contains_range_of(*last_read_scope) && + (conditional->is_switchcase_scope_in_loop() || + conditional_ifelse_write_in_loop())) { + keep_for_full_loop = true; + enclosing_scope_first_write = conditional->outermost_loop(); + } + + /* Evaluate the scope that is shared by all: required first write scope, + * required first read before write scope, and last read scope. + */ + const ProgramScope *enclosing_scope = enclosing_scope_first_read; + if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) + enclosing_scope = enclosing_scope_first_write; + + if (last_read_scope->contains_range_of(*enclosing_scope)) + enclosing_scope = last_read_scope; + + while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || + !enclosing_scope->contains_range_of(*last_read_scope)) { + enclosing_scope = enclosing_scope->parent(); + assert(enclosing_scope); + } + + /* Propagate the last read scope to the target scope */ + while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { + /* If the read is in a loop and we have to move up the scope we need to + * extend the live range to the end of this current loop because at this + * point we don't know whether the component was written before + * un-conditionally in the same loop. + */ + if (last_read_scope->is_loop()) + last_read = last_read_scope->end(); + + last_read_scope = last_read_scope->parent(); + } + + /* If the variable has to be kept for the whole loop, and we + * are currently in a loop, then propagate the live range. 
+ */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + + /* Propagate the first_dominant_write scope to the target scope */ + while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { + /* Propagate live_range if there was a break in a loop and the write was + * after the break inside that loop. Note, that this is only needed if + * we move up in the scopes. + */ + if (first_write_scope->loop_break_line() < first_write) { + keep_for_full_loop = true; + propagate_live_range_to_dominant_write_scope(); + } + + first_write_scope = first_write_scope->parent(); + + /* Propagate live_range if we are now in a loop */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + } + + /* The last write past the last read is dead code, but we have to + * ensure that the component is not reused too early, hence extend the + * live_range past the last write. + */ + if (last_write >= last_read) + last_read = last_write + 1; + + /* Here we are at the same scope, all is resolved */ + m_range.start = first_write; + m_range.end = last_read; +} + +const int +RegisterCompAccess::conditionality_untouched = std::numeric_limits::max(); + +const int +RegisterCompAccess::write_is_unconditional = std::numeric_limits::max() - 1; + + +RegisterAccess::RegisterAccess(const std::array& sizes) +{ + for (int i = 0; i < 4; ++i) + m_access_record[i].resize(sizes[i]); +} + +RegisterCompAccess& RegisterAccess::operator() (const Register& reg) +{ + assert(reg.chan() < 4); + assert(m_access_record[reg.chan()].size() > (size_t)reg.index()); + return m_access_record[reg.chan()][reg.index()]; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h new file mode 100644 index 0000000..3a1fed5 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h @@ -0,0 +1,162 @@ 
+#ifndef SFN_LIFERANGEEVALUATOR_HELPERS_H +#define SFN_LIFERANGEEVALUATOR_HELPERS_H + +#include "sfn_valuefactory.h" + +namespace r600 { + +enum ProgramScopeType { + outer_scope, /* Outer program scope */ + loop_body, /* Inside a loop */ + if_branch, /* Inside if branch */ + else_branch, /* Inside else branch */ + switch_body, /* Inside switch statement */ + switch_case_branch, /* Inside switch case statement */ + switch_default_branch, /* Inside switch default statement */ + undefined_scope +}; + +class ProgramScope { +public: + ProgramScope(); + ProgramScope(ProgramScope *parent, ProgramScopeType type, int id, + int depth, int begin); + + ProgramScopeType type() const; + ProgramScope *parent() const; + int nesting_depth() const; + int id() const; + int end() const; + int begin() const; + int loop_break_line() const; + + const ProgramScope *in_else_scope() const; + const ProgramScope *in_ifelse_scope() const; + const ProgramScope *in_parent_ifelse_scope() const; + const ProgramScope *innermost_loop() const; + const ProgramScope *outermost_loop() const; + const ProgramScope *enclosing_conditional() const; + + bool is_loop() const; + bool is_in_loop() const; + bool is_switchcase_scope_in_loop() const; + bool is_conditional() const; + bool is_child_of(const ProgramScope *scope) const; + bool is_child_of_ifelse_id_sibling(const ProgramScope *scope) const; + + bool break_is_for_switchcase() const; + bool contains_range_of(const ProgramScope& other) const; + + void set_end(int end); + void set_loop_break_line(int line); + +private: + ProgramScopeType scope_type; + int scope_id; + int scope_nesting_depth; + int scope_begin; + int scope_end; + int break_loop_line; + ProgramScope *parent_scope; +}; + +/* Class to track the access to a component of a temporary register. 
*/ + +struct LiveRange; + +class RegisterCompAccess { +public: + RegisterCompAccess(); + RegisterCompAccess(LiveRange range); + + void record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use); + void record_write(int line, ProgramScope *scope); + + void update_required_live_range(); + + const auto& range() { return m_range;} + + const auto& use_type() { return m_use_type; } +private: + void propagate_live_range_to_dominant_write_scope(); + bool conditional_ifelse_write_in_loop() const; + + void record_ifelse_write(const ProgramScope& scope); + void record_if_write(const ProgramScope& scope); + void record_else_write(const ProgramScope& scope); + + ProgramScope *last_read_scope; + ProgramScope *first_read_scope; + ProgramScope *first_write_scope; + + int first_write; + int last_read; + int last_write; + int first_read; + + /* This member variable tracks the current resolution of conditional writing + * to this temporary in IF/ELSE clauses. + * + * The initial value "conditionality_untouched" indicates that this + * temporary has not yet been written to within an if clause. + * + * A positive (other than "conditionality_untouched") number refers to the + * last loop id for which the write was resolved as unconditional. With each + * new loop this value will be overwitten by "conditionality_unresolved" + * on entering the first IF clause writing this temporary. + * + * The value "conditionality_unresolved" indicates that no resolution has + * been achieved so far. If the variable is set to this value at the end of + * the processing of the whole shader it also indicates a conditional write. + * + * The value "write_is_conditional" marks that the variable is written + * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at + * least one loop. + */ + int conditionality_in_loop_id; + + /* Helper constants to make the tracking code more readable. 
*/ + static const int write_is_conditional = -1; + static const int conditionality_unresolved = 0; + static const int conditionality_untouched; + static const int write_is_unconditional; + + /* A bit field tracking the nexting levels of if-else clauses where the + * temporary has (so far) been written to in the if branch, but not in the + * else branch. + */ + unsigned int if_scope_write_flags; + + int next_ifelse_nesting_depth; + static const int supported_ifelse_nesting_depth = 32; + + /* Tracks the last if scope in which the temporary was written to + * without a write in the corresponding else branch. Is also used + * to track read-before-write in the according scope. + */ + const ProgramScope *current_unpaired_if_write_scope; + + /* Flag to resolve read-before-write in the else scope. */ + bool was_written_in_current_else_scope; + + LiveRange m_range; + + std::bitset m_use_type; +}; + +class RegisterAccess { +public: + using RegisterCompAccessVector = std::vector; + + RegisterAccess(const std::array& sizes); + + RegisterCompAccess& operator() (const Register& reg); + + auto& component(int i) { return m_access_record[i]; } + +private: + std::array m_access_record; +}; + +} +#endif // SFN_LIFERANGEEVALUATOR_HELPERS_H diff --git a/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp b/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp new file mode 100644 index 0000000..00e9c39 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp @@ -0,0 +1,86 @@ +#include "sfn_memorypool.h" + +#include +#include + +namespace r600 { + +struct MemoryPoolImpl { +public: + MemoryPoolImpl(); + ~MemoryPoolImpl(); + + using MemoryBacking = ::std::pmr::monotonic_buffer_resource; + + MemoryBacking *pool; +}; + +MemoryPool::MemoryPool() noexcept : impl(nullptr) +{ +} + +MemoryPool& MemoryPool::instance() +{ + static thread_local MemoryPool me; + me.initialize(); + return me; +} + +void MemoryPool::free() +{ + delete impl; + impl = nullptr; +} + +void MemoryPool::initialize() +{ + 
if (!impl) + impl = new MemoryPoolImpl(); +} + +void *MemoryPool::allocate(size_t size) +{ + return impl->pool->allocate(size); +} + +void *MemoryPool::allocate(size_t size, size_t align) +{ + return impl->pool->allocate(size, align); +} + +void MemoryPool::release_all() +{ + instance().free(); +} + +void init_pool() +{ + MemoryPool::instance(); +} + +void release_pool() +{ + MemoryPool::release_all(); +} + +void *Allocate::operator new(size_t size) +{ + return MemoryPool::instance().allocate(size); +} + +void Allocate::operator delete (void *p, size_t size) +{ + // MemoryPool::instance().deallocate(p, size); +} + +MemoryPoolImpl::MemoryPoolImpl() +{ + pool = new MemoryBacking(); +} + +MemoryPoolImpl::~MemoryPoolImpl() +{ + delete pool; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_memorypool.h b/src/gallium/drivers/r600/sfn/sfn_memorypool.h new file mode 100644 index 0000000..57d404d --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.h @@ -0,0 +1,69 @@ +#ifndef MEMORYPOOL_H +#define MEMORYPOOL_H + +#include +#include +#include + +#if __cplusplus >= 21703L +#include +#define R600_POINTER_TYPE(X) X * +#else +#error Need C++17 +#endif + +namespace r600 { + +void init_pool(); +void release_pool(); + +class Allocate +{ +public: + void * operator new(size_t size); + void operator delete (void *p, size_t size); +}; + +class MemoryPool { +public: + static MemoryPool& instance(); + static void release_all(); + + void free(); + void initialize(); + + void *allocate(size_t size); + void *allocate(size_t size, size_t align); + +private: + MemoryPool() noexcept; + + struct MemoryPoolImpl* impl; +}; + +template +struct Allocator { + using value_type = T; + + Allocator() = default; + Allocator(const Allocator& other) = default; + + template + Allocator(const Allocator& other) {(void)other;} + + T *allocate(size_t n) { + return (T *)MemoryPool::instance().allocate(n * sizeof(T), alignof(T)); + } + + void deallocate(void *p, size_t n) { + (void)p; (void)n; + 
//MemoryPool::instance().deallocate(p, n * sizeof(T), alignof(T)); + } + + friend bool operator == (const Allocator& lhs, const Allocator& rhs) { + (void)lhs; (void)rhs; return true;} +}; + +} + +#endif // MEMORYPOOL_H diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 46c10e4..fe5397e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -30,19 +30,19 @@ #include "../r600_pipe.h" #include "../r600_shader.h" -#include "util/u_prim.h" -#include "sfn_instruction_tex.h" +#include "util/u_prim.h" -#include "sfn_shader_vertex.h" -#include "sfn_shader_fragment.h" -#include "sfn_shader_geometry.h" -#include "sfn_shader_compute.h" -#include "sfn_shader_tcs.h" -#include "sfn_shader_tess_eval.h" +#include "sfn_shader.h" +#include "sfn_assembler.h" +#include "sfn_debug.h" +#include "sfn_liverangeevaluator.h" #include "sfn_nir_lower_fs_out_to_vector.h" -#include "sfn_ir_to_assembly.h" #include "sfn_nir_lower_alu.h" +#include "sfn_nir_lower_tex.h" +#include "sfn_optimizer.h" +#include "sfn_ra.h" +#include "sfn_scheduler.h" #include @@ -78,264 +78,11 @@ bool NirLowerInstruction::run(nir_shader *shader) (void *)this); } - -ShaderFromNir::ShaderFromNir():sh(nullptr), - gfx_level(CLASS_UNKNOWN), - m_current_if_id(0), - m_current_loop_id(0), - scratch_size(0) -{ -} - -bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader, - r600_pipe_shader_selector *sel, r600_shader_key& key, - struct r600_shader* gs_shader, enum amd_gfx_level _chip_class) -{ - sh = shader; - gfx_level = _chip_class; - assert(sh); - - switch (shader->info.stage) { - case MESA_SHADER_VERTEX: - impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level)); - break; - case MESA_SHADER_TESS_CTRL: - sfn_log << SfnLog::trans << "Start TCS\n"; - impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, gfx_level)); - break; - case MESA_SHADER_TESS_EVAL: - sfn_log << 
SfnLog::trans << "Start TESS_EVAL\n"; - impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level)); - break; - case MESA_SHADER_GEOMETRY: - sfn_log << SfnLog::trans << "Start GS\n"; - impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, gfx_level)); - break; - case MESA_SHADER_FRAGMENT: - sfn_log << SfnLog::trans << "Start FS\n"; - impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, gfx_level)); - break; - case MESA_SHADER_COMPUTE: - sfn_log << SfnLog::trans << "Start CS\n"; - impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, gfx_level)); - break; - default: - return false; - } - - sfn_log << SfnLog::trans << "Process declarations\n"; - if (!process_declaration()) - return false; - - // at this point all functions should be inlined - const nir_function *func = reinterpret_cast(exec_list_get_head_const(&sh->functions)); - - sfn_log << SfnLog::trans << "Scan shader\n"; - - if (sfn_log.has_debug_flag(SfnLog::instr)) - nir_print_shader(const_cast(shader), stderr); - - nir_foreach_block(block, func->impl) { - nir_foreach_instr(instr, block) { - if (!impl->scan_instruction(instr)) { - fprintf(stderr, "Unhandled sysvalue access "); - nir_print_instr(instr, stderr); - fprintf(stderr, "\n"); - return false; - } - } - } - - sfn_log << SfnLog::trans << "Reserve registers\n"; - if (!impl->allocate_reserved_registers()) { - return false; - } - - ValuePool::array_list arrays; - sfn_log << SfnLog::trans << "Allocate local registers\n"; - foreach_list_typed(nir_register, reg, node, &func->impl->registers) { - impl->allocate_local_register(*reg, arrays); - } - - sfn_log << SfnLog::trans << "Emit shader start\n"; - impl->allocate_arrays(arrays); - - impl->emit_shader_start(); - - sfn_log << SfnLog::trans << "Process shader \n"; - foreach_list_typed(nir_cf_node, node, node, &func->impl->body) { - if (!process_cf_node(node)) - return false; - } - - // Add optimizations here - sfn_log << SfnLog::trans << "Finalize\n"; - 
impl->finalize(); - - impl->get_array_info(pipe_shader->shader); - - if (!sfn_log.has_debug_flag(SfnLog::nomerge)) { - sfn_log << SfnLog::trans << "Merge registers\n"; - impl->remap_registers(); - } - - sfn_log << SfnLog::trans << "Finished translating to R600 IR\n"; - return true; -} - -Shader ShaderFromNir::shader() const -{ - return Shader{impl->m_output, impl->get_temp_registers()}; -} - - -bool ShaderFromNir::process_cf_node(nir_cf_node *node) -{ - SFN_TRACE_FUNC(SfnLog::flow, "CF"); - switch (node->type) { - case nir_cf_node_block: - return process_block(nir_cf_node_as_block(node)); - case nir_cf_node_if: - return process_if(nir_cf_node_as_if(node)); - case nir_cf_node_loop: - return process_loop(nir_cf_node_as_loop(node)); - default: - return false; - } -} - -bool ShaderFromNir::process_if(nir_if *if_stmt) -{ - SFN_TRACE_FUNC(SfnLog::flow, "IF"); - - if (!impl->emit_if_start(m_current_if_id, if_stmt)) - return false; - - int if_id = m_current_if_id++; - m_if_stack.push(if_id); - - foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) - if (!process_cf_node(n)) return false; - - if (!if_stmt->then_list.is_empty()) { - if (!impl->emit_else_start(if_id)) - return false; - - foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list) - if (!process_cf_node(n)) return false; - } - - if (!impl->emit_ifelse_end(if_id)) - return false; - - m_if_stack.pop(); - return true; -} - -bool ShaderFromNir::process_loop(nir_loop *node) -{ - SFN_TRACE_FUNC(SfnLog::flow, "LOOP"); - int loop_id = m_current_loop_id++; - - if (!impl->emit_loop_start(loop_id)) - return false; - - foreach_list_typed(nir_cf_node, n, node, &node->body) - if (!process_cf_node(n)) return false; - - if (!impl->emit_loop_end(loop_id)) - return false; - - return true; -} - -bool ShaderFromNir::process_block(nir_block *block) -{ - SFN_TRACE_FUNC(SfnLog::flow, "BLOCK"); - nir_foreach_instr(instr, block) { - int r = emit_instruction(instr); - if (!r) { - sfn_log << SfnLog::err << "R600: Unsupported 
instruction: " - << *instr << "\n"; - return false; - } - } - return true; -} - - -ShaderFromNir::~ShaderFromNir() -{ -} - -pipe_shader_type ShaderFromNir::processor_type() const -{ - return impl->m_processor_type; -} - - -bool ShaderFromNir::emit_instruction(nir_instr *instr) -{ - assert(impl); - - sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n"; - - switch (instr->type) { - case nir_instr_type_alu: - return impl->emit_alu_instruction(instr); - case nir_instr_type_deref: - return impl->emit_deref_instruction(nir_instr_as_deref(instr)); - case nir_instr_type_intrinsic: - return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr)); - case nir_instr_type_load_const: /* const values are loaded when needed */ - return true; - case nir_instr_type_tex: - return impl->emit_tex_instruction(instr); - case nir_instr_type_jump: - return impl->emit_jump_instruction(nir_instr_as_jump(instr)); - default: - fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type); - nir_print_instr(instr, stderr); - fprintf(stderr, "'\n"); - return false; - case nir_instr_type_ssa_undef: - return impl->create_undef(nir_instr_as_ssa_undef(instr)); - return true; - } -} - -bool ShaderFromNir::process_declaration() -{ - impl->set_shader_info(sh); - - if (!impl->scan_inputs_read(sh)) - return false; - - // scan declarations - nir_foreach_variable_with_modes(variable, sh, nir_var_uniform | - nir_var_mem_ubo | - nir_var_mem_ssbo) { - if (!impl->process_uniforms(variable)) { - fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name); - return false; - } - } - - return true; -} - -const std::vector& ShaderFromNir::shader_ir() const -{ - assert(impl); - return impl->m_output; -} - - AssemblyFromShader::~AssemblyFromShader() { } -bool AssemblyFromShader::lower(const std::vector& ir) +bool AssemblyFromShader::lower(const Shader& ir) { return do_lower(ir); } @@ -557,7 +304,6 @@ r600_nir_lower_atomics(nir_shader 
*shader) nir_metadata_dominance, NULL); } -using r600::r600_nir_lower_int_tg4; using r600::r600_lower_scratch_addresses; using r600::r600_lower_fs_out_to_vector; using r600::r600_lower_ubo_to_align16; @@ -676,6 +422,7 @@ r600_lower_shared_io(nir_shader *nir) static nir_ssa_def * r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options) { + (void)_options; auto old_ir = nir_instr_as_intrinsic(instr); auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); nir_ssa_dest_init(&load->instr, &load->dest, @@ -693,6 +440,8 @@ r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options) bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options) { + (void)_options; + if (instr->type != nir_instr_type_intrinsic) return false; @@ -713,7 +462,7 @@ bool r600_lower_fs_pos_input(nir_shader *shader) }; static bool -optimize_once(nir_shader *shader, bool vectorize) +optimize_once(nir_shader *shader) { bool progress = false; NIR_PASS(progress, shader, nir_lower_vars_to_ssa); @@ -722,9 +471,6 @@ optimize_once(nir_shader *shader, bool vectorize) NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_copy_prop_vars); - if (vectorize) - NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL); - NIR_PASS(progress, shader, nir_opt_remove_phis); if (nir_opt_trivial_continues(shader)) { @@ -777,13 +523,9 @@ bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: + return nir_src_bit_size(alu->src[0].src) == 64; case nir_op_cube_r600: return false; - case nir_op_bany_fnequal2: - case nir_op_ball_fequal2: - case nir_op_bany_inequal2: - case nir_op_ball_iequal2: - return nir_src_bit_size(alu->src[0].src) != 64; default: return true; } @@ -793,15 +535,13 @@ int r600_shader_from_nir(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, 
r600_shader_key *key) { - char filename[4000]; struct r600_pipe_shader_selector *sel = pipeshader->selector; - bool lower_64bit = ((sel->nir->options->lower_int64_options || + bool lower_64bit = (rctx->b.gfx_level < CAYMAN && + (sel->nir->options->lower_int64_options || sel->nir->options->lower_doubles_options) && (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64); - r600::ShaderFromNir convert; - if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) { fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n"); nir_print_shader(sel->nir, stderr); @@ -813,10 +553,7 @@ int r600_shader_from_nir(struct r600_context *rctx, /* Cayman seems very crashy about accessing images that don't exists or are * accessed out of range, this lowering seems to help (but it can also be * another problem */ - if (sel->nir->info.num_images > 0 && rctx->b.gfx_level == CAYMAN) - NIR_PASS_V(sel->nir, r600_legalize_image_load_store); - NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); nir_lower_idiv_options idiv_options = {0}; idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE; @@ -828,7 +565,7 @@ int r600_shader_from_nir(struct r600_context *rctx, if (lower_64bit) NIR_PASS_V(sel->nir, nir_lower_int64); - while(optimize_once(sel->nir, false)); + while(optimize_once(sel->nir)); NIR_PASS_V(sel->nir, r600_lower_shared_io); NIR_PASS_V(sel->nir, r600_nir_lower_atomics); @@ -839,8 +576,8 @@ int r600_shader_from_nir(struct r600_context *rctx, lower_tex_options.lower_invalid_implicit_lod = true; NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); - NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube); - NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray); + NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube); + NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray); NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16); @@ -851,30 +588,11 @@ int 
r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans); NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector); } + nir_variable_mode io_modes = nir_var_uniform | + nir_var_shader_in | + nir_var_shader_out; - nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in; - - //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) - io_modes |= nir_var_shader_out; - - if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { - - /* Lower IO to temporaries late, because otherwise we get into trouble - * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug - * somewhere that results in the input alweas reading from the same temp - * regardless of interpolation when the lowering is done early */ - NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir), - true, true); - - /* Since we're doing nir_lower_io_to_temporaries late, we need - * to lower all the copy_deref's introduced by - * lower_io_to_temporaries before calling nir_lower_io. 
- */ - NIR_PASS_V(sel->nir, nir_split_var_copies); - NIR_PASS_V(sel->nir, nir_lower_var_copies); - NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local); - } - + NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out); NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size, nir_lower_io_lower_64bit_to_32); @@ -916,14 +634,27 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode)); } + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false); + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); + + NIR_PASS_V(sh, r600::r600_nir_split_64bit_io); + NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi); + NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4); + NIR_PASS_V(sh, nir_lower_int64); + NIR_PASS_V(sh, nir_lower_ubo_vec4); + + if (lower_64bit) NIR_PASS_V(sh, r600::r600_nir_64_to_vec2); + NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo); /* Lower to scalar to let some optimization work out better */ - while(optimize_once(sh, false)); + while(optimize_once(sh)); - NIR_PASS_V(sh, r600::r600_merge_vec2_stores); + if (lower_64bit) + NIR_PASS_V(sh, r600::r600_merge_vec2_stores); NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL); NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL); @@ -934,7 +665,7 @@ int r600_shader_from_nir(struct r600_context *rctx, 40, r600_get_natural_size_align_bytes); - while (optimize_once(sh, true)); + while (optimize_once(sh)); NIR_PASS_V(sh, nir_lower_bool_to_int32); NIR_PASS_V(sh, r600_nir_lower_int_tg4); @@ -945,8 +676,6 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sh, nir_lower_locals_to_regs); - //NIR_PASS_V(sh, nir_opt_algebraic); - //NIR_PASS_V(sh, nir_copy_prop); NIR_PASS_V(sh, nir_lower_to_source_mods, 
(nir_lower_to_source_mods_flags)(nir_lower_float_source_mods | nir_lower_64bit_source_mods)); @@ -974,33 +703,66 @@ int r600_shader_from_nir(struct r600_context *rctx, pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size + sh->info.clip_distance_array_size)) - 1; } - - struct r600_shader* gs_shader = nullptr; + struct r600_shader* gs_shader = nullptr; if (rctx->gs_shader) gs_shader = &rctx->gs_shader->current->shader; r600_screen *rscreen = rctx->screen; - bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.gfx_level); - if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) { - static int shnr = 0; + r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader, + *key, rctx->isa->hw_class); - snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++); + assert(shader); + if (!shader) + return -2; - if (access(filename, F_OK) == -1) { - FILE *f = fopen(filename, "w"); + pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask(); + pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count(); + pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory); - if (f) { - fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name); - nir_print_shader(sh, f); - fprintf(f, ")\";\n"); - fclose(f); - } + if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) { + std::cerr << "Shader after conversion from nir\n"; + shader->print(std::cerr); + } + + if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) { + optimize(*shader); + + if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) { + std::cerr << "Shader after optimization\n"; + shader->print(std::cerr); } - if (!r) - return -2; } - auto shader = convert.shader(); + auto scheduled_shader = r600::schedule(shader); + if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) { + std::cerr << "Shader after scheduling\n"; + scheduled_shader->print(std::cerr); /* dump the scheduler's result, not the shader that was passed into it */ + } + + if 
(!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) { + + if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) { + r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n"; + scheduled_shader->print(std::cerr); + } + + r600::sfn_log << r600::SfnLog::trans << "Merge registers\n"; + auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader); + + if (!r600::register_allocation(lrm)) { + R600_ERR("%s: Register allocation failed\n", __func__); + /* For now crash if the shader could not be generated */ + assert(0); + return -1; + } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) || + r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) { + r600::sfn_log << "Shader after RA\n"; + scheduled_shader->print(std::cerr); + } + } + + scheduled_shader->get_shader_info(&pipeshader->shader); + pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0; r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family, rscreen->has_compressed_msaa_texturing); @@ -1012,9 +774,13 @@ int r600_shader_from_nir(struct r600_context *rctx, pipeshader->shader.bc.type = pipeshader->shader.processor_type; pipeshader->shader.bc.isa = rctx->isa; - r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key); - if (!afs.lower(shader.m_ir)) { + r600::Assembler afs(&pipeshader->shader, *key); + if (!afs.lower(scheduled_shader)) { R600_ERR("%s: Lowering to assembly failed\n", __func__); + + scheduled_shader->print(std::cerr); + /* For now crash if the shader could not be generated */ + assert(0); return -1; } @@ -1025,8 +791,5 @@ int r600_shader_from_nir(struct r600_context *rctx, } else { r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n"; } - if (pipeshader->shader.bc.ngpr < 6) - pipeshader->shader.bc.ngpr = 6; - return 0; } diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h index ee9ace6..0514cc3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.h +++ 
b/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -31,7 +31,7 @@ #include "nir_builder.h" #ifdef __cplusplus -#include "sfn_shader_base.h" +#include "sfn_shader.h" #include namespace r600 { @@ -64,56 +64,16 @@ bool r600_nir_64_to_vec2(nir_shader *sh); bool r600_merge_vec2_stores(nir_shader *shader); -class Shader { -public: - std::vector& m_ir; - ValueMap m_temp; -}; - -class ShaderFromNir { -public: - ShaderFromNir(); - ~ShaderFromNir(); - - unsigned ninputs() const; - - bool lower(const nir_shader *shader, r600_pipe_shader *sh, - r600_pipe_shader_selector *sel, r600_shader_key &key, - r600_shader *gs_shader, enum amd_gfx_level gfx_level); - - bool process_declaration(); - - pipe_shader_type processor_type() const; - - bool emit_instruction(nir_instr *instr); - - const std::vector &shader_ir() const; - - Shader shader() const; -private: - - bool process_block(); - bool process_cf_node(nir_cf_node *node); - bool process_if(nir_if *node); - bool process_loop(nir_loop *node); - bool process_block(nir_block *node); - - std::unique_ptr impl; - const nir_shader *sh; - - enum amd_gfx_level gfx_level; - int m_current_if_id; - int m_current_loop_id; - std::stack m_if_stack; - int scratch_size; -}; +bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh); +bool r600_lower_64bit_to_vec2(nir_shader *sh); +bool r600_split_64bit_alu_and_phi(nir_shader *sh); class AssemblyFromShader { public: virtual ~AssemblyFromShader(); - bool lower(const std::vector &ir); + bool lower(const Shader& s); private: - virtual bool do_lower(const std::vector& ir) = 0 ; + virtual bool do_lower(const Shader& s) = 0 ; }; } diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp index 4929cbc..4211d1f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp @@ -32,7 +32,8 @@ static nir_ssa_def * 
-r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_options) +r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, + UNUSED void *_options) { b->cursor = nir_before_instr(instr); auto ir = nir_instr_as_intrinsic(instr); @@ -143,7 +144,8 @@ r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_opt } static bool -r600_legalize_image_load_store_filter(const nir_instr *instr, const void *_options) +r600_legalize_image_load_store_filter(const nir_instr *instr, + UNUSED const void *_options) { if (instr->type != nir_instr_type_intrinsic) return false; diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index 88e0085..ba68cd7 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -108,6 +108,193 @@ private: }; +class LowerLoad64Uniform : public NirLowerInstruction { + bool filter(const nir_instr *instr) const override; + nir_ssa_def *lower(nir_instr *instr) override; +}; + +bool LowerLoad64Uniform::filter(const nir_instr *instr) const +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_uniform && + intr->intrinsic != nir_intrinsic_load_ubo && + intr->intrinsic != nir_intrinsic_load_ubo_vec4) + return false; + + return nir_dest_bit_size(intr->dest) == 64; +} + + +nir_ssa_def *LowerLoad64Uniform::lower(nir_instr *instr) +{ + auto intr = nir_instr_as_intrinsic(instr); + int old_components = nir_dest_num_components(intr->dest); + assert(old_components <= 2); + assert(intr->dest.is_ssa); + intr->dest.ssa.num_components *= 2; + intr->dest.ssa.bit_size = 32; + intr->num_components *= 2; + + if (intr->intrinsic ==nir_intrinsic_load_ubo || + intr->intrinsic ==nir_intrinsic_load_ubo_vec4) + nir_intrinsic_set_component(intr, 2 * nir_intrinsic_component(intr)); + + nir_ssa_def 
*result_vec[2] = {nullptr, nullptr}; + + for (int i = 0; i < old_components; ++i) { + result_vec[i] = nir_pack_64_2x32_split(b, + nir_channel(b, &intr->dest.ssa, 2 * i), + nir_channel(b, &intr->dest.ssa, 2 * i + 1)); + } + if (old_components == 1) + return result_vec[0]; + + return nir_vec2(b, result_vec[0], result_vec[1]); +} + +bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh) +{ + return LowerLoad64Uniform().run(sh); +} + +class LowerSplit64op : public NirLowerInstruction { + bool filter(const nir_instr *instr) const override { + switch (instr->type) { + case nir_instr_type_alu: { + auto alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_bcsel: + return nir_dest_bit_size(alu->dest.dest) == 64; + case nir_op_f2b1: + case nir_op_f2i32: + case nir_op_f2u32: + case nir_op_f2i64: + case nir_op_f2u64: + case nir_op_u2f64: + case nir_op_i2f64: + return nir_src_bit_size(alu->src[0].src) == 64; + default: + return false; + } + } + case nir_instr_type_phi: { + auto phi = nir_instr_as_phi(instr); + return nir_dest_num_components(phi->dest) == 64; /* NOTE(review): this compares the component count, not the bit size, with 64 (the bcsel case above uses nir_dest_bit_size); presumably a bit-size test was intended - confirm before changing, since it would enable the phi path in lower() */ + } + default: + return false; + } + } + + nir_ssa_def *lower(nir_instr *instr) override { + + switch (instr->type) { + case nir_instr_type_alu: { + auto alu = nir_instr_as_alu(instr); + switch (alu->op) { + + case nir_op_bcsel: { + auto lo = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1), + nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 1)), + nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 2))); + auto hi = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1), + nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 1)), + nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2))); + return nir_pack_64_2x32_split(b, lo, hi); + } + case nir_op_f2b1: { + auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest)); + return nir_fneu(b, nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask), + nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64)); + } + case 
nir_op_f2i32: { + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src); + auto abs_src = nir_fabs(b, src); + auto value = nir_f2u32(b, abs_src); + return nir_bcsel(b, gt0, value, nir_ineg(b, value)); + } + case nir_op_f2u32: { + /* fp32 doesn't hold sufficient bits to represent the full range of + * u32, therefore we have to split the values, and because f2f32 + * rounds, we have to remove the fractional part in the hi bits + * For values > UINT_MAX the result is undefined */ + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src); + auto highval = nir_fmul_imm(b, src, 1.0/65536.0); + auto fract = nir_ffract(b, highval); + auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract))); + auto lowval = nir_fmul_imm(b, fract, 65536.0); + auto low = nir_f2u32(b, nir_f2f32(b, lowval)); + return nir_bcsel(b, gt0, nir_ior(b, nir_ishl_imm(b, high, 16), low), + nir_imm_int(b, 0)); + } + case nir_op_f2i64: { + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src); + auto abs_src = nir_fabs(b, src); + auto value = nir_f2u64(b, abs_src); + return nir_bcsel(b, gt0, value, nir_isub(b, nir_imm_zero(b, 1, 64), value)); + } + case nir_op_f2u64: { + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src); + auto highval = nir_fmul_imm(b, src, 1.0/(65536.0 * 65536.0)); + auto fract = nir_ffract(b, highval); + auto high = nir_f2u32(b, nir_fsub(b, highval, fract)); + auto low = nir_f2u32(b, nir_fmul_imm(b, fract, 65536.0 * 65536.0)); + return nir_bcsel(b, gt0, nir_pack_64_2x32_split(b, low, high), + nir_imm_zero(b, 1, 64)); + } + case nir_op_u2f64: { + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto low = nir_unpack_64_2x32_split_x(b, src); + auto high = nir_unpack_64_2x32_split_y(b, src); + auto flow = nir_u2f64(b, low); + auto fhigh = nir_u2f64(b, high); + return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 
65536.0), flow); + } + case nir_op_i2f64: { + auto src = nir_ssa_for_alu_src(b, alu, 0); + auto low = nir_unpack_64_2x32_split_x(b, src); + auto high = nir_unpack_64_2x32_split_y(b, src); + auto flow = nir_u2f64(b, low); + auto fhigh = nir_i2f64(b, high); + return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow); + } + default: + unreachable("trying to lower instruction that was not in filter"); + } + } + case nir_instr_type_phi: { + auto phi = nir_instr_as_phi(instr); + auto phi_lo = nir_phi_instr_create(b->shader); + auto phi_hi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi_lo->instr, &phi_lo->dest, phi->dest.ssa.num_components * 2, 32, ""); + nir_ssa_dest_init(&phi_hi->instr, &phi_hi->dest, phi->dest.ssa.num_components * 2, 32, ""); + nir_foreach_phi_src(s, phi) { + auto lo = nir_unpack_64_2x32_split_x(b, nir_ssa_for_src(b, s->src, 1)); /* low 32 bits of the 64-bit phi source, same split as the bcsel case */ + auto hi = nir_unpack_64_2x32_split_y(b, nir_ssa_for_src(b, s->src, 1)); /* high 32 bits; previously a copy of the low-half expression */ + nir_phi_instr_add_src(phi_lo, s->pred, nir_src_for_ssa(lo)); + nir_phi_instr_add_src(phi_hi, s->pred, nir_src_for_ssa(hi)); + } + return nir_pack_64_2x32_split(b, &phi_lo->dest.ssa, &phi_hi->dest.ssa); + } + default: + unreachable("Trying to lower instruction that was not in filter"); + } + } +}; + +bool r600_split_64bit_alu_and_phi(nir_shader *sh) +{ + return LowerSplit64op().run(sh); +} + + bool LowerSplit64BitVar::filter(const nir_instr *instr) const { @@ -271,7 +458,7 @@ LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref } nir_ssa_def * -LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref) +LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, UNUSED nir_deref_instr *deref) { auto old_var = nir_intrinsic_get_var(intr, 0); unsigned old_components = old_var->type->without_array()->components(); @@ -556,8 +743,6 @@ LowerSplit64BitVar::lower(nir_instr *instr) } case nir_instr_type_alu: { auto alu = nir_instr_as_alu(instr); - 
nir_print_instr(instr, stderr); - fprintf(stderr, "\n"); switch (alu->op) { case nir_op_bany_fnequal3: return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior); @@ -845,7 +1030,7 @@ static bool store_64bit_intr(nir_src *src, void *state) return !*s; } -static bool double2vec2(nir_src *src, void *state) +static bool double2vec2(nir_src *src, UNUSED void *state) { if (nir_src_bit_size(*src) != 64) return true; @@ -1058,6 +1243,206 @@ bool r600_merge_vec2_stores(nir_shader *shader) return merger.combine(); } +static bool +r600_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr) +{ + b->cursor = nir_after_instr(&instr->instr); + + switch (instr->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_vec4: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_ssbo: + break; + default: + return false; + } + + if (instr->num_components <= 2) + return false; + + bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest; + if (has_dest) { + if (nir_dest_bit_size(instr->dest) != 64) + return false; + } else { + if (nir_src_bit_size(instr->src[0]) != 64) + return false; + } + + nir_intrinsic_instr *first = + nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr)); + nir_intrinsic_instr *second = + nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr)); + + switch (instr->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_vec4: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_store_ssbo: + break; + + default: { + nir_io_semantics semantics = nir_intrinsic_io_semantics(second); + semantics.location++; + semantics.num_slots--; + nir_intrinsic_set_io_semantics(second, semantics); + + 
nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1); + break; + } + } + + first->num_components = 2; + second->num_components -= 2; + if (has_dest) { + first->dest.ssa.num_components = 2; + second->dest.ssa.num_components -= 2; + } + + nir_builder_instr_insert(b, &first->instr); + nir_builder_instr_insert(b, &second->instr); + + if (has_dest) { + /* Merge the two loads' results back into a vector. */ + nir_ssa_scalar channels[4] = { + nir_get_ssa_scalar(&first->dest.ssa, 0), + nir_get_ssa_scalar(&first->dest.ssa, 1), + nir_get_ssa_scalar(&second->dest.ssa, 0), + nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0), + }; + nir_ssa_def *new_ir = nir_vec_scalars(b, channels, instr->num_components); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_ir); + } else { + /* Split the src value across the two stores. */ + b->cursor = nir_before_instr(&instr->instr); + + nir_ssa_def *src0 = instr->src[0].ssa; + nir_ssa_scalar channels[4] = { 0 }; + for (int i = 0; i < instr->num_components; i++) + channels[i] = nir_get_ssa_scalar(src0, i); + + nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3); + nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2); + + nir_instr_rewrite_src(&first->instr, &first->src[0], + nir_src_for_ssa(nir_vec_scalars(b, channels, 2))); + nir_instr_rewrite_src(&second->instr, &second->src[0], + nir_src_for_ssa(nir_vec_scalars(b, &channels[2], + second->num_components))); + } + + int offset_src = -1; + uint32_t offset_amount = 16; + + switch (instr->intrinsic) { + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_ubo: + offset_src = 1; + break; + case nir_intrinsic_load_ubo_vec4: + case nir_intrinsic_load_uniform: + offset_src = 0; + offset_amount = 1; + break; + case nir_intrinsic_store_ssbo: + offset_src = 2; + break; + default: + break; + } + if (offset_src != -1) { + b->cursor = nir_before_instr(&second->instr); + nir_ssa_def *second_offset = + nir_iadd_imm(b, 
second->src[offset_src].ssa, offset_amount); + nir_instr_rewrite_src(&second->instr, &second->src[offset_src], + nir_src_for_ssa(second_offset)); + } + + /* DCE stores we generated with no writemask (nothing else does this + * currently). + */ + if (!has_dest) { + if (nir_intrinsic_write_mask(first) == 0) + nir_instr_remove(&first->instr); + if (nir_intrinsic_write_mask(second) == 0) + nir_instr_remove(&second->instr); + } + + nir_instr_remove(&instr->instr); + + return true; +} + +static bool +r600_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr) +{ + int num_components = instr->def.num_components; + + if (instr->def.bit_size != 64 || num_components <= 2) + return false; + + b->cursor = nir_before_instr(&instr->instr); + + nir_load_const_instr *first = + nir_load_const_instr_create(b->shader, 2, 64); + nir_load_const_instr *second = + nir_load_const_instr_create(b->shader, num_components - 2, 64); + + first->value[0] = instr->value[0]; + first->value[1] = instr->value[1]; + second->value[0] = instr->value[2]; + if (num_components == 4) + second->value[1] = instr->value[3]; + + nir_builder_instr_insert(b, &first->instr); + nir_builder_instr_insert(b, &second->instr); + + nir_ssa_def *channels[4] = { + nir_channel(b, &first->def, 0), + nir_channel(b, &first->def, 1), + nir_channel(b, &second->def, 0), + num_components == 4 ? 
nir_channel(b, &second->def, 1) : NULL, + }; + nir_ssa_def *new_ir = nir_vec(b, channels, num_components); + nir_ssa_def_rewrite_uses(&instr->def, new_ir); + nir_instr_remove(&instr->instr); + + return true; +} + +static bool +r600_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr, void *data) +{ + switch (instr->type) { + case nir_instr_type_load_const: + return r600_lower_64bit_load_const(b, nir_instr_as_load_const(instr)); + + case nir_instr_type_intrinsic: + return r600_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr)); + default: + return false; + } +} + +bool +r600_lower_64bit_to_vec2(nir_shader *s) +{ + return nir_shader_instructions_pass(s, + r600_lower_64bit_to_vec2_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); +} + + } // end namespace r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp index 892f006..4a4cc0a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp @@ -95,13 +95,14 @@ emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_i auto idx2 = nir_src_as_const_value(op->src[1]); if (!idx2 || idx2->u32 != 0) - offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4))); + offset = nir_iadd(b, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)), offset); return nir_iadd(b, addr, offset); } static nir_ssa_def * -emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset) +emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, + UNUSED nir_variable_mode mode, int src_offset) { nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0), @@ -552,7 +553,7 @@ r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options } static nir_ssa_def * -r600_lower_tess_coord_impl(nir_builder *b, 
nir_instr *instr, void *_options) +r600_lower_tess_coord_impl(nir_builder *b, UNUSED nir_instr *instr, void *_options) { pipe_prim_type prim_type = *(pipe_prim_type *)_options; diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp similarity index 65% rename from src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp rename to src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp index 8fc5469..197bff4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp @@ -1,142 +1,9 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_instruction_tex.h" +#include "sfn_nir_lower_tex.h" + +#include "nir.h" #include "nir_builder.h" #include "nir_builtin_builder.h" -namespace r600 { - -TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src, - unsigned sid, unsigned rid, PValue sampler_offset): - Instruction(tex), - m_opcode(op), - m_dst(dest), - m_src(src), - m_sampler_id(sid), - m_resource_id(rid), - m_flags(0), - m_inst_mode(0), - m_dest_swizzle{0,1,2,3}, - m_sampler_offset(sampler_offset) - -{ - memset(m_offset, 0, sizeof (m_offset)); - - add_remappable_src_value(&m_src); - add_remappable_src_value(&m_sampler_offset); - add_remappable_dst_value(&m_dst); -} - -void TexInstruction::set_gather_comp(int cmp) -{ - m_inst_mode = cmp; -} - -void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value) -{ - // I wonder whether we can actually end up here ... - for (auto c: candidates) { - if (*c == *m_src.reg_i(c->chan())) - m_src.set_reg_i(c->chan(), new_value); - if (*c == *m_dst.reg_i(c->chan())) - m_dst.set_reg_i(c->chan(), new_value); - } -} - -void TexInstruction::set_offset(unsigned index, int32_t val) -{ - assert(index < 3); - m_offset[index] = val; -} - -int TexInstruction::get_offset(unsigned index) const -{ - assert(index < 3); - return (m_offset[index] << 1 & 0x1f); -} - -bool TexInstruction::is_equal_to(const Instruction& rhs) const -{ - assert(rhs.type() == tex); - const auto& r = static_cast(rhs); - return (m_opcode == r.m_opcode && - m_dst == r.m_dst && - m_src == r.m_src && - m_sampler_id == r.m_sampler_id && - m_resource_id == r.m_resource_id); -} - -void TexInstruction::do_print(std::ostream& os) const -{ - const char *map_swz = "xyzw01?_"; - os << opname(m_opcode) << " R" << m_dst.sel() << "."; - for (int i = 0; i < 4; ++i) - os << map_swz[m_dest_swizzle[i]]; - - os << " " << m_src - << " RESID:" << m_resource_id << " SAMPLER:" - << m_sampler_id; -} - -const char *TexInstruction::opname(Opcode op) -{ - switch 
(op) { - case ld: return "LD"; - case get_resinfo: return "GET_TEXTURE_RESINFO"; - case get_nsampled: return "GET_NUMBER_OF_SAMPLES"; - case get_tex_lod: return "GET_LOD"; - case get_gradient_h: return "GET_GRADIENTS_H"; - case get_gradient_v: return "GET_GRADIENTS_V"; - case set_offsets: return "SET_TEXTURE_OFFSETS"; - case keep_gradients: return "KEEP_GRADIENTS"; - case set_gradient_h: return "SET_GRADIENTS_H"; - case set_gradient_v: return "SET_GRADIENTS_V"; - case sample: return "SAMPLE"; - case sample_l: return "SAMPLE_L"; - case sample_lb: return "SAMPLE_LB"; - case sample_lz: return "SAMPLE_LZ"; - case sample_g: return "SAMPLE_G"; - case sample_g_lb: return "SAMPLE_G_L"; - case gather4: return "GATHER4"; - case gather4_o: return "GATHER4_O"; - case sample_c: return "SAMPLE_C"; - case sample_c_l: return "SAMPLE_C_L"; - case sample_c_lb: return "SAMPLE_C_LB"; - case sample_c_lz: return "SAMPLE_C_LZ"; - case sample_c_g: return "SAMPLE_C_G"; - case sample_c_g_lb: return "SAMPLE_C_G_L"; - case gather4_c: return "GATHER4_C"; - case gather4_c_o: return "OP_GATHER4_C_O"; - } - return "ERROR"; -} - - - static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex) { b->cursor = nir_before_instr(&tex->instr); @@ -273,14 +140,14 @@ bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex) nir_ssa_def *lambda_exp = nir_fexp2(b, lod); nir_ssa_def *scale = NULL; - if (tex->is_array) { + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0}; + scale = nir_frcp(b, nir_channels(b, size, 1)); + scale = nir_swizzle(b, scale, swizzle, 3); + } else if (tex->is_array) { int cmp_mask = (1 << (size->num_components - 1)) - 1; scale = nir_frcp(b, nir_channels(b, size, (nir_component_mask_t)cmp_mask)); - } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0}; - scale = nir_frcp(b, nir_channels(b, size, 1)); - scale = nir_swizzle(b, scale, swizzle, 
3); } nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale); @@ -408,7 +275,3 @@ r600_nir_lower_cube_to_2darray(nir_shader *shader) r600_nir_lower_cube_to_2darray_filer, r600_nir_lower_cube_to_2darray_impl, nullptr); } - - - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h new file mode 100644 index 0000000..0601d65 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h @@ -0,0 +1,10 @@ +#ifndef SFN_NIR_LOWER_TEX_H +#define SFN_NIR_LOWER_TEX_H + +struct nir_shader; + +bool r600_nir_lower_int_tg4(nir_shader *nir); +bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader); +bool r600_nir_lower_cube_to_2darray(nir_shader *shader); + +#endif // LALA_H diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp new file mode 100644 index 0000000..393bc67 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp @@ -0,0 +1,627 @@ +#include "sfn_optimizer.h" + +#include "sfn_instr_alugroup.h" +#include "sfn_instr_controlflow.h" +#include "sfn_instr_export.h" +#include "sfn_instr_tex.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_lds.h" +#include "sfn_peephole.h" +#include "sfn_debug.h" + +#include + +namespace r600 { + +bool optimize(Shader& shader) +{ + bool progress; + + sfn_log << SfnLog::opt << "Shader before optimization\n"; + if (sfn_log.has_debug_flag(SfnLog::opt)) { + std::stringstream ss; + shader.print(ss); + sfn_log << ss.str() << "\n\n"; + } + + do { + progress = false; + progress |= copy_propagation_fwd(shader); + progress |= dead_code_elimination(shader); + progress |= copy_propagation_backward(shader); + progress |= dead_code_elimination(shader); + progress |= simplify_source_vectors(shader); + progress |= peephole(shader); + progress |= dead_code_elimination(shader); + } while (progress); + + return progress; +} + +class DCEVisitor : public InstrVisitor { +public: + DCEVisitor(); + + void visit(AluInstr *instr) 
override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override {(void)instr;}; + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + + void visit(ControlFlowInstr *instr) override {(void)instr;}; + void visit(IfInstr *instr) override {(void)instr;}; + void visit(WriteScratchInstr *instr) override {(void)instr;}; + void visit(StreamOutInstr *instr) override {(void)instr;}; + void visit(MemRingOutInstr *instr) override {(void)instr;}; + void visit(EmitVertexInstr *instr) override {(void)instr;}; + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) override {(void)instr;}; + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override; + void visit(RatInstr *instr) override {(void)instr;}; + + + bool progress; +}; + +bool dead_code_elimination(Shader& shader) +{ + DCEVisitor dce; + + do { + + sfn_log << SfnLog::opt << "start dce run\n"; + + dce.progress = false; + for (auto& b : shader.func()) + b->accept(dce); + + sfn_log << SfnLog::opt << "finished dce run\n\n"; + + } while (dce.progress); + + sfn_log << SfnLog::opt << "Shader after DCE\n"; + if (sfn_log.has_debug_flag(SfnLog::opt)) { + std::stringstream ss; + shader.print(ss); + sfn_log << ss.str() << "\n\n"; + } + + return dce.progress; +} + +DCEVisitor::DCEVisitor():progress(false) +{ +} + +void DCEVisitor::visit(AluInstr *instr) +{ + sfn_log << SfnLog::opt << "DCE: visit '" << *instr; + + if (instr->has_instr_flag(Instr::dead)) + return; + + if (instr->dest() && instr->dest()->has_uses()) { + sfn_log << SfnLog::opt << " dest used\n"; + return; + } + + switch (instr->opcode()) { + case op2_kille: + case op2_killne: + case op2_kille_int: + case op2_killne_int: + case op2_killge: + case op2_killge_int: + case op2_killge_uint: + case op2_killgt: + case op2_killgt_int: + case op2_killgt_uint: + case op0_group_barrier: + sfn_log << SfnLog::opt << " 
never kill\n"; + return; + default: + ; + } + + bool dead = instr->set_dead(); + sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n"; + progress |= dead; +} + +void DCEVisitor::visit(LDSReadInstr *instr) +{ + sfn_log << SfnLog::opt << "visit " << *instr << "\n"; + progress |= instr->remove_unused_components(); +} + +void DCEVisitor::visit(AluGroup *instr) +{ + /* Groups are created because the instructions are used together + * so don't try to eliminate code there */ + (void)instr; +} + +void DCEVisitor::visit(TexInstr *instr) +{ + auto& dest = instr->dst(); + + bool has_uses = false; + RegisterVec4::Swizzle swz = instr->all_dest_swizzle(); + for (int i = 0; i < 4; ++i) { + if (!dest[i]->has_uses()) + swz[i] = 7; + else + has_uses |= true; + } + instr->set_dest_swizzle(swz); + + if (has_uses) + return; + + progress |= instr->set_dead(); +} + +void DCEVisitor::visit(FetchInstr *instr) +{ + auto& dest = instr->dst(); + + bool has_uses = false; + RegisterVec4::Swizzle swz = instr->all_dest_swizzle(); + for (int i = 0; i < 4; ++i) { + if (!dest[i]->has_uses()) + swz[i] = 7; + else + has_uses |= true; + } + instr->set_dest_swizzle(swz); + + if (has_uses) + return; + + sfn_log << SfnLog::opt << "set dead: " << *instr << "\n"; + + progress |= instr->set_dead(); +} + +void DCEVisitor::visit(Block *block) +{ + auto i = block->begin(); + auto e = block->end(); + while (i != e) { + auto n = i++; + if (!(*n)->keep()) { + (*n)->accept(*this); + if ((*n)->is_dead()) { + block->erase(n); + } + } + } +} + +void visit(ControlFlowInstr *instr) +{ + (void)instr; +} + +void visit(IfInstr *instr) +{ + (void)instr; +} + +class CopyPropFwdVisitor : public InstrVisitor { +public: + CopyPropFwdVisitor(); + + void visit(AluInstr *instr) override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override {(void)instr;} + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + void 
visit(ControlFlowInstr *instr) override {(void)instr;} + void visit(IfInstr *instr) override {(void)instr;} + void visit(WriteScratchInstr *instr) override {(void)instr;} + void visit(StreamOutInstr *instr) override {(void)instr;} + void visit(MemRingOutInstr *instr) override {(void)instr;} + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) override {(void)instr;}; + void visit(RatInstr *instr) override {(void)instr;}; + + // TODO: these two should use copy propagation + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override {(void)instr;}; + + bool progress; +}; + + +class CopyPropBackVisitor : public InstrVisitor { +public: + CopyPropBackVisitor(); + + void visit(AluInstr *instr) override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override {(void)instr;} + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + void visit(ControlFlowInstr *instr) override {(void)instr;} + void visit(IfInstr *instr) override {(void)instr;} + void visit(WriteScratchInstr *instr) override {(void)instr;} + void visit(StreamOutInstr *instr) override {(void)instr;} + void visit(MemRingOutInstr *instr) override {(void)instr;} + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) override {(void)instr;}; + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override {(void)instr;}; + void visit(RatInstr *instr) override {(void)instr;}; + + bool progress; +}; + +bool copy_propagation_fwd(Shader& shader) +{ + auto& root = shader.func(); + CopyPropFwdVisitor copy_prop; + + do { + copy_prop.progress = false; + for (auto b : root) + b->accept(copy_prop); + } while (copy_prop.progress); + + sfn_log << SfnLog::opt << "Shader 
after Copy Prop forward\n"; + if (sfn_log.has_debug_flag(SfnLog::opt)) { + std::stringstream ss; + shader.print(ss); + sfn_log << ss.str() << "\n\n"; + } + + + return copy_prop.progress; +} + +bool copy_propagation_backward(Shader& shader) +{ + CopyPropBackVisitor copy_prop; + + do { + copy_prop.progress = false; + for (auto b: shader.func()) + b->accept(copy_prop); + } while (copy_prop.progress); + + sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n"; + if (sfn_log.has_debug_flag(SfnLog::opt)) { + std::stringstream ss; + shader.print(ss); + sfn_log << ss.str() << "\n\n"; + } + + return copy_prop.progress; +} + +CopyPropFwdVisitor::CopyPropFwdVisitor(): + progress(false) +{} + +void CopyPropFwdVisitor::visit(AluInstr *instr) +{ + sfn_log << SfnLog::opt << "CopyPropFwdVisitor:[" + << instr->block_id() << ":" << instr->index() << "] " << *instr + << " dset=" << instr->dest() << " "; + + + + if (instr->dest()) { + sfn_log << SfnLog::opt << "has uses; " + << instr->dest()->uses().size(); + } + + sfn_log << SfnLog::opt << "\n"; + + if (!instr->can_propagate_src()) { + return; + } + + auto src = instr->psrc(0); + auto dest = instr->dest(); + + for (auto& i : instr->dest()->uses()) { + /* SSA can always be propagated, registers only in the same block + * and only if they are not assigned to more than once */ + if (dest->is_ssa() || + (instr->block_id() == i->block_id() && + instr->index() < i->index() && + dest->uses().size() == 1)) { + sfn_log << SfnLog::opt << " Try replace in " + << i->block_id() << ":" << i->index() + << *i<< "\n"; + progress |= i->replace_source(dest, src); + } + } + if (instr->dest()) { + sfn_log << SfnLog::opt << "has uses; " + << instr->dest()->uses().size(); + } + sfn_log << SfnLog::opt << " done\n"; +} + + +void CopyPropFwdVisitor::visit(AluGroup *instr) +{ + (void)instr; +} + +void CopyPropFwdVisitor::visit(TexInstr *instr) +{ + (void)instr; +} + +void CopyPropFwdVisitor::visit(FetchInstr *instr) +{ + (void)instr; +} + +void 
CopyPropFwdVisitor::visit(Block *instr) +{ + for (auto& i: *instr) + i->accept(*this); +} + +CopyPropBackVisitor::CopyPropBackVisitor(): + progress(false) +{ + +} + +void CopyPropBackVisitor::visit(AluInstr *instr) +{ + bool local_progress = false; + + sfn_log << SfnLog::opt << "CopyPropBackVisitor:[" + << instr->block_id() << ":" << instr->index() << "] " << *instr << "\n"; + + + if (!instr->can_propagate_dest()) { + return; + } + + auto src_reg = instr->psrc(0)->as_register(); + if (!src_reg) { + return; + } + + if (src_reg->uses().size() > 1) + return; + + auto dest = instr->dest(); + if (!dest || + !instr->has_alu_flag(alu_write)) { + return; + } + + if (!dest->is_ssa() && dest->parents().size() > 1) + return; + + for (auto& i: src_reg->parents()) { + sfn_log << SfnLog::opt << "Try replace dest in " + << i->block_id() << ":" << i->index() + << *i<< "\n"; + + if (i->replace_dest(dest, instr)) { + dest->del_parent(instr); + dest->add_parent(i); + for (auto d : instr->dependend_instr()) { + d->add_required_instr(i); + } + local_progress = true; + } + } + + if (local_progress) + instr->set_dead(); + + progress |= local_progress; +} + +void CopyPropBackVisitor::visit(AluGroup *instr) +{ + for (auto& i: *instr) { + if (i) + i->accept(*this); + } +} + +void CopyPropBackVisitor::visit(TexInstr *instr) +{ + (void)instr; +} + +void CopyPropBackVisitor::visit(FetchInstr *instr) +{ + (void)instr; +} + +void CopyPropBackVisitor::visit(Block *instr) +{ + for (auto i = instr->rbegin(); i != instr->rend(); ++i) + if (!(*i)->is_dead()) + (*i)->accept(*this); +} + +class SimplifySourceVecVisitor : public InstrVisitor { +public: + SimplifySourceVecVisitor():progress(false) {} + + void visit(AluInstr *instr) override{(void)instr;} + void visit(AluGroup *instr) override{(void)instr;} + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override; + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + void visit(ControlFlowInstr *instr) 
override; + void visit(IfInstr *instr) override; + void visit(WriteScratchInstr *instr) override; + void visit(StreamOutInstr *instr) override; + void visit(MemRingOutInstr *instr) override; + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) override {(void)instr;}; + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override {(void)instr;}; + void visit(RatInstr *instr) override {(void)instr;}; + + void replace_src(Instr *instr, RegisterVec4& reg4); + + bool progress; +}; + +bool simplify_source_vectors(Shader& sh) +{ + SimplifySourceVecVisitor visitor; + + for (auto b: sh.func()) + b->accept(visitor); + + return visitor.progress; +} + +void SimplifySourceVecVisitor::visit(TexInstr *instr) +{ + if (instr->opcode() != TexInstr::get_resinfo) { + replace_src(instr, instr->src()); + } +} + +void SimplifySourceVecVisitor::visit(WriteScratchInstr *instr) +{ + (void) instr; +} + +class ReplaceConstSource : public AluInstrVisitor { +public: + ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i): + old_use(old_use_), vreg(vreg_), index(i),success(false) {} + + using AluInstrVisitor::visit; + + void visit(AluInstr *alu) override; + + Instr *old_use; + RegisterVec4& vreg; + int index; + bool success; +}; + +void SimplifySourceVecVisitor::visit(ExportInstr *instr) +{ + replace_src(instr, instr->value()); +} + +void SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4) +{ + for (int i = 0; i < 4; ++i) { + auto s = reg4[i]; + + if (s->chan() > 3) + continue; + + if (!s->is_ssa()) + continue; + + /* Cayman trans ops have more then one parent for + * one dest */ + if (s->parents().size() != 1) + continue; + + auto& op = *s->parents().begin(); + + ReplaceConstSource visitor(instr, reg4, i); + + op->accept(visitor); + + progress |= visitor.success; + } +} + +void SimplifySourceVecVisitor::visit(StreamOutInstr 
*instr) +{ + (void)instr; +} + +void SimplifySourceVecVisitor::visit(MemRingOutInstr *instr) +{ + (void)instr; +} + +void ReplaceConstSource::visit(AluInstr *alu) +{ + if (alu->opcode() != op1_mov) + return; + + if (alu->has_alu_flag(alu_src0_abs) || + alu->has_alu_flag(alu_src0_neg)) + return; + + auto src = alu->psrc(0); + assert(src); + + int override_chan = -1; + + auto ic = src->as_inline_const(); + if (ic) { + if (ic->sel() == ALU_SRC_0) + override_chan = 4; + + if (ic->sel() == ALU_SRC_1) + override_chan = 5; + } + + auto literal = src->as_literal(); + if (literal) { + + if (literal->value() == 0) + override_chan = 4; + + if (literal->value() == 0x3F800000) + override_chan = 5; + } + + if (override_chan >= 0) { + vreg[index]->del_use(old_use); + auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin()); + vreg.set_value(index, reg); + success = true; + } +} + +void SimplifySourceVecVisitor::visit(FetchInstr *instr) +{ + (void) instr; +} + +void SimplifySourceVecVisitor::visit(Block *instr) +{ + for (auto i = instr->rbegin(); i != instr->rend(); ++i) + if (!(*i)->is_dead()) + (*i)->accept(*this); +} + +void SimplifySourceVecVisitor::visit(ControlFlowInstr *instr) +{ + (void) instr; +} + +void SimplifySourceVecVisitor::visit(IfInstr *instr) +{ + (void) instr; +} + + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.h b/src/gallium/drivers/r600/sfn/sfn_optimizer.h new file mode 100644 index 0000000..a1b3a13 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.h @@ -0,0 +1,17 @@ +#ifndef OPTIMIZER_H +#define OPTIMIZER_H + +#include "sfn_shader.h" + +namespace r600 { + +bool dead_code_elimination(Shader& shader); +bool copy_propagation_fwd(Shader& shader); +bool copy_propagation_backward(Shader& shader); +bool simplify_source_vectors(Shader& sh); + +bool optimize(Shader& shader); + +} + +#endif // OPTIMIZER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp deleted 
file mode 100644 index dbffcfa..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "sfn_optimizers.h" -#include "sfn_instruction_block.h" - -namespace r600 { - -std::vector -flatten_shader(const std::vector &ir) -{ - -} - -} \ No newline at end of file diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizers.h b/src/gallium/drivers/r600/sfn/sfn_optimizers.h deleted file mode 100644 index d17d32b..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_optimizers.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef SFN_OPTIMIZERS_H -#define SFN_OPTIMIZERS_H - -#include "sfn_instruction_base.h" - -namespace r600 { - -std::vector -flatten_alu_ops(const std::vector &ir); - - -} - -#endif // SFN_OPTIMIZERS_H diff --git a/src/gallium/drivers/r600/sfn/sfn_peephole.cpp b/src/gallium/drivers/r600/sfn/sfn_peephole.cpp new file mode 100644 index 0000000..8eebec0 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_peephole.cpp @@ -0,0 +1,212 @@ +#include "sfn_peephole.h" + +namespace r600 { + + +class PeepholeVisitor : public InstrVisitor { +public: + void visit(AluInstr *instr) override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override {(void)instr;}; + void visit(ExportInstr *instr) override {(void)instr;} + void visit(FetchInstr *instr) override {(void)instr;} + void visit(Block *instr) override; + void visit(ControlFlowInstr *instr) override {(void)instr;} + void visit(IfInstr *instr) override; + void visit(WriteScratchInstr *instr) override {(void)instr;} + void visit(StreamOutInstr *instr) override {(void)instr;} + void visit(MemRingOutInstr *instr) override {(void)instr;} + void visit(EmitVertexInstr *instr) override {(void)instr;} + void visit(GDSInstr *instr) override {(void)instr;}; + void visit(WriteTFInstr *instr) override {(void)instr;}; + void visit(LDSAtomicInstr *instr) override {(void)instr;}; + void visit(LDSReadInstr *instr) override {(void)instr;}; + void visit(RatInstr *instr) override 
{(void)instr;}; + + bool src_is_zero(PVirtualValue value); + bool src_is_one(PVirtualValue value); + + void convert_to_mov(AluInstr *alu, int src_idx); + + + bool progress{false}; +}; + + +bool peephole(Shader& sh) +{ + PeepholeVisitor peephole; + for(auto b : sh.func()) + b->accept(peephole); + return peephole.progress; +} + +void PeepholeVisitor::visit(AluInstr *instr) +{ + switch (instr->opcode()) { + case op2_add: + case op2_add_int: + if (src_is_zero(instr->psrc(0))) + convert_to_mov(instr, 1); + else if (src_is_zero(instr->psrc(1))) + convert_to_mov(instr, 0); + break; + case op2_mul: + case op2_mul_ieee: + if (src_is_one(instr->psrc(0))) + convert_to_mov(instr, 1); + else if (src_is_one(instr->psrc(1))) + convert_to_mov(instr, 0); + break; + case op3_muladd: + case op3_muladd_ieee: + if (src_is_zero(instr->psrc(0)) || + src_is_zero(instr->psrc(1))) + convert_to_mov(instr, 2); + break; + default: + ; + } +} + +bool PeepholeVisitor::src_is_zero(PVirtualValue value) +{ + if (value->as_inline_const() && + value->as_inline_const()->sel() == ALU_SRC_0) + return true; + + if (value->as_literal() && + value->as_literal()->value() == 0) + return true; + + return false; +} + +bool PeepholeVisitor::src_is_one(PVirtualValue value) +{ + if (value->as_inline_const() && + value->as_inline_const()->sel() == ALU_SRC_1) + return true; + + if (value->as_literal() && + value->as_literal()->value() == 0x3f800000) + return true; + + return false; +} + +void PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx) +{ + AluInstr::SrcValues new_src{alu->psrc(src_idx)}; + alu->set_sources(new_src); + alu->set_op(op1_mov); + progress = true; +} + + +void PeepholeVisitor::visit(AluGroup *instr) +{ + +} + +void PeepholeVisitor::visit(Block *instr) +{ + for (auto& i: *instr) + i->accept(*this); +} + +class ReplaceIfPredicate : public AluInstrVisitor { +public: + ReplaceIfPredicate(AluInstr *pred): + m_pred(pred) {} + + using AluInstrVisitor::visit; + + void visit(AluInstr *alu) 
override; + + AluInstr *m_pred; + bool success{false}; +}; + +void PeepholeVisitor::visit(IfInstr *instr) +{ + auto pred = instr->predicate(); + + auto& src1 = pred->src(1); + if (src1.as_inline_const() && + src1.as_inline_const()->sel() == ALU_SRC_0) { + auto src0 = pred->src(0).as_register(); + if (src0 && src0->is_ssa()) { + assert(!src0->parents().empty()); + auto parent = *src0->parents().begin(); + + ReplaceIfPredicate visitor(pred); + parent->accept(visitor); + progress |= visitor.success; + } + } +} + +static EAluOp pred_from_op(EAluOp pred_op, EAluOp op) +{ + switch (pred_op) { + case op2_pred_setne_int: + switch (op) { + /* + case op2_setge_dx10 : return op2_pred_setge_int; + case op2_setgt_dx10 : return op2_pred_setgt_int; + case op2_sete_dx10 : return op2_prede_int; + case op2_setne_dx10 : return op2_pred_setne_int; + */ + case op2_setge_int : return op2_pred_setge_int; + case op2_setgt_int : return op2_pred_setgt_int; + case op2_setge_uint : return op2_pred_setge_uint; + case op2_setgt_uint : return op2_pred_setgt_uint; + case op2_sete_int : return op2_prede_int; + case op2_setne_int : return op2_pred_setne_int; + default: + return op0_nop; + } + case op2_prede_int: + switch (op) { + case op2_sete_int : return op2_pred_setne_int; + case op2_setne_int : return op2_prede_int; + default: + return op0_nop; + } + default: + return op0_nop; + } +} + +void ReplaceIfPredicate::visit(AluInstr *alu) +{ + auto new_op = pred_from_op(m_pred->opcode(), alu->opcode()); + + if (new_op == op0_nop) + return; + + /* Have to figure out how to pass the dependency correctly */ + /*for (auto& s : alu->sources()) { + if (s->as_register() && s->as_register()->addr()) + return; + }*/ + + m_pred->set_op(new_op); + m_pred->set_sources(alu->sources()); + + if (alu->has_alu_flag(alu_src0_abs)) + m_pred->set_alu_flag(alu_src0_abs); + if (alu->has_alu_flag(alu_src1_abs)) + m_pred->set_alu_flag(alu_src1_abs); + + if (alu->has_alu_flag(alu_src0_neg)) + 
m_pred->set_alu_flag(alu_src0_neg); + + if (alu->has_alu_flag(alu_src1_neg)) + m_pred->set_alu_flag(alu_src1_neg); + + success = true; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_peephole.h b/src/gallium/drivers/r600/sfn/sfn_peephole.h new file mode 100644 index 0000000..1384fc0 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_peephole.h @@ -0,0 +1,13 @@ +#ifndef PEEPHOLE_H +#define PEEPHOLE_H + +#include "sfn_shader.h" + +namespace r600 { + +bool peephole(Shader& sh); + +} + + +#endif // PEEPHOLE_H diff --git a/src/gallium/drivers/r600/sfn/sfn_ra.cpp b/src/gallium/drivers/r600/sfn/sfn_ra.cpp new file mode 100644 index 0000000..375362d --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_ra.cpp @@ -0,0 +1,268 @@ +#include "sfn_debug.h" +#include "sfn_ra.h" + +#include +#include + +namespace r600 { + +void ComponentInterference::prepare_row(int row) +{ + m_rows.resize(row + 1); + +} + +void ComponentInterference::add(size_t idx1, size_t idx2) +{ + assert(idx1 > idx2); + assert(m_rows.size() > idx1); + m_rows[idx1].push_back(idx2); + m_rows[idx2].push_back(idx1); +} + + +Interference::Interference(LiveRangeMap& map): + m_map(map) +{ + initialize(); +} + +void Interference::initialize() +{ + for(int i = 0; i < 4; ++i) { + initialize(m_components_maps[i], m_map.component(i)); + } +} + +void Interference::initialize(ComponentInterference& comp_interference, + LiveRangeMap::ChannelLiveRange& clr) +{ + for (size_t row = 0; row < clr.size(); ++row) { + auto& row_entry = clr[row]; + comp_interference.prepare_row(row); + for (size_t col = 0; col < row; ++col) { + auto& col_entry = clr[col]; + if (row_entry.m_end >= col_entry.m_start && + row_entry.m_start <= col_entry.m_end) + comp_interference.add(row, col); + } + } +} + +struct Group { + int priority; + std::array channels; +}; + +static inline bool operator < (const Group& lhs, const Group& rhs) +{ + return lhs.priority < rhs.priority; +} + +using GroupRegisters = std::priority_queue; + +static bool 
+group_allocation (LiveRangeMap& lrm, const Interference& interference, GroupRegisters& groups) +{ + /* Give all channels of each group one common color, taking the groups + * in priority-queue order. Returns false when the color search for a + * group reaches 124 without finding a usable color. */ + int color = 0; + // allocate grouped registers + while (!groups.empty()) { + auto group = groups.top(); + groups.pop(); + + int start_comp = 0; + while (!group.channels[start_comp]) + ++start_comp; + + sfn_log << SfnLog::merge << "Color group with " << *group.channels[start_comp] << "\n"; + + // don't restart registers for exports, we may be able to merge the + // export calls, if the registers are consecutive + if (group.priority > 0) + color = 0; + + while (color < 124) { + /* Find the coloring for the first channel */ + bool color_in_use = false; + int comp = start_comp; + + auto& adjecency = interference.row(start_comp, group.channels[comp]->index()); + auto& regs = lrm.component(comp); + + sfn_log << SfnLog::merge << "Try color "<< color; + + for (auto adj : adjecency) { + if (regs[adj].m_color == color) { + color_in_use = true; + sfn_log << SfnLog::merge << " in use\n"; + break; + } + } + + if (color_in_use) { + ++color; + continue; + } + + /* First channel color found, check whether it can be used for all channels */ + while (comp < 4) { + sfn_log << SfnLog::merge << " interference: "; + if (group.channels[comp]) { + auto& component_life_ranges = lrm.component(comp); + auto& adjecencies = interference.row(comp, group.channels[comp]->index()); + + for (auto adj_index : adjecencies) { + sfn_log << SfnLog::merge << *component_life_ranges[adj_index].m_register << " "; + if (component_life_ranges[adj_index].m_color == color) { + color_in_use = true; + sfn_log << SfnLog::merge << "used"; + break; + } + } + + if (color_in_use) + break; + } + ++comp; + } + + /* We couldn't allocate all channels with this color, so try next */ + if (color_in_use) { + ++color; + sfn_log << SfnLog::merge << "\n"; + continue; + } + sfn_log << SfnLog::merge << " success\n"; + + /* Coloring successful */ + for (auto reg : group.channels) { + if (reg) { + auto& vregs =
lrm.component(reg->chan()); + auto& vreg_cmp = vregs[reg->index()]; + assert(vreg_cmp.m_start != -1 || vreg_cmp.m_end != -1); + vreg_cmp.m_color = color; + } + } + break; + } + + if (color == 124) + return false; + } + + return true; +} + +static bool +scalar_allocation (LiveRangeMap& lrm, const Interference& interference) +{ + for (int comp = 0; comp < 4; ++comp) { + auto& live_ranges = lrm.component(comp); + for (auto& r : live_ranges) { + if (r.m_color != -1) + continue; + + if (r.m_start == -1 && + r.m_end == -1) + continue; + + sfn_log << SfnLog::merge << "Color " << *r.m_register << "\n"; + + auto& adjecency = interference.row(comp, r.m_register->index()); + + int color = 0; + + while (color < 124) { + bool color_in_use = false; + for (auto adj : adjecency) { + if (live_ranges[adj].m_color == color) { + color_in_use = true; + break; + } + } + + if (color_in_use) { + ++color; + continue; + } + + r.m_color = color; + break; + } + if (color == 124) + return false; + } + } + return true; +} + +bool register_allocation(LiveRangeMap& lrm) +{ + Interference interference(lrm); + + std::map groups; + + // setup fixed colors and group relationships + for (int i = 0; i < 4; ++i) { + auto& comp = lrm.component(i); + for (auto& entry : comp) { + sfn_log << SfnLog::merge << "Prepare RA for " + << *entry.m_register + << " [" << entry.m_start << ", " << entry.m_end << "]\n"; + auto pin = entry.m_register->pin(); + if (entry.m_start == -1 && entry.m_end == -1) { + if (pin == pin_group || pin == pin_chgr) + entry.m_register->set_chan(7); + continue; + } + + auto sel = entry.m_register->sel(); + /* fully pinned registers contain system values with the + * definite register index, and array values are allocated + * right after the system registers, so just reuse the IDs (for now) */ + if (pin == pin_fully || pin == pin_array) { + /* Must set all array element entries */ + sfn_log << SfnLog::merge << "Pin color " << sel << " to " << *entry.m_register << "\n"; + entry.m_color = 
sel; + } else if (pin == pin_group || pin == pin_chgr) { + /* Groups must all have the same sel() value, because they are used + * as vec4 registers */ + auto igroup = groups.find(sel); + if (igroup != groups.end()) { + igroup->second.channels[i] = entry.m_register; + assert(comp[entry.m_register->index()].m_register->index() == entry.m_register->index()); + } else { + int priority = entry.m_use.test(LiveRangeEntry::use_export) ? - entry.m_end : entry.m_start; + Group group{priority, {nullptr, nullptr, nullptr, nullptr}}; + group.channels[i] = entry.m_register; + assert(comp[group.channels[i]->index()].m_register->index() == entry.m_register->index()); + groups[sel] = group; + } + } + } + } + + GroupRegisters groups_sorted; + for (auto& [sel, group] : groups) + groups_sorted.push(group); + + if (!group_allocation (lrm, interference, groups_sorted)) + return false; + + if (!scalar_allocation(lrm, interference)) + return false; + + for (int i = 0; i < 4; ++i) { + auto& comp = lrm.component(i); + for (auto& entry : comp) { + sfn_log << SfnLog::merge << "Set " << *entry.m_register << " to "; + entry.m_register->set_sel(entry.m_color); + entry.m_register->set_pin(pin_none); + sfn_log << SfnLog::merge << *entry.m_register << "\n"; + } + } + + return true; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_ra.h b/src/gallium/drivers/r600/sfn/sfn_ra.h new file mode 100644 index 0000000..b40b611 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_ra.h @@ -0,0 +1,51 @@ +#ifndef INTERFERENCE_H +#define INTERFERENCE_H + +#include "sfn_valuefactory.h" + +#include + +namespace r600 { + +class ComponentInterference +{ +public: + + using Row = std::vector; + + void prepare_row(int row); + + void add(size_t idx1, size_t idx2); + + auto row(int idx) const -> const Row& { + assert((size_t)idx < m_rows.size()); return m_rows[idx];} + +private: + + std::vector m_rows; +}; + +class Interference { +public: + Interference(LiveRangeMap& map); + + const auto& row(int comp, int index) 
const { + assert(comp < 4); + return m_components_maps[comp].row(index); + } + +private: + void initialize(); + void initialize(ComponentInterference& comp, LiveRangeMap::ChannelLiveRange& clr); + + LiveRangeMap& m_map; + std::array m_components_maps; + + +}; + +bool register_allocation(LiveRangeMap& lrm); + +} + +#endif // INTERFERENCE_H diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp new file mode 100644 index 0000000..d180f25 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -0,0 +1,890 @@ +#include "sfn_scheduler.h" +#include "sfn_instr_alugroup.h" +#include "sfn_instr_controlflow.h" +#include "sfn_instr_export.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_lds.h" +#include "sfn_instr_tex.h" +#include "sfn_debug.h" + +#include +#include + +namespace r600 { + +class CollectInstructions : public InstrVisitor { + +public: + CollectInstructions(ValueFactory& vf): + m_value_factory(vf) {} + + void visit(AluInstr *instr) override { + if (instr->has_alu_flag(alu_is_trans)) + alu_trans.push_back(instr); + else { + if (instr->alu_slots() == 1) + alu_vec.push_back(instr); + else + alu_groups.push_back(instr->split(m_value_factory)); + } + } + void visit(AluGroup *instr) override { + alu_groups.push_back(instr); + } + void visit(TexInstr *instr) override { + tex.push_back(instr); + } + void visit(ExportInstr *instr) override { + exports.push_back(instr); + } + void visit(FetchInstr *instr) override { + fetches.push_back(instr); + } + void visit(Block *instr) override { + for (auto& i: *instr) + i->accept(*this); + } + + void visit(ControlFlowInstr *instr) override { + assert(!m_cf_instr); + m_cf_instr = instr; + } + + void visit(IfInstr *instr) override { + assert(!m_cf_instr); + m_cf_instr = instr; + } + + void visit(EmitVertexInstr *instr) override { + assert(!m_cf_instr); + m_cf_instr = instr; + } + + void visit(WriteScratchInstr *instr) override { + 
mem_write_instr.push_back(instr); + } + + void visit(StreamOutInstr *instr) override { + mem_write_instr.push_back(instr); + } + + void visit(MemRingOutInstr *instr) override { + mem_ring_writes.push_back(instr); + } + + void visit(GDSInstr *instr) override { + gds_op.push_back(instr); + } + + void visit(WriteTFInstr *instr) override { + write_tf.push_back(instr); + } + + void visit(LDSReadInstr *instr) override { + std::vector buffer; + m_last_lds_instr = instr->split(buffer, m_last_lds_instr); + for (auto& i: buffer) { + i->accept(*this); + } + } + + void visit(LDSAtomicInstr *instr) override { + std::vector buffer; + m_last_lds_instr = instr->split(buffer, m_last_lds_instr); + for (auto& i: buffer) { + i->accept(*this); + } + } + + void visit(RatInstr *instr) override { + rat_instr.push_back(instr); + } + + + std::list alu_trans; + std::list alu_vec; + std::list tex; + std::list alu_groups; + std::list exports; + std::list fetches; + std::list mem_write_instr; + std::list mem_ring_writes; + std::list gds_op; + std::list write_tf; + std::list rat_instr; + + Instr *m_cf_instr{nullptr}; + ValueFactory& m_value_factory; + + AluInstr *m_last_lds_instr{nullptr}; +}; + +class BlockSheduler { +public: + BlockSheduler(); + void run(Shader *shader); + + void finalize(); + +private: + + void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf); + + bool collect_ready(CollectInstructions &available); + + template + bool collect_ready_type(std::list& ready, std::list& orig); + + bool collect_ready_alu_vec(std::list& ready, std::list& available); + + bool schedule_tex(Shader::ShaderBlocks& out_blocks); + bool schedule_vtx(Shader::ShaderBlocks& out_blocks); + + template + bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list& ready_list); + + template + bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list& ready_list); + + bool schedule_alu(Shader::ShaderBlocks& out_blocks); + void start_new_block(Shader::ShaderBlocks& out_blocks, 
Block::Type type); + + bool schedule_alu_to_group_vec(AluGroup *group); + bool schedule_alu_to_group_trans(AluGroup *group, std::list& readylist); + + bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list& ready_list); + + template + bool schedule(std::list& ready_list); + + template + bool schedule_block(std::list& ready_list); + + std::list alu_vec_ready; + std::list alu_trans_ready; + std::list alu_groups_ready; + std::list tex_ready; + std::list exports_ready; + std::list fetches_ready; + std::list memops_ready; + std::list mem_ring_writes_ready; + std::list gds_ready; + std::list write_tf_ready; + std::list rat_instr_ready; + + enum { + sched_alu, + sched_tex, + sched_fetch, + sched_free, + sched_mem_ring, + sched_gds, + sched_write_tf, + sched_rat, + } current_shed; + + ExportInstr *m_last_pos; + ExportInstr *m_last_pixel; + ExportInstr *m_last_param; + + Block *m_current_block; + + int m_lds_addr_count{0}; + int m_alu_groups_schduled{0}; + +}; + +Shader *schedule(Shader *original) +{ + AluGroup::set_chipclass(original->chip_class()); + + sfn_log << SfnLog::schedule << "Original shader\n"; + if (sfn_log.has_debug_flag(SfnLog::schedule)) { + std::stringstream ss; + original->print(ss); + sfn_log << ss.str() << "\n\n"; + } + + // TODO later it might be necessary to clone the shader + // to be able to re-start scheduling + + auto scheduled_shader = original; + BlockSheduler s; + s.run(scheduled_shader); + s.finalize(); + + sfn_log << SfnLog::schedule << "Scheduled shader\n"; + if (sfn_log.has_debug_flag(SfnLog::schedule)) { + std::stringstream ss; + scheduled_shader->print(ss); + sfn_log << ss.str() << "\n\n"; + } + + return scheduled_shader; +} + +BlockSheduler::BlockSheduler(): + current_shed(sched_alu), + m_last_pos(nullptr), + m_last_pixel(nullptr), + m_last_param(nullptr), + m_current_block(nullptr) +{ +} + +void BlockSheduler::run( Shader *shader) +{ + Shader::ShaderBlocks scheduled_blocks; + + for (auto& block : shader->func()) { + sfn_log << 
SfnLog::schedule << "Process block " << block->id() <<"\n"; + if (sfn_log.has_debug_flag(SfnLog::schedule)) { + std::stringstream ss; + block->print(ss); + sfn_log << ss.str() << "\n"; + } + schedule_block(*block, scheduled_blocks, shader->value_factory()); + } + + shader->reset_function(scheduled_blocks); +} + +/* Schedule the instructions of in_block: they are collected by type, and + * while ready instructions are available one ready list is picked per + * iteration according to current_shed and emitted into m_current_block. + * Finished blocks are appended to out_blocks. */ +void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf) +{ + + assert(in_block.id() >= 0); + + + current_shed = sched_fetch; + auto last_shed = sched_fetch; + + CollectInstructions cir(vf); + in_block.accept(cir); + + bool have_instr = collect_ready(cir); + + m_current_block = new Block(in_block.nesting_depth(), in_block.id()); + + assert(m_current_block->id() >= 0); + + while (have_instr) { + + sfn_log << SfnLog::schedule << "Have ready instructions\n"; + + if (alu_vec_ready.size()) + sfn_log << SfnLog::schedule << " ALU V:" << alu_vec_ready.size() << "\n"; + + if (alu_trans_ready.size()) + sfn_log << SfnLog::schedule << " ALU T:" << alu_trans_ready.size() << "\n"; + + if (alu_groups_ready.size()) + sfn_log << SfnLog::schedule << " ALU G:" << alu_groups_ready.size() << "\n"; + + if (exports_ready.size()) + sfn_log << SfnLog::schedule << " EXP:" << exports_ready.size() + << "\n"; + if (tex_ready.size()) + sfn_log << SfnLog::schedule << " TEX:" << tex_ready.size() + << "\n"; + if (fetches_ready.size()) + sfn_log << SfnLog::schedule << " FETCH:" << fetches_ready.size() + << "\n"; + if (mem_ring_writes_ready.size()) + sfn_log << SfnLog::schedule << " MEM_RING:" << mem_ring_writes_ready.size() + << "\n"; + /* Report the size of the list that is tested here, not the ring writes */ + if (memops_ready.size()) + sfn_log << SfnLog::schedule << " MEM_OPS:" << memops_ready.size() + << "\n"; + + if (!m_current_block->lds_group_active()) { + if (last_shed != sched_free && memops_ready.size() > 8) + current_shed = sched_free; + else if (mem_ring_writes_ready.size() > 5) + current_shed = sched_mem_ring; + else if (rat_instr_ready.size() > 3) + current_shed = sched_rat; + else if
(gds_ready.size() > 3) + current_shed = sched_gds; + else if (tex_ready.size() > 3) + current_shed = sched_tex; + } + + switch (current_shed) { + case sched_alu: + if (!schedule_alu(out_blocks)) { + assert(!m_current_block->lds_group_active()); + current_shed = sched_tex; + continue; + } + last_shed = current_shed; + break; + case sched_tex: + if (tex_ready.empty() || !schedule_tex(out_blocks)) { + current_shed = sched_fetch; + continue; + } + last_shed = current_shed; + break; + case sched_fetch: + if (!fetches_ready.empty()) { + schedule_vtx(out_blocks); + last_shed = current_shed; + } + current_shed = sched_gds; + continue; + case sched_gds: + if (!gds_ready.empty()) { + schedule_gds(out_blocks, gds_ready); + last_shed = current_shed; + } + current_shed = sched_mem_ring; + continue; + case sched_mem_ring: + if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) { + current_shed = sched_write_tf; + continue; + } + last_shed = current_shed; + break; + case sched_write_tf: + if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) { + current_shed = sched_rat; + continue; + } + last_shed = current_shed; + break; + case sched_rat: + if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) { + current_shed = sched_free; + continue; + } + last_shed = current_shed; + break; + case sched_free: + if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) { + current_shed = sched_alu; + break; + } + last_shed = current_shed; + } + + have_instr = collect_ready(cir); + } + + /* Emit exports always at end of a block */ + while (collect_ready_type(exports_ready, cir.exports)) + schedule_exports(out_blocks, exports_ready); + + bool fail = false; + + if (!cir.alu_groups.empty()) { + std::cerr << "Unscheduled ALU groups:\n"; + for (auto& a : cir.alu_groups) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + + if (!cir.alu_vec.empty()){ + std::cerr << "Unscheduled ALU vec ops:\n"; + for (auto& a : 
cir.alu_vec) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + + if (!cir.alu_trans.empty()){ + std::cerr << "Unscheduled ALU trans ops:\n"; + for (auto& a : cir.alu_trans) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + if (!cir.mem_write_instr.empty()){ + std::cerr << "Unscheduled MEM ops:\n"; + for (auto& a : cir.mem_write_instr) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + + if (!cir.fetches.empty()){ + std::cerr << "Unscheduled Fetch ops:\n"; + for (auto& a : cir.fetches) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + + if (!cir.tex.empty()){ + std::cerr << "Unscheduled Tex ops:\n"; + for (auto& a : cir.tex) { + std::cerr << " " << *a << "\n"; + } + fail = true; + } + + assert(cir.tex.empty()); + assert(cir.exports.empty()); + assert(cir.fetches.empty()); + assert(cir.alu_vec.empty()); + assert(cir.mem_write_instr.empty()); + assert(cir.mem_ring_writes.empty()); + + assert (!fail); + + if (cir.m_cf_instr) { + // Assert that if condition is ready + m_current_block->push_back(cir.m_cf_instr); + cir.m_cf_instr->set_scheduled(); + } + + out_blocks.push_back(m_current_block); +} + +void BlockSheduler::finalize() +{ + if (m_last_pos) + m_last_pos->set_is_last_export(true); + if (m_last_pixel) + m_last_pixel->set_is_last_export(true); + if (m_last_param) + m_last_param->set_is_last_export(true); +} + +bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks) +{ + bool success = false; + AluGroup *group = nullptr; + + bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty(); + + bool has_lds_ready = !alu_vec_ready.empty() && + (*alu_vec_ready.begin())->has_lds_access(); + + /* Schedule groups first. 
unless we have a pending LDS instuction + * We don't want the LDS instructions to be too far apart because the + * fetch + read from queue has to be in the same ALU CF block */ + if (!alu_groups_ready.empty() && !has_lds_ready) { + group = *alu_groups_ready.begin(); + alu_groups_ready.erase(alu_groups_ready.begin()); + sfn_log << SfnLog::schedule << "Schedule ALU group\n"; + success = true; + } else { + if (has_alu_ready) { + group = new AluGroup(); + sfn_log << SfnLog::schedule << "START new ALU group\n"; + } + } + + if (group) { + int free_slots = group->free_slots(); + + if (free_slots && has_alu_ready) { + if (!alu_vec_ready.empty()) + success |= schedule_alu_to_group_vec(group); + + /* Apparently one can't schedule a t-slot if there is already + * an LDS instruction scheduled. + * TODO: check whether this is only relevant for actual LDS instructions + * or also for instructions that read from the LDS return value queue */ + + /* NOTE(review): bit 0x10 of free_slots presumably marks the trans + * slot as free - confirm against AluGroup::free_slots() */ + if (free_slots & 0x10 && !has_lds_ready) { + sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n"; + if (!alu_trans_ready.empty()) + success |= schedule_alu_to_group_trans(group, alu_trans_ready); + if (!alu_vec_ready.empty()) + success |= schedule_alu_to_group_trans(group, alu_vec_ready); + } + } + + sfn_log << SfnLog::schedule << "Finalize ALU group\n"; + group->set_scheduled(); + group->fix_last_flag(); + group->set_nesting_depth(m_current_block->nesting_depth()); + + + if (m_current_block->type() != Block::alu) { + start_new_block(out_blocks, Block::alu); + m_alu_groups_schduled = 0; + } + + /* Pessimistic hack: If we have started an LDS group, + * make sure 8 instructions groups still fit into the CF + * TODO: take care of Address slot emission + * TODO: maybe do this CF split only in the assembler + */ + /*if (group->slots() > m_current_block->remaining_slots() || + (group->has_lds_group_start() && + m_current_block->remaining_slots() < 7 * 8)) { + //assert(!m_current_block->lds_group_active()); +
start_new_block(out_blocks, Block::alu); + }*/ + + if (!m_current_block->try_reserve_kcache(*group)) { + assert(!m_current_block->lds_group_active()); + start_new_block(out_blocks, Block::alu); + m_current_block->set_instr_flag(Instr::force_cf); + } + + /* The reservation must also be executed when assert() is compiled + * out (NDEBUG), so the call must not be part of the assert expression */ + [[maybe_unused]] bool kcache_reserved = m_current_block->try_reserve_kcache(*group); + assert(kcache_reserved); + + if (group->has_lds_group_start()) + m_current_block->lds_group_start(*group->begin()); + + m_current_block->push_back(group); + if (group->has_lds_group_end()) + m_current_block->lds_group_end(); + } + + if (success) + ++m_alu_groups_schduled; + + return success; +} + +/* Emit the first ready texture instruction, preceded by its prepare + * instructions, into a TEX block. Returns false if nothing was emitted. */ +bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks) +{ + if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) { + start_new_block(out_blocks, Block::tex); + m_current_block->set_instr_flag(Instr::force_cf); + } + + + if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) { + auto ii = tex_ready.begin(); + sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; + + if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size()) + start_new_block(out_blocks, Block::tex); + + for (auto prep : (*ii)->prepare_instr()) { + prep->set_scheduled(); + m_current_block->push_back(prep); + } + + (*ii)->set_scheduled(); + m_current_block->push_back(*ii); + tex_ready.erase(ii); + return true; + } + return false; +} + +/* Emit ready fetch instructions into a VTX block while slots remain. */ +bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks) +{ + if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) { + start_new_block(out_blocks, Block::vtx); + m_current_block->set_instr_flag(Instr::force_cf); + } + return schedule_block(fetches_ready); +} + +template +bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list& ready_list) +{ + bool was_full = m_current_block->remaining_slots() == 0; + if (m_current_block->type() != Block::gds || was_full) { + start_new_block(out_blocks, Block::gds); + if (was_full) +
m_current_block->set_instr_flag(Instr::force_cf); + } + return schedule_block(ready_list); +} + + +void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type) +{ + if (!m_current_block->empty()) { + sfn_log << SfnLog::schedule << "Start new block\n"; + assert(!m_current_block->lds_group_active()); + out_blocks.push_back(m_current_block); + m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id()); + } + m_current_block->set_type(type); +} + +template +bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list& ready_list) +{ + if (ready_list.empty()) + return false; + if (m_current_block->type() != Block::cf) + start_new_block(out_blocks, Block::cf); + return schedule(ready_list); +} + + +bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group) +{ + assert(group); + assert(!alu_vec_ready.empty()); + + bool success = false; + auto i = alu_vec_ready.begin(); + auto e = alu_vec_ready.end(); + while (i != e) { + sfn_log << SfnLog::schedule << "Try schedule to vec " << **i; + if (group->add_vec_instructions(*i)) { + auto old_i = i; + ++i; + if ((*old_i)->has_alu_flag(alu_is_lds)) { + --m_lds_addr_count; + } + + alu_vec_ready.erase(old_i); + success = true; + sfn_log << SfnLog::schedule << " success\n"; + } else { + ++i; + sfn_log << SfnLog::schedule << " failed\n"; + } + } + return success; +} + +bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list& readylist) +{ + assert(group); + + bool success = false; + auto i = readylist.begin(); + auto e = readylist.end(); + while (i != e) { + sfn_log << SfnLog::schedule << "Try schedule to trans " << **i; + if (group->add_trans_instructions(*i)) { + auto old_i = i; + ++i; + readylist.erase(old_i); + success = true; + sfn_log << SfnLog::schedule << " sucess\n"; + break; + } else { + ++i; + sfn_log << SfnLog::schedule << " failed\n"; + } + } + return success; +} + +template +bool BlockSheduler::schedule(std::list& ready_list) +{ + if 
(!ready_list.empty() && m_current_block->remaining_slots() > 0) { + auto ii = ready_list.begin(); + sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; + (*ii)->set_scheduled(); + m_current_block->push_back(*ii); + ready_list.erase(ii); + return true; + } + return false; +} + +template +bool BlockSheduler::schedule_block(std::list& ready_list) +{ + bool success = false; + while (!ready_list.empty() && m_current_block->remaining_slots() > 0) { + auto ii = ready_list.begin(); + sfn_log << SfnLog::schedule << "Schedule: " << **ii << " " + << m_current_block->remaining_slots() << "\n"; + (*ii)->set_scheduled(); + m_current_block->push_back(*ii); + ready_list.erase(ii); + success = true; + } + return success; +} + + +bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list& ready_list) +{ + if (m_current_block->type() != Block::cf) + start_new_block(out_blocks, Block::cf); + + if (!ready_list.empty()) { + auto ii = ready_list.begin(); + sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; + (*ii)->set_scheduled(); + m_current_block->push_back(*ii); + switch ((*ii)->export_type()) { + case ExportInstr::pos: m_last_pos = *ii; break; + case ExportInstr::param: m_last_param = *ii; break; + case ExportInstr::pixel: m_last_pixel = *ii; break; + } + (*ii)->set_is_last_export(false); + ready_list.erase(ii); + return true; + } + return false; +} + +bool BlockSheduler::collect_ready(CollectInstructions &available) +{ + sfn_log << SfnLog::schedule << "Ready instructions\n"; + bool result = false; + result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec); + result |= collect_ready_type(alu_trans_ready, available.alu_trans); + result |= collect_ready_type(alu_groups_ready, available.alu_groups); + result |= collect_ready_type(gds_ready, available.gds_op); + result |= collect_ready_type(tex_ready, available.tex); + result |= collect_ready_type(fetches_ready, available.fetches); + result |= collect_ready_type(memops_ready, 
available.mem_write_instr); + result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes); + result |= collect_ready_type(write_tf_ready, available.write_tf); + result |= collect_ready_type(rat_instr_ready, available.rat_instr); + + sfn_log << SfnLog::schedule << "\n"; + return result; +} + +/* Examine at most 32 entries of `available` per call and move those that + * are ready into `ready` (only while it holds fewer than 32 entries), + * assigning each a priority. Returns true if `ready` is not empty. */ +bool BlockSheduler::collect_ready_alu_vec(std::list& ready, std::list& available) +{ + auto i = available.begin(); + auto e = available.end(); + + for (auto alu : ready) { + alu->add_priority(100 * alu->register_priority()); + } + + int max_check = 0; + while (i != e && max_check++ < 32) { + if (ready.size() < 32 && (*i)->ready()) { + + int priority = 0; + /* LDS fetches that use static offsets are usually ready very fast, + * so that they would get scheduled early, and this leaves the problem + * that we allocate too many registers with just constant values, + * and this will make problems with RA. So limit the number of LDS + * address registers. + */ + if ((*i)->has_alu_flag(alu_lds_address)) { + if (m_lds_addr_count > 64) { + ++i; + continue; + } else { + ++m_lds_addr_count; + } + } + + /* LDS instructions are scheduled with high priority.
+ * instractions that can go into the t slot and don't have + * indirect access are put in last, so that they don't block + * vec-only instructions when scheduling to the vector slots + * for everything else we look at the register use */ + + if ((*i)->has_lds_access()) + priority = 100000; + else if (AluGroup::has_t()) { + auto opinfo = alu_ops.find((*i)->opcode()); + assert(opinfo != alu_ops.end()); + if (opinfo->second.can_channel(AluOp::t) && !(*i)->indirect_addr().first) + priority = -1; + } + + priority += 100 * (*i)->register_priority(); + + (*i)->add_priority(priority); + ready.push_back(*i); + + auto old_i = i; + ++i; + available.erase(old_i); + } else + ++i; + } + + for (auto& i: ready) + sfn_log << SfnLog::schedule << "V: " << *i << "\n"; + + ready.sort([](const AluInstr *lhs, const AluInstr *rhs) { + return lhs->priority() > rhs->priority();}); + + for (auto& i: ready) + sfn_log << SfnLog::schedule << "V (S): " << *i << "\n"; + + return !ready.empty(); +} + +template +struct type_char { + +}; + + +template <> +struct type_char { + static constexpr const char value = 'A'; +}; + +template <> +struct type_char { + static constexpr const char value = 'G'; +}; + +template <> +struct type_char { + static constexpr const char value = 'E'; +}; + +template <> +struct type_char { + static constexpr const char value = 'T'; +}; + +template <> +struct type_char { + static constexpr const char value = 'F'; +}; + +template <> +struct type_char { + static constexpr const char value = 'M'; +}; + +template <> +struct type_char { + static constexpr const char value = 'R'; +}; + +template <> +struct type_char { + static constexpr const char value = 'X'; +}; + +template <> +struct type_char { + static constexpr const char value = 'S'; +}; + +template <> +struct type_char { + static constexpr const char value = 'I'; +}; + + +template +bool BlockSheduler::collect_ready_type(std::list& ready, std::list& available) +{ + auto i = available.begin(); + auto e = available.end(); + 
+ while (i != e) { + if ((*i)->ready()) { + ready.push_back(*i); + auto old_i = i; + ++i; + available.erase(old_i); + } else + ++i; + } + + for (auto& i: ready) + sfn_log << SfnLog::schedule << type_char::value << "; " << *i << "\n"; + + return !ready.empty(); +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.h b/src/gallium/drivers/r600/sfn/sfn_scheduler.h new file mode 100644 index 0000000..63ba7d9 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.h @@ -0,0 +1,13 @@ +#ifndef SHEDULER_H +#define SHEDULER_H + +#include "sfn_shader.h" + +namespace r600 { + + +Shader *schedule(Shader *original); + +} + +#endif // SHEDULER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp new file mode 100644 index 0000000..ca7bcd9 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -0,0 +1,1379 @@ +#include "sfn_shader.h" +#include "sfn_instr.h" +#include "sfn_instr_alugroup.h" +#include "sfn_instr_export.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_lds.h" +#include "sfn_instr_mem.h" + +#include "sfn_instr_controlflow.h" +#include "sfn_liverangeevaluator.h" + +#include "sfn_shader_cs.h" +#include "sfn_shader_fs.h" +#include "sfn_shader_vs.h" +#include "sfn_shader_gs.h" +#include "sfn_shader_tess.h" + +#include "sfn_debug.h" +#include "gallium/drivers/r600/r600_shader.h" + +#include "tgsi/tgsi_from_mesa.h" + +#include "nir.h" + +#include +#include + +namespace r600 { + +using std::string; + +std::pair +r600_get_varying_semantic(unsigned varying_location) +{ + std::pair result; + tgsi_get_gl_varying_semantic(static_cast(varying_location), + true, &result.first, &result.second); + + if (result.first == TGSI_SEMANTIC_GENERIC) { + result.second += 9; + } else if (result.first == TGSI_SEMANTIC_PCOORD) { + result.second = 8; + } + return result; +} + +void ShaderIO::set_sid(int sid) +{ + m_sid = sid; + switch (m_name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case 
TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + case TGSI_SEMANTIC_CLIPVERTEX: + m_spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_PCOORD: + m_spi_sid = m_sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + m_spi_sid = (0x80 | (m_name << 3) | m_sid) + 1; + } +} + +void ShaderIO::override_spi_sid(int spi) +{ + m_spi_sid = spi; +} + +void ShaderIO::print(std::ostream& os) const +{ + os << m_type << " LOC:" << m_location << " NAME:" << m_name; + do_print(os); + + if (m_sid > 0) { + os << " SID:" << m_sid << " SPI_SID:" << m_spi_sid; + } +} + + +ShaderIO::ShaderIO(const char *type, int loc, int name): + m_type(type), + m_location(loc), + m_name(name) +{ +} + +ShaderOutput::ShaderOutput(): + ShaderIO("OUTPUT", -1, -1) +{ +} + +ShaderOutput::ShaderOutput(int location, int name, int writemask): + ShaderIO("OUTPUT", location, name), + m_writemask(writemask) +{ + +} + +void ShaderOutput::do_print(std::ostream& os) const +{ + os << " MASK:" << m_writemask; +} + + +ShaderInput::ShaderInput(int location, int name): + ShaderIO("INPUT", location, name) +{ +} + +ShaderInput::ShaderInput(): + ShaderInput(-1, -1) +{ +} + + +void ShaderInput::do_print(std::ostream& os) const +{ + if (m_interpolator) + os << " INTERP:" << m_interpolator; + if (m_interpolate_loc) + os << " ILOC:" << m_interpolate_loc; + if (m_uses_interpolate_at_centroid) + os << " USE_CENTROID"; +} + +void ShaderInput::set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid) +{ + m_interpolator = interp; + m_interpolate_loc = interp_loc; + m_uses_interpolate_at_centroid = uses_interpolate_at_centroid; +} + +void ShaderInput::set_uses_interpolate_at_centroid() +{ + m_uses_interpolate_at_centroid = true; +} + +Shader::Shader(const char *type_id): + m_current_block(nullptr), + m_type_id(type_id), + m_chip_class(ISA_CC_R600), + m_next_block(0) +{ + m_instr_factory 
= new InstrFactory(); + m_chain_instr.this_shader = this; + start_new_block(0); +} + +void Shader::set_input_gpr(int driver_lcation, int gpr) +{ + auto i = m_inputs.find(driver_lcation); + assert(i != m_inputs.end()); + i->second.set_gpr(gpr); +} + +bool Shader::add_info_from_string(std::istream& is) +{ + std::string type; + is >> type; + + if (type == "CHIPCLASS") + return read_chipclass(is); + if (type == "OUTPUT") + return read_output(is); + if (type == "INPUT") + return read_input(is); + if (type == "PROP") + return read_prop(is); + if (type == "SYSVALUES") + return allocate_registers_from_string(is, pin_fully); + if (type == "REGISTERS") + return allocate_registers_from_string(is, pin_free); + if (type == "ARRAYS") + return allocate_arrays_from_string(is); + + + return false; +} + +void Shader::emit_instruction_from_string(const std::string& s) +{ + + sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n"; + if (s == "BLOCK_START") { + if (!m_current_block->empty()) { + start_new_block(m_current_block->nesting_offset()); + sfn_log << SfnLog::instr << " Emit start block\n"; + } + return; + } + + if (s == "BLOCK_END") { + return; + } + + auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth()); + if (ir) { + emit_instruction(ir); + if (ir->end_block()) + start_new_block(ir->nesting_offset()); + sfn_log << SfnLog::instr << " " << *ir << "\n"; + } +} + +bool Shader::read_output(std::istream& is) +{ + string value; + is >> value; + int pos = int_from_string_with_prefix(value, "LOC:"); + is >> value; + int name = int_from_string_with_prefix(value, "NAME:"); + is >> value; + int mask = int_from_string_with_prefix(value, "MASK:"); + ShaderOutput output(pos, name, mask); + + value.clear(); + is >> value; + if (!value.empty()) { + int sid = int_from_string_with_prefix(value, "SID:"); + output.set_sid(sid); + is >> value; + int spi_sid = int_from_string_with_prefix(value, "SPI_SID:"); + assert(spi_sid == output.spi_sid()); + } + + 
add_output(output); + return true; +} + + +bool Shader::read_input(std::istream& is) +{ + string value; + is >> value; + int pos = int_from_string_with_prefix(value, "LOC:"); + is >> value; + int name = int_from_string_with_prefix(value, "NAME:"); + + value.clear(); + + ShaderInput input(pos, name); + + int interp = 0; + int interp_loc = 0; + bool use_centroid = false; + + is >> value; + while (!value.empty()) { + if (value.substr(0, 4) == "SID:") { + int sid = int_from_string_with_prefix(value, "SID:"); + input.set_sid(sid); + } else if (value.substr(0, 8) == "SPI_SID:") { + int spi_sid = int_from_string_with_prefix(value, "SPI_SID:"); + assert(spi_sid == input.spi_sid()); + } else if (value.substr(0, 7) == "INTERP:") { + interp = int_from_string_with_prefix(value, "INTERP:"); + } else if (value.substr(0, 5) == "ILOC:") { + interp_loc = int_from_string_with_prefix(value, "ILOC:"); + } else if (value == "USE_CENTROID") { + use_centroid = true; + } else { + std::cerr << "Unknown parse value '" << value << "'"; + assert(!value.c_str()); + } + value.clear(); + is >> value; + } + + input.set_interpolator(interp, interp_loc, use_centroid); + + add_input(input); + return true; +} + +bool Shader::allocate_registers_from_string(std::istream& is, Pin pin) +{ + std::string line; + if (!std::getline(is, line)) + return false; + + std::istringstream iline(line); + + while (!iline.eof()) { + string reg_str; + iline >> reg_str; + + if (reg_str.empty()) + break; + + if (strchr(reg_str.c_str(), '@')) { + value_factory().dest_from_string(reg_str); + } else { + RegisterVec4::Swizzle swz = {0,1,2,3}; + auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin); + for (int i = 0; i < 4; ++i) { + if (swz[i] < 4 && pin == pin_fully) { + regs[i]->pin_live_range(true, false); + } + } + } + } + return true; +} + +bool Shader::allocate_arrays_from_string(std::istream& is) +{ + std::string line; + if (!std::getline(is, line)) + return false; + + std::istringstream iline(line); + + 
while (!iline.eof()) { + string reg_str; + iline >> reg_str; + + if (reg_str.empty()) + break; + + value_factory().array_from_string(reg_str); + } + return true; +} + +bool Shader::read_chipclass(std::istream& is) +{ + string name; + is >> name; + if (name == "R600") + m_chip_class = ISA_CC_R600; + else if (name == "R700") + m_chip_class = ISA_CC_R700; + else if (name == "EVERGREEN") + m_chip_class = ISA_CC_EVERGREEN; + else if (name == "CAYMAN") + m_chip_class = ISA_CC_CAYMAN; + else + return false; + return true; +} + +void Shader::allocate_reserved_registers() +{ + m_instr_factory->value_factory().set_virtual_register_base(0); + auto reserved_registers_end = do_allocate_reserved_registers(); + m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end); + if (!m_atomics.empty()) { + m_atomic_update = value_factory().temp_register(); + auto alu = new AluInstr(op1_mov, m_atomic_update, + value_factory().one_i(), + AluInstr::last_write); + alu->set_alu_flag(alu_no_schedule_bias); + emit_instruction(alu); + } + + if(m_flags.test(sh_needs_sbo_ret_address)) { + m_rat_return_address = value_factory().temp_register(0); + auto temp0 = value_factory().temp_register(0); + auto temp1 = value_factory().temp_register(1); + auto temp2 = value_factory().temp_register(2); + + auto group = new AluGroup(); + group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write})); + group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write})); + emit_instruction(group); + emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0), + value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), {alu_write, alu_last_instr})); + emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address, + temp2, value_factory().literal(0x40), temp0, + {alu_write, alu_last_instr})); + } +} + +Shader 
*Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, + struct r600_shader* gs_shader, + r600_shader_key& key, r600_chip_class chip_class) +{ + Shader *shader = nullptr; + + switch (nir->info.stage) { + case MESA_SHADER_FRAGMENT: + shader = new FragmentShader(key); + break; + case MESA_SHADER_VERTEX: + shader = new VertexShader(so_info, gs_shader, key); + break; + case MESA_SHADER_GEOMETRY: + shader = new GeometryShader(key); + break; + case MESA_SHADER_TESS_CTRL: + shader = new TCSShader(key); + break; + case MESA_SHADER_TESS_EVAL: + shader = new TESShader(so_info, gs_shader, key); + break; + case MESA_SHADER_COMPUTE: + shader = new ComputeShader(key); + break; + default: + return nullptr; + } + + shader->set_info(nir); + + shader->set_chip_class(chip_class); + if (!shader->process(nir)) + return nullptr; + + return shader; +} + +void Shader::set_info(nir_shader *nir) +{ + m_scratch_size = nir->scratch_size; +} + +ValueFactory& Shader::value_factory() +{ + return m_instr_factory->value_factory(); +} + + +bool Shader::process(nir_shader *nir) +{ + m_ssbo_image_offset = nir->info.num_images; + + nir_foreach_uniform_variable(var, nir) + scan_uniforms(var); + + // at this point all functions should be inlined + const nir_function *func = reinterpret_cast(exec_list_get_head_const(&nir->functions)); + + if (!scan_shader(func)) + return false; + + allocate_reserved_registers(); + + allocate_local_registers(&func->impl->registers); + + sfn_log << SfnLog::trans << "Process shader \n"; + foreach_list_typed(nir_cf_node, node, node, &func->impl->body) { + if (!process_cf_node(node)) + return false; + } + + finalize(); + + return true; +} + +void Shader::allocate_local_registers(const exec_list *registers) +{ + if (value_factory().allocate_registers(registers)) + m_indirect_files |= 1 << TGSI_FILE_TEMPORARY; +} + +bool Shader::scan_shader(const nir_function *func) +{ + + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + 
if (!scan_instruction(instr)) { + fprintf(stderr, "Unhandled sysvalue access "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + return false; + } + } + } + + int lds_pos = 0; + for (auto& [index, input] : m_inputs) { + if (input.need_lds_pos()) + input.set_lds_pos(lds_pos++); + } + + int param_id = 0; + for (auto& [index, out] : m_outputs) { + if (out.is_param()) + out.set_pos(param_id++); + } + + return true; +} + +bool Shader::scan_uniforms(nir_variable *uniform) +{ + if (uniform->type->contains_atomic()) { + int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; + m_nhwatomic += natomics; + + if (uniform->type->is_array()) + m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; + + m_flags.set(sh_uses_atomics); + + r600_shader_atomic atom = {0}; + + atom.buffer_id = uniform->data.binding; + atom.hw_idx = m_atomic_base + m_next_hwatomic_loc; + + atom.start = uniform->data.offset >> 2; + atom.end = atom.start + natomics - 1; + + if (m_atomic_base_map.find(uniform->data.binding) == + m_atomic_base_map.end()) + m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc; + + m_next_hwatomic_loc += natomics; + + m_atomic_file_count += atom.end - atom.start + 1; + + sfn_log << SfnLog::io << "HW_ATOMIC file count: " + << m_atomic_file_count << "\n"; + + m_atomics.push_back(atom); + } + + auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type; + if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { + m_flags.set(sh_uses_images); + if (uniform->type->is_array() && ! 
(uniform->data.mode == nir_var_mem_ssbo))
+         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
+   }
+
+   return true;
+}
+
+
+/* Pre-scan one NIR instruction and record shader-wide flags.
+ *
+ * do_scan_instruction() is asked first; when it reports the instruction as
+ * handled nothing else is done.  Otherwise only intrinsics are inspected:
+ *  - SSBO/image atomics and image loads set sh_needs_sbo_ret_address and,
+ *    like image/SSBO stores, sh_writes_memory and sh_uses_images;
+ *  - memory barriers set m_chain_instr.prepare_mem_barrier.
+ *
+ * Always returns true. */
+bool Shader::scan_instruction(nir_instr *instr)
+{
+   if (do_scan_instruction(instr))
+      return true;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return true;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+
+   // handle unhandled instructions
+   switch (intr->intrinsic) {
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_imax:
+      m_flags.set(sh_needs_sbo_ret_address);
+      FALLTHROUGH;
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_store_ssbo:
+      m_flags.set(sh_writes_memory);
+      m_flags.set(sh_uses_images);
+      break;
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_group_memory_barrier:
+      m_chain_instr.prepare_mem_barrier = true;
+      // Explicit break: this case used to run silently into "default".
+      // The result is unchanged, but the intent is no longer ambiguous.
+      break;
+   default:
+      ;
+   }
+   return true;
+}
+
+/* Dispatch a NIR control-flow node to the handler for its node type. */
+bool Shader::process_cf_node(nir_cf_node *node)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "CF");
+
+   switch (node->type) {
+   case nir_cf_node_block:
+      return process_block(nir_cf_node_as_block(node));
+   case nir_cf_node_if:
+      return process_if(nir_cf_node_as_if(node));
+   case nir_cf_node_loop:
+      return
process_loop(nir_cf_node_as_loop(node));
+   default:
+      return false;
+   }
+
+}
+
+/* Return true when a control-flow list contributes no code: it has no
+ * nested if or loop node and every block node in it has an empty
+ * instruction list. */
+static bool
+child_block_empty (const exec_list& list)
+{
+   if (list.is_empty())
+      return true;
+
+   foreach_list_typed(nir_cf_node, n, node, &list) {
+
+      if (n->type == nir_cf_node_block) {
+         if (!nir_cf_node_as_block(n)->instr_list.is_empty())
+            return false;
+      }
+      // Nested control flow is never treated as empty.  Loops are checked
+      // as well as ifs, so a branch that only holds a loop is not dropped.
+      if (n->type == nir_cf_node_if || n->type == nir_cf_node_loop)
+         return false;
+   }
+   return true;
+}
+
+/* Translate a NIR if statement.
+ *
+ * Emits the IF start, processes the then-list, emits ELSE followed by the
+ * else-list only when the else-list is not empty, and closes with ENDIF.
+ * Returns false as soon as any step fails. */
+bool Shader::process_if(nir_if *if_stmt)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "IF");
+
+   if (!emit_if_start(if_stmt))
+      return false;
+
+   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) {
+      SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
+      if (!process_cf_node(n))
+         return false;
+   }
+
+   if (!child_block_empty(if_stmt->else_list)) {
+      // This call must not be wrapped in assert(): with NDEBUG defined the
+      // argument is not evaluated and the ELSE would never be emitted.
+      if (!emit_control_flow(ControlFlowInstr::cf_else))
+         return false;
+
+      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list) {
+         if (!process_cf_node(n))
+            return false;
+      }
+   }
+
+   if (!emit_control_flow(ControlFlowInstr::cf_endif))
+      return false;
+
+   return true;
+}
+
+/* Emit the predicate and the IfInstr that open an if statement.
+ *
+ * The predicate is a pred_setne_int of the if condition against zero with
+ * alu_update_exec and alu_update_pred set and CF type cf_alu_push_before.
+ * A new block nested one level deeper is started afterwards. */
+bool Shader::emit_if_start(nir_if *if_stmt)
+{
+   auto value = value_factory().src(if_stmt->condition, 0);
+   AluInstr *pred = new AluInstr(op2_pred_setne_int, value_factory().temp_register(),
+                                 value, value_factory().zero(), AluInstr::last);
+   pred->set_alu_flag(alu_update_exec);
+   pred->set_alu_flag(alu_update_pred);
+   pred->set_cf_type(cf_alu_push_before);
+
+   IfInstr *ir = new IfInstr(pred);
+   emit_instruction(ir);
+   start_new_block(1);
+   return true;
+}
+
+/* Emit a control-flow instruction and start a new block after it.
+ *
+ * cf_loop_begin records the instruction in m_loops, counts the loop and
+ * nests the new block one level deeper.  cf_loop_end pops m_loops and,
+ * like cf_endif, moves the new block one level up.  Every other type
+ * keeps the current nesting depth. */
+bool Shader::emit_control_flow(ControlFlowInstr::CFType type)
+{
+   auto ir = new ControlFlowInstr(type);
+   emit_instruction(ir);
+   int depth = 0;
+   switch (type) {
+   case ControlFlowInstr::cf_loop_begin:
+      m_loops.push_back(ir);
+      m_nloops++;
+      depth = 1;
+      break;
+   case ControlFlowInstr::cf_loop_end:
+      m_loops.pop_back();
+      FALLTHROUGH;
+   case ControlFlowInstr::cf_endif:
+      depth = -1;
+      break;
+   default:
+      ;
+   }
+
+   start_new_block(depth);
+   return true;
+}
+
+bool
Shader::process_loop(nir_loop *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "LOOP"); + if (!emit_control_flow(ControlFlowInstr::cf_loop_begin)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &node->body) + if (!process_cf_node(n)) return false; + + if (!emit_control_flow(ControlFlowInstr::cf_loop_end)) + return false; + + return true; +} + +bool Shader::process_block(nir_block *block) +{ + SFN_TRACE_FUNC(SfnLog::flow, "BLOCK"); + + nir_foreach_instr(instr, block) { + sfn_log << SfnLog::instr << "FROM:" << *instr << "\n"; + bool r = process_instr(instr); + if (!r) { + sfn_log << SfnLog::err << "R600: Unsupported instruction: " + << *instr << "\n"; + return false; + } + } + return true; +} + +bool Shader::process_instr(nir_instr *instr) +{ + return m_instr_factory->from_nir(instr, *this); +} + +bool Shader::process_intrinsic(nir_intrinsic_instr *intr) +{ + if (process_stage_intrinsic(intr)) + return true; + + if (GDSInstr::emit_atomic_counter(intr, *this)) { + set_flag(sh_writes_memory); + return true; + } + + if (RatInstr::emit(intr, *this)) + return true; + + switch (intr->intrinsic) { + case nir_intrinsic_store_output: return store_output(intr); + case nir_intrinsic_load_input: return load_input(intr); + case nir_intrinsic_load_uniform: return load_uniform(intr); + case nir_intrinsic_load_ubo_vec4: return load_ubo(intr); + case nir_intrinsic_store_scratch: return emit_store_scratch(intr); + case nir_intrinsic_load_scratch: return emit_load_scratch(intr); + case nir_intrinsic_store_local_shared_r600: return emit_local_store(intr); + case nir_intrinsic_load_local_shared_r600: return emit_local_load(intr); + case nir_intrinsic_load_tcs_in_param_base_r600: return emit_load_tcs_param_base(intr, 0); + case nir_intrinsic_load_tcs_out_param_base_r600: return emit_load_tcs_param_base(intr, 16); + // We only emit the group barrier, barriers across work groups + // are not yet implemented + case nir_intrinsic_control_barrier: + case 
nir_intrinsic_memory_barrier_tcs_patch: + case nir_intrinsic_memory_barrier_shared: + return emit_barrier(intr); + case nir_intrinsic_memory_barrier_atomic_counter: + return true; + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier: + return emit_wait_ack(); + + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + return emit_atomic_local_shared(intr); + case nir_intrinsic_shader_clock: + return emit_shader_clock(intr); + + default: + return false; + } +} + +static ESDOp +lds_op_from_intrinsic(nir_intrinsic_op op, bool ret) { + switch (op) { + case nir_intrinsic_shared_atomic_add: + return ret ? LDS_ADD_RET : LDS_ADD; + case nir_intrinsic_shared_atomic_and: + return ret ? LDS_AND_RET : LDS_AND; + case nir_intrinsic_shared_atomic_or: + return ret ? LDS_OR_RET : LDS_OR; + case nir_intrinsic_shared_atomic_imax: + return ret ? LDS_MAX_INT_RET : LDS_MAX_INT; + case nir_intrinsic_shared_atomic_umax: + return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT; + case nir_intrinsic_shared_atomic_imin: + return ret ? LDS_MIN_INT_RET : LDS_MIN_INT; + case nir_intrinsic_shared_atomic_umin: + return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT; + case nir_intrinsic_shared_atomic_xor: + return ret ? 
LDS_XOR_RET : LDS_XOR; + case nir_intrinsic_shared_atomic_exchange: + return LDS_XCHG_RET; + case nir_intrinsic_shared_atomic_comp_swap: + return LDS_CMP_XCHG_RET; + default: + unreachable("Unsupported shared atomic opcode"); + } +} + +PRegister Shader::emit_load_to_register(PVirtualValue src) +{ + assert(src); + PRegister dest = src->as_register(); + + if (!dest) { + dest = value_factory().temp_register(); + emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write)); + } + return dest; +} + +bool Shader::emit_atomic_local_shared(nir_intrinsic_instr* instr) +{ + bool uses_retval = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); + + auto& vf = value_factory(); + + auto dest_value = uses_retval ? vf.dest(instr->dest, 0, pin_free) : nullptr; + + auto op = lds_op_from_intrinsic(instr->intrinsic, uses_retval); + + auto address = vf.src(instr->src[0], 0); + + AluInstr::SrcValues src; + src.push_back(vf.src(instr->src[1], 0)); + + if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) + src.push_back(vf.src(instr->src[2], 0)); + emit_instruction(new LDSAtomicInstr(op, dest_value, address, src)); + return true; +} + +auto Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair +{ + auto& vf = value_factory(); + + PRegister uav_id{nullptr}; + int offset = 0; + + auto uav_id_const = nir_src_as_const_value(instr->src[src_id]); + if (uav_id_const) { + offset += uav_id_const->u32; + } else { + auto uav_id_val = vf.src(instr->src[src_id], 0); + if (uav_id_val->as_register()) { + uav_id = uav_id_val->as_register(); + } else { + uav_id = vf.temp_register(); + emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, + AluInstr::last_write)); + } + } + return std::make_pair(offset, uav_id); +} + + +bool Shader::emit_store_scratch(nir_intrinsic_instr *intr) +{ + auto& vf = m_instr_factory->value_factory(); + + int writemask = nir_intrinsic_write_mask(intr); + + + RegisterVec4::Swizzle swz = {7,7,7,7}; + + 
for (unsigned i = 0; i < intr->num_components; ++i) + swz[i] = (1 << i) & writemask ? i : 7; + + auto value = vf.temp_vec4(pin_group, swz); + AluInstr *ir = nullptr; + for (unsigned i = 0; i < intr->num_components; ++i) { + if (value[i]->chan() < 4) { + ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write); + ir->set_alu_flag(alu_no_schedule_bias); + emit_instruction(ir); + } + } + if (!ir) + return true; + + ir->set_alu_flag(alu_last_instr); + + auto address = vf.src(intr->src[1], 0); + + + int align = nir_intrinsic_align_mul(intr); + int align_offset = nir_intrinsic_align_offset(intr); + + WriteScratchInstr *ws_ir = nullptr; + + int offset = -1; + if (address->as_literal()) { + offset = address->as_literal()->value(); + } else if (address->as_inline_const()) { + auto il = address->as_inline_const(); + if (il->sel() == ALU_SRC_0) + offset = 0; + else if (il->sel() == ALU_SRC_1_INT) + offset = 1; + } + + if (offset >= 0) { + ws_ir = new WriteScratchInstr(value, offset, align, align_offset, writemask); + } else { + auto addr_temp = vf.temp_register(0); + auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write); + load_addr->set_alu_flag(alu_no_schedule_bias); + emit_instruction(load_addr); + + ws_ir = new WriteScratchInstr(value, addr_temp, align, align_offset, writemask, m_scratch_size); + } + emit_instruction(ws_ir); + + m_flags.set(sh_needs_scratch_space); + return true; +} + +bool Shader::emit_load_scratch(nir_intrinsic_instr *intr) +{ + auto addr = value_factory().src(intr->src[0], 0); + + RegisterVec4::Swizzle dest_swz = {7,7,7,7}; + + for (unsigned i = 0; i < intr->num_components; ++i) + dest_swz[i] = i; + + auto dest = value_factory().dest_vec4(intr->dest, pin_group); + + auto ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size); + emit_instruction(ir); + + chain_scratch_read(ir); + + m_flags.set(sh_needs_scratch_space); + + return true; + +} + +bool Shader::emit_local_store(nir_intrinsic_instr 
*instr) +{ + unsigned write_mask = nir_intrinsic_write_mask(instr); + + auto address = value_factory().src(instr->src[1], 0); + int swizzle_base = (write_mask & 0x3) ? 0 : 2; + write_mask |= write_mask >> 2; + + if ((write_mask & 3) != 3) { + if (write_mask == 2) + swizzle_base += 1; + auto value = value_factory().src(instr->src[0], swizzle_base); + emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value})); + } else { + auto value = value_factory().src(instr->src[0], swizzle_base); + auto value1 = value_factory().src(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1})); + } + return true; +} + +bool Shader::emit_local_load(nir_intrinsic_instr* instr) +{ + auto address = value_factory().src_vec(instr->src[0], instr->num_components); + auto dest_value = value_factory().dest_vec(instr->dest, instr->num_components); + emit_instruction(new LDSReadInstr(dest_value, address)); + return true; +} + +void Shader::chain_scratch_read(Instr *instr) +{ + m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr); +} + +void Shader::chain_ssbo_read(Instr *instr) +{ + m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr); +} + +bool Shader::emit_wait_ack() +{ + start_new_block(0); + emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack)); + start_new_block(0); + return true; +} + +void Shader::InstructionChain::visit(WriteScratchInstr *instr) +{ + apply(instr, &last_scratch_instr); +} + +void Shader::InstructionChain::visit(GDSInstr *instr) +{ + apply(instr, &last_gds_instr); + for (auto& loop : this_shader->m_loops) { + loop->set_instr_flag(Instr::vpm); + } +} + +void Shader::InstructionChain::visit(RatInstr *instr) +{ + apply(instr, &last_ssbo_instr); + for (auto& loop : this_shader->m_loops) { + loop->set_instr_flag(Instr::vpm); + } + + if (prepare_mem_barrier) + instr->set_ack(); +} + +void Shader::InstructionChain::apply(Instr *current, Instr **last) { + if (*last) + 
current->add_required_instr(*last);
+   *last = current;
+}
+
+/* Log the instruction, let m_chain_instr visit it, and append it to the
+ * current block. */
+void Shader::emit_instruction(PInst instr)
+{
+   sfn_log << SfnLog::instr << "   " << *instr << "\n";
+   instr->accept(m_chain_instr);
+   m_current_block->push_back(instr);
+}
+
+/* Translate nir_intrinsic_load_uniform.
+ *
+ * When src[0] is a constant, one mov per destination component copies the
+ * uniform into the destination and the last mov gets alu_last_instr.
+ * Otherwise the load is delegated to load_uniform_indirect() with offset
+ * 16 * nir_intrinsic_base(intr) and buffer id 0. */
+bool Shader::load_uniform(nir_intrinsic_instr *intr)
+{
+   auto literal = nir_src_as_const_value(intr->src[0]);
+
+   if (literal) {
+      AluInstr *ir = nullptr;
+      auto pin = intr->dest.is_ssa && nir_dest_num_components(intr->dest) == 1 ?
+                    pin_free : pin_none;
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+
+         sfn_log << SfnLog::io << "uniform "
+                 << intr->dest.ssa.index << " const["<< i << "]: "<< intr->const_index[i] << "\n";
+
+         auto uniform = value_factory().uniform(intr, i);
+         ir = new AluInstr(op1_mov, value_factory().dest(intr->dest, i, pin),
+                           uniform, {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+      return true;
+   } else {
+      auto addr = value_factory().src(intr->src[0], 0);
+      return load_uniform_indirect(intr, addr, 16 * nir_intrinsic_base(intr), 0);
+   }
+}
+
+/* Emit an indirectly addressed uniform load as a LoadFromBuffer.
+ *
+ * intr       intrinsic whose destination receives the vec4 result
+ * addr       address value; must not be null
+ * offset     constant offset handed to LoadFromBuffer
+ * buffer_id  buffer id handed to LoadFromBuffer
+ *
+ * Sets sh_indirect_const_file and returns true. */
+bool Shader::load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr,
+                                   int offset, int buffer_id)
+{
+   // LoadFromBuffer is given a register as address.  The old code tested
+   // "addr" instead of the register and stored the temporary in "addr",
+   // so a non-register address was passed on as a null register.
+   // emit_load_to_register() returns the value itself when it already is
+   // a register and otherwise moves it into a fresh temporary.
+   auto addr_reg = emit_load_to_register(addr);
+
+   RegisterVec4 dest = value_factory().dest_vec4(intr->dest, pin_group);
+
+   auto ir = new LoadFromBuffer(dest, {0,1,2,3}, addr_reg, offset, buffer_id,
+                                nullptr, fmt_32_32_32_32_float);
+   emit_instruction(ir);
+   m_flags.set(sh_indirect_const_file);
+   return true;
+}
+
+bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
+{
+   auto src = value_factory().temp_register();
+   emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(),
+                                 AluInstr::last_write));
+
+   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+   auto fetch = new LoadFromBuffer(dest, {0,1,2,3},
src, offset, + R600_LDS_INFO_CONST_BUFFER, nullptr, + fmt_32_32_32_32); + + fetch->set_fetch_flag(LoadFromBuffer::srf_mode); + emit_instruction(fetch); + + return true; +} + +bool Shader::emit_shader_clock(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + auto group = new AluGroup(); + group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 0, pin_chan), + vf.inline_const(ALU_SRC_TIME_LO, 0), AluInstr::write)); + group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 1, pin_chan), + vf.inline_const(ALU_SRC_TIME_HI, 0), AluInstr::last_write)); + emit_instruction(group); + return true; +} + + +bool Shader::emit_barrier(nir_intrinsic_instr* intr) +{ + (void)intr; + /* Put barrier into it's own block, so that optimizers and the + * scheduler don't move code */ + start_new_block(0); + auto op = new AluInstr(op0_group_barrier, 0); + op->set_alu_flag(alu_last_instr); + emit_instruction(op); + start_new_block(0); + return true; +} + +bool Shader::load_ubo(nir_intrinsic_instr *instr) +{ + auto bufid = nir_src_as_const_value(instr->src[0]); + auto buf_offset = nir_src_as_const_value(instr->src[1]); + + if (!buf_offset) { + /* TODO: if bufid is constant then this can also be solved by using the CF indes + * on the ALU block, and this would probably make sense when there are more then one + * loads with the same buffer ID. 
*/ + + auto addr = value_factory().src(instr->src[1], 0)->as_register(); + RegisterVec4::Swizzle dest_swz {7,7,7,7}; + auto dest = value_factory().dest_vec4(instr->dest, pin_group); + + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + dest_swz[i] = i + nir_intrinsic_component(instr); + } + + LoadFromBuffer *ir; + if (bufid) { + ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1 + bufid->u32, + nullptr, fmt_32_32_32_32_float); + } else { + auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0)); + ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1, buffer_id, + fmt_32_32_32_32_float); + } + emit_instruction(ir); + return true; + } + + /* direct load using the constant cache */ + if (bufid) { + int buf_cmp = nir_intrinsic_component(instr); + + AluInstr *ir = nullptr; + auto pin = instr->dest.is_ssa && nir_dest_num_components(instr->dest) == 1 ? + pin_free : pin_none; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + + sfn_log << SfnLog::io << "UBO[" << bufid << "] " + << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; + + auto uniform = value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1); + ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin), + uniform, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + return true; + } else { + int buf_cmp = nir_intrinsic_component(instr); + AluInstr *ir = nullptr; + auto kc_id = value_factory().src(instr->src[0], 0); + + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id); + auto dest = value_factory().dest(instr->dest, i, pin_none); + ir = new AluInstr(op1_mov, dest, u, AluInstr::write); + emit_instruction(ir); + } + if (ir) + ir->set_alu_flag(alu_last_instr); + m_indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; + } +} + +void 
Shader::start_new_block(int depth) +{ + int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0; + m_current_block = new Block(depth + depth_offset, m_next_block++); + m_root.push_back(m_current_block); +} + +bool Shader::emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin) +{ + auto dst = value_factory().dest(dest, chan, pin); + emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write)); + return true; +} + +void Shader::print(std::ostream& os) const +{ + print_header(os); + + for (auto& [dummy, i]: m_inputs) { + i.print(os); + os << "\n"; + } + + for (auto& [dummy, o]: m_outputs) { + o.print(os); + os << "\n"; + } + + os << "SHADER\n"; + for (auto& b : m_root) + b->print(os); +} + +const char *chip_class_names[] = { + "R600", + "R700", + "EVERGREEN", + "CAYMAN" +}; + +void Shader::print_header(std::ostream& os) const +{ + assert(m_chip_class <= ISA_CC_CAYMAN); + os << m_type_id << "\n"; + os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n"; + print_properties(os); +} + +void Shader::print_properties(std::ostream& os) const +{ + do_print_properties(os); +} + +bool Shader::equal_to(const Shader& other) const +{ + if (m_root.size() != other.m_root.size()) + return false; + return std::inner_product(m_root.begin(), m_root.end(), + other.m_root.begin(), + true, + [](bool lhs, bool rhs){ return lhs & rhs;}, + [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool { + return lhs->is_equal_to(*rhs); + }); +} + +void Shader::get_shader_info(r600_shader *sh_info) +{ + sh_info->ninput = m_inputs.size(); + int lds_pos = 0; + int output_array_array_loc = 0; + for (auto& [index, info] : m_inputs) { + r600_shader_io& io = sh_info->input[output_array_array_loc++]; + + io.sid = info.sid(); + io.gpr = info.gpr(); + io.spi_sid = info.spi_sid(); + io.ij_index = info.ij_index(); + io.name = info.name(); + io.interpolate = info.interpolator(); + io.interpolate_location = info.interpolate_loc(); + if (info.need_lds_pos()) 
+ io.lds_pos = lds_pos++; + else + io.lds_pos = 0; + + io.ring_offset = info.ring_offset(); + io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid(); + + sfn_log << SfnLog::io << "Emit Input [" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n"; + assert(io.spi_sid >= 0); + } + + sh_info->nlds = lds_pos; + sh_info->noutput = m_outputs.size(); + sh_info->num_loops = m_nloops; + + for (auto& [index, info] : m_outputs) { + r600_shader_io& io = sh_info->output[index]; + io.sid = info.sid(); + io.gpr = info.gpr(); + io.spi_sid = info.spi_sid(); + io.name = info.name(); + io.write_mask = info.writemask(); + + sfn_log << SfnLog::io << "Emit output[" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n"; + assert(io.spi_sid >= 0); + } + + sh_info->nhwatomic = m_nhwatomic; + sh_info->atomic_base = m_atomic_base; + sh_info->nhwatomic_ranges = m_atomics.size(); + for (unsigned i = 0; i < m_atomics.size(); ++i) + sh_info->atomics[i] = m_atomics[i]; + + if (m_flags.test(sh_indirect_const_file)) + sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT; + + if (m_flags.test(sh_indirect_atomic)) + sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; + + sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer); + + value_factory().get_shader_info(sh_info); + + sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space); + sh_info->uses_images = m_flags.test(sh_uses_images); + sh_info->uses_atomics = m_flags.test(sh_uses_atomics); + sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp); + sh_info->indirect_files = m_indirect_files; + do_get_shader_info(sh_info); +} + +PRegister Shader::atomic_update() +{ + assert(m_atomic_update); + return m_atomic_update; +} + +int Shader::remap_atomic_base(int base) +{ + return m_atomic_base_map[base]; +} + +void Shader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->uses_atomics = m_nhwatomic > 0; +} + + +const ShaderInput& Shader::input(int base) const +{ + auto io 
= m_inputs.find(base); + assert(io != m_inputs.end()); + return io->second; +} + +const ShaderOutput& Shader::output(int base) const +{ + auto io = m_outputs.find(base); + assert(io != m_outputs.end()); + return io->second; +} + +LiveRangeMap Shader::prepare_live_range_map() +{ + return m_instr_factory->value_factory().prepare_live_range_map(); + +} + +void Shader::reset_function(ShaderBlocks& new_root) +{ + std::swap(m_root, new_root); +} + +void Shader::finalize() +{ + do_finalize(); +} + +void Shader::do_finalize() +{ + +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h new file mode 100644 index 0000000..daf9484 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader.h @@ -0,0 +1,365 @@ +#ifndef SHADER_H +#define SHADER_H + +#include "sfn_instr.h" +#include "sfn_instrfactory.h" +#include "sfn_instr_controlflow.h" +#include "gallium/drivers/r600/r600_shader.h" +#include "sfn_liverangeevaluator.h" + +#include +#include +#include +#include + +struct nir_shader; +struct nir_cf_node; +struct nir_if; +struct nir_block; +struct nir_instr; + +namespace r600 { + +class ShaderIO { +public: + void set_sid(int sid); + void override_spi_sid(int spi_sid); + void print(std::ostream& os) const; + + int spi_sid() const { return m_spi_sid;} + unsigned sid() const { return m_sid;} + + int location() const {return m_location;} + unsigned name() const { return m_name;} + + int pos() const { return m_pos;} + void set_pos(int pos) {m_pos = pos;} + + bool is_param() const { return m_is_param;} + void set_is_param(bool val) { m_is_param = val;} + + void set_gpr(int gpr) {m_gpr = gpr;} + int gpr() const {return m_gpr;} + +protected: + ShaderIO(const char *type, int loc, int name); + +private: + + virtual void do_print(std::ostream& os) const = 0; + + const char *m_type; + int m_location{-1}; + int m_name{-1}; + int m_sid{0}; + int m_spi_sid{0}; + int m_pos{0}; + int m_is_param{false}; + int m_gpr{0}; +}; + +class ShaderOutput : 
public ShaderIO { +public: + ShaderOutput(); + ShaderOutput(int location, int name, int writemask); + + int writemask() const { return m_writemask;} + +private: + void do_print(std::ostream& os) const override; + + int m_writemask{0}; +}; + + +class ShaderInput : public ShaderIO { +public: + ShaderInput(); + ShaderInput(int location, int name); + void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid); + void set_uses_interpolate_at_centroid(); + void set_need_lds_pos() { m_need_lds_pos = true;} + int ij_index() const { return m_ij_index;} + + int interpolator() const{return m_interpolator;} + int interpolate_loc() const {return m_interpolate_loc;} + bool need_lds_pos() const {return m_need_lds_pos;} + int lds_pos() const {return m_lds_pos;} + void set_lds_pos(int pos) {m_lds_pos = pos;} + + int ring_offset() const {return m_ring_offset;} + void set_ring_offset(int offs) {m_ring_offset = offs;} + bool uses_interpolate_at_centroid() const {return m_uses_interpolate_at_centroid;} + +private: + void do_print(std::ostream& os) const override; + + int m_interpolator{0}; + int m_interpolate_loc{0}; + int m_ij_index{0}; + bool m_uses_interpolate_at_centroid{false}; + bool m_need_lds_pos{false}; + int m_lds_pos{0}; + int m_ring_offset{0}; +}; + +class Shader : public Allocate { +public: + using InputIterator = std::map::iterator; + using OutputIterator = std::map::iterator; + + using ShaderBlocks = std::list>; + + Shader(const Shader& orig) = delete; + + virtual ~Shader() {} + + bool add_info_from_string(std::istream& is); + + static Shader *translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, r600_shader *gs_shader, + r600_shader_key& key, r600_chip_class chip_class); + + bool process(nir_shader *nir); + + bool process_cf_node(nir_cf_node *node); + bool process_if(nir_if *node); + bool process_loop(nir_loop *node); + bool process_block(nir_block *node); + bool process_instr(nir_instr *instr); + void 
emit_instruction(PInst instr); + bool emit_atomic_local_shared(nir_intrinsic_instr* instr); + + void print(std::ostream& os ) const; + void print_header(std::ostream& os ) const; + + bool process_intrinsic(nir_intrinsic_instr *intr); + + virtual bool load_input(nir_intrinsic_instr *intr) = 0; + virtual bool store_output(nir_intrinsic_instr *intr) = 0; + + bool load_uniform(nir_intrinsic_instr *intr); + bool load_ubo(nir_intrinsic_instr *intr); + + ValueFactory& value_factory(); + + void add_output(const ShaderOutput& output) { + m_outputs[output.location()] = output; + } + + void add_input(const ShaderInput& input) { + m_inputs[input.location()] = input; + } + + void set_input_gpr(int driver_lcation, int gpr); + + InputIterator find_input(int location) { return m_inputs.find(location);} + + InputIterator input_not_found() {return m_inputs.end();} + + OutputIterator find_output(int location); + OutputIterator output_not_found() {return m_outputs.end();} + + ShaderBlocks& func() { return m_root; } + void reset_function(ShaderBlocks& new_root); + + void emit_instruction_from_string(const std::string &s); + + void set_info(nir_shader *nir); + void get_shader_info(r600_shader *sh_info); + + r600_chip_class chip_class() const {return m_chip_class;}; + void set_chip_class(r600_chip_class cls) {m_chip_class = cls;}; + + void start_new_block(int nesting_depth); + + const ShaderOutput& output(int base) const; + + LiveRangeMap prepare_live_range_map(); + + void set_last_txd(Instr *txd){m_last_txd = txd;} + Instr *last_txd(){return m_last_txd;} + + // Needed for keeping the memory access in order + void chain_scratch_read(Instr *instr); + void chain_ssbo_read(Instr *instr); + + virtual uint32_t enabled_stream_buffers_mask() const {return 0;}; + + size_t noutputs() const { return m_outputs.size();} + size_t ninputs() const { return m_inputs.size();} + + enum Flags { + sh_indirect_const_file, + sh_needs_scratch_space, + sh_needs_sbo_ret_address, + sh_uses_atomics, + 
sh_uses_images, + sh_uses_tex_buffer, + sh_writes_memory, + sh_txs_cube_array_comp, + sh_indirect_atomic, + sh_mem_barrier, + sh_flags_count + }; + + void set_flag(Flags f) {m_flags.set(f);} + bool has_flag(Flags f) const {return m_flags.test(f);} + + int atomic_file_count() const { return m_atomic_file_count; } + + PRegister atomic_update(); + int remap_atomic_base(int base); + auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair; + int ssbo_image_offset() const {return m_ssbo_image_offset;} + PRegister rat_return_address() {assert(m_rat_return_address); return m_rat_return_address;} + + PRegister emit_load_to_register(PVirtualValue src); + +protected: + enum ESlots { + es_face, + es_instanceid, + es_invocation_id, + es_patch_id, + es_pos, + es_rel_patch_id, + es_sample_mask_in, + es_sample_id, + es_sample_pos, + es_tess_factor_base, + es_vertexid, + es_tess_coord, + es_primitive_id, + es_helper_invocation, + es_last + }; + + std::bitset m_sv_values; + + Shader(const char *type_id); + + const ShaderInput& input(int base) const; + + bool emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin = pin_free); + +private: + virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0; + + bool allocate_registers_from_string(std::istream& is, Pin pin); + bool allocate_arrays_from_string(std::istream& is); + + bool read_chipclass(std::istream& is); + + bool load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr, int offset , int buffer_id); + + bool scan_shader(const nir_function *impl); + bool scan_uniforms(nir_variable *uniform); + void allocate_reserved_registers(); + + void allocate_local_registers(const exec_list *registers); + + virtual int do_allocate_reserved_registers() = 0; + + bool scan_instruction(nir_instr *instr); + virtual bool do_scan_instruction(nir_instr *instr) = 0; + + void print_properties(std::ostream& os) const; + virtual void do_print_properties(std::ostream& os) const = 0; + + bool 
read_output(std::istream& is); + bool read_input(std::istream& is); + virtual bool read_prop(std::istream& is) = 0; + + bool emit_if_start(nir_if *if_stmt); + bool emit_control_flow(ControlFlowInstr::CFType type); + bool emit_store_scratch(nir_intrinsic_instr *intr); + bool emit_load_scratch(nir_intrinsic_instr *intr); + bool emit_local_store(nir_intrinsic_instr *intr); + bool emit_local_load(nir_intrinsic_instr* instr); + bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset); + bool emit_barrier(nir_intrinsic_instr* intr); + bool emit_shader_clock(nir_intrinsic_instr* instr); + bool emit_wait_ack(); + + bool equal_to(const Shader& other) const; + void finalize(); + virtual void do_finalize(); + + virtual void do_get_shader_info(r600_shader *sh_info); + + ShaderBlocks m_root; + Block::Pointer m_current_block; + + InstrFactory *m_instr_factory; + const char *m_type_id; + + template + using IOMap = std::map, Allocator>>; + + IOMap m_outputs; + IOMap m_inputs; + r600_chip_class m_chip_class; + + int m_scratch_size; + int m_next_block; + bool m_indirect_const_file{false}; + + Instr *m_last_txd {nullptr}; + + uint32_t m_indirect_files{0}; + std::bitset m_flags; + uint32_t nhwatomic_ranges{0}; + std::vector m_atomics; + + uint32_t m_nhwatomic{0}; + uint32_t m_atomic_base{0}; + uint32_t m_next_hwatomic_loc{0}; + std::unordered_map m_atomic_base_map; + uint32_t m_atomic_file_count{0}; + PRegister m_atomic_update{nullptr}; + PRegister m_rat_return_address{nullptr}; + + int32_t m_ssbo_image_offset{0}; + uint32_t m_nloops{0}; + + class InstructionChain : public InstrVisitor { + public: + void visit(AluInstr *instr) override {(void) instr;} + void visit(AluGroup *instr) override {(void) instr;} + void visit(TexInstr *instr) override {(void) instr;} + void visit(ExportInstr *instr) override {(void) instr;} + void visit(FetchInstr *instr) override {(void) instr;} + void visit(Block *instr) override {(void) instr;} + void visit(ControlFlowInstr *instr) override 
{(void) instr;} + void visit(IfInstr *instr) override {(void) instr;} + void visit(StreamOutInstr *instr) override {(void) instr;} + void visit(MemRingOutInstr *instr) override {(void) instr;} + void visit(EmitVertexInstr *instr) override {(void) instr;} + void visit(WriteTFInstr *instr) override {(void) instr;} + void visit(LDSAtomicInstr *instr) override {(void) instr;} + void visit(LDSReadInstr *instr) override {(void) instr;} + + void visit(WriteScratchInstr *instr) override; + void visit(GDSInstr *instr) override; + void visit(RatInstr *instr) override; + + void apply(Instr *current, Instr **last); + + Shader *this_shader{nullptr}; + Instr *last_scratch_instr{nullptr}; + Instr *last_gds_instr{nullptr}; + Instr *last_ssbo_instr{nullptr}; + bool prepare_mem_barrier{false}; + }; + + InstructionChain m_chain_instr; + std::vector m_loops; +}; + + +std::pair +r600_get_varying_semantic(unsigned varying_location); + +} + +#endif // SHADER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp deleted file mode 100644 index a25b04b..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ /dev/null @@ -1,1188 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "../r600_pipe.h" -#include "../r600_shader.h" -#include "sfn_shader_vertex.h" - -#include "sfn_shader_compute.h" -#include "sfn_shader_fragment.h" -#include "sfn_shader_geometry.h" -#include "sfn_liverange.h" -#include "sfn_ir_to_assembly.h" -#include "sfn_nir.h" -#include "sfn_instruction_misc.h" -#include "sfn_instruction_fetch.h" -#include "sfn_instruction_lds.h" - -#include - -#define ENABLE_DEBUG 1 - -#ifdef ENABLE_DEBUG -#define DEBUG_SFN(X) \ - do {\ - X; \ - } while (0) -#else -#define DEBUG_SFN(X) -#endif - -namespace r600 { - -using namespace std; - - -ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, - r600_pipe_shader_selector& sel, - r600_shader &sh_info, int scratch_size, - enum amd_gfx_level gfx_level, - int atomic_base): - m_processor_type(ptype), - m_nesting_depth(0), - m_block_number(0), - m_export_output(0, -1), - m_sh_info(sh_info), - m_chip_class(gfx_level), - m_tex_instr(*this), - m_alu_instr(*this), - m_ssbo_instr(*this), - m_pending_else(nullptr), - m_scratch_size(scratch_size), - m_next_hwatomic_loc(0), - m_sel(sel), - m_atomic_base(atomic_base), - m_image_count(0), - last_emitted_alu(nullptr) -{ - m_sh_info.processor_type = ptype; - -} - - -ShaderFromNirProcessor::~ShaderFromNirProcessor() -{ -} - -bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_tex: { - nir_tex_instr *t = nir_instr_as_tex(instr); - if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF) - 
sh_info().uses_tex_buffers = true; - if (t->op == nir_texop_txs && - t->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - t->is_array) - sh_info().has_txq_cube_array_z_comp = true; - break; - } - case nir_instr_type_intrinsic: { - auto *i = nir_instr_as_intrinsic(instr); - switch (i->intrinsic) { - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_ssbo_atomic_comp_swap: - m_sel.info.writes_memory = 1; - FALLTHROUGH; - case nir_intrinsic_image_load: - m_ssbo_instr.set_require_rat_return_address(); - break; - case nir_intrinsic_image_size: { - if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE && - nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2) - sh_info().has_txq_cube_array_z_comp = true; - } - - - - default: - ; - } - - - } - default: - ; - } - - return scan_sysvalue_access(instr); -} - -enum amd_gfx_level ShaderFromNirProcessor::get_chip_class(void) const -{ - return m_chip_class; -} - -bool ShaderFromNirProcessor::allocate_reserved_registers() -{ - bool retval = do_allocate_reserved_registers(); - m_ssbo_instr.load_rat_return_address(); - if (sh_info().uses_atomics) - m_ssbo_instr.load_atomic_inc_limits(); - m_ssbo_instr.set_ssbo_offset(m_image_count); - return retval; -} - -static void remap_shader_info(r600_shader& sh_info, - 
std::vector& map, - UNUSED ValueMap& values) -{ - for (unsigned i = 0; i < sh_info.num_arrays; ++i) { - auto new_index = map[sh_info.arrays[i].gpr_start]; - if (new_index.valid) - sh_info.arrays[i].gpr_start = new_index.new_reg; - map[sh_info.arrays[i].gpr_start].used = true; - } - - for (unsigned i = 0; i < sh_info.ninput; ++i) { - sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr - << " of map.size()\n"; - - assert(sh_info.input[i].gpr < map.size()); - auto new_index = map[sh_info.input[i].gpr]; - if (new_index.valid) - sh_info.input[i].gpr = new_index.new_reg; - map[sh_info.input[i].gpr].used = true; - } - - for (unsigned i = 0; i < sh_info.noutput; ++i) { - assert(sh_info.output[i].gpr < map.size()); - auto new_index = map[sh_info.output[i].gpr]; - if (new_index.valid) - sh_info.output[i].gpr = new_index.new_reg; - map[sh_info.output[i].gpr].used = true; - } -} - -void ShaderFromNirProcessor::remap_registers() -{ - // register renumbering - auto rc = register_count(); - if (!rc) - return; - - std::vector register_live_ranges(rc); - - auto temp_register_map = get_temp_registers(); - - Shader sh{m_output, temp_register_map}; - LiverangeEvaluator().run(sh, register_live_ranges); - auto register_map = get_temp_registers_remapping(register_live_ranges); - - sfn_log << SfnLog::merge << "=========Mapping===========\n"; - for (size_t i = 0; i < register_map.size(); ++i) - if (register_map[i].valid) - sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n"; - - ValueRemapper vmap0(register_map, temp_register_map); - for (auto& block: m_output) - block.remap_registers(vmap0); - - remap_shader_info(m_sh_info, register_map, temp_register_map); - - /* Mark inputs as used registers, these registers should no be remapped */ - for (auto& v: sh.m_temp) { - if (v.second->type() == Value::gpr) { - const auto& g = static_cast(*v.second); - if (g.is_input()) - register_map[g.sel()].used = true; - } - } - - int new_index = 0; - 
for (auto& i : register_map) { - i.valid = i.used; - if (i.used) - i.new_reg = new_index++; - } - - ValueRemapper vmap1(register_map, temp_register_map); - for (auto& ir: m_output) - ir.remap_registers(vmap1); - - remap_shader_info(m_sh_info, register_map, temp_register_map); -} - -bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) -{ - // m_uniform_type_map - m_uniform_type_map[uniform->data.location] = uniform->type; - - if (uniform->type->contains_atomic()) { - int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; - sh_info().nhwatomic += natomics; - - if (uniform->type->is_array()) - sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; - - sh_info().uses_atomics = 1; - - struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges]; - ++sh_info().nhwatomic_ranges; - atom.buffer_id = uniform->data.binding; - atom.hw_idx = m_atomic_base + m_next_hwatomic_loc; - - atom.start = uniform->data.offset >> 2; - atom.end = atom.start + natomics - 1; - - if (m_atomic_base_map.find(uniform->data.binding) == - m_atomic_base_map.end()) - m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc; - - m_next_hwatomic_loc += natomics; - - m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1; - - sfn_log << SfnLog::io << "HW_ATOMIC file count: " - << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n"; - } - - auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type; - if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { - sh_info().uses_images = 1; - if (uniform->type->is_array() && ! 
(uniform->data.mode == nir_var_mem_ssbo)) - sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE; - } - - return true; -} - -void ShaderFromNirProcessor::set_shader_info(const nir_shader *sh) -{ - m_image_count = sh->info.num_images; - do_set_shader_info(sh); -} - -void ShaderFromNirProcessor::do_set_shader_info(const nir_shader *sh) -{ - (void)sh; -} - -bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh) -{ - return true; -} - -void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr) -{ - auto& dest = instr->dest; - unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index; - assert(util_bitcount(instr->modes) == 1); - m_var_mode[instr->var] = instr->modes; - m_var_derefs[index] = instr->var; - - sfn_log << SfnLog::io << "Add var deref:" << index - << " with DDL:" << instr->var->data.driver_location << "\n"; -} - -void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io) -{ - switch (io.name) { - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - case TGSI_SEMANTIC_EDGEFLAG: - case TGSI_SEMANTIC_FACE: - case TGSI_SEMANTIC_SAMPLEMASK: - case TGSI_SEMANTIC_CLIPVERTEX: - io.spi_sid = 0; - break; - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_TEXCOORD: - case TGSI_SEMANTIC_PCOORD: - io.spi_sid = io.sid + 1; - break; - default: - /* For non-generic params - pack name and sid into 8 bits */ - io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1; - } -} - -const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const -{ - unsigned index = src.is_ssa ? 
src.ssa->index : src.reg.reg->index; - - sfn_log << SfnLog::io << "Search for deref:" << index << "\n"; - - auto v = m_var_derefs.find(index); - if (v != m_var_derefs.end()) - return v->second; - - fprintf(stderr, "R600: could not find deref with index %d\n", index); - - return nullptr; - - /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr); - return nir_deref_instr_get_variable(deref); */ -} - -bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) -{ - return m_tex_instr.emit(instr); -} - -void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir) -{ - if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) { - for (unsigned i = 0; i < ir->n_sources(); ++i) { - auto& s = ir->src(i); - if (s.type() == Value::kconst) { - auto& c = static_cast(s); - if (c.addr()) { - last_emitted_alu->set_flag(alu_last_instr); - break; - } - } - } - } - last_emitted_alu = ir; - emit_instruction_internal(ir); -} - - -void ShaderFromNirProcessor::emit_instruction(Instruction *ir) -{ - - emit_instruction_internal(ir); - last_emitted_alu = nullptr; -} - -void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir) -{ - if (m_pending_else) { - append_block(-1); - m_output.back().emit(PInstruction(m_pending_else)); - append_block(1); - m_pending_else = nullptr; - } - - r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; - if (m_output.empty()) - append_block(0); - - m_output.back().emit(Instruction::Pointer(ir)); -} - -void ShaderFromNirProcessor::emit_shader_start() -{ - /* placeholder, may become an abstract method */ - m_ssbo_instr.set_ssbo_offset(m_image_count); -} - -bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr) -{ - switch (instr->type) { - case nir_jump_break: { - auto b = new LoopBreakInstruction(); - emit_instruction(b); - return true; - } - case nir_jump_continue: { - auto b = new LoopContInstruction(); - emit_instruction(b); - return true; - } - default: { - nir_instr *i = 
reinterpret_cast(instr); - sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n"; - return false; - } - } - return true; -} - -bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr) -{ - return m_alu_instr.emit(instr); -} - -bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr) -{ - return false; -} - -bool ShaderFromNirProcessor::emit_loop_start(int loop_id) -{ - LoopBeginInstruction *loop = new LoopBeginInstruction(); - emit_instruction(loop); - m_loop_begin_block_map[loop_id] = loop; - append_block(1); - return true; -} -bool ShaderFromNirProcessor::emit_loop_end(int loop_id) -{ - auto start = m_loop_begin_block_map.find(loop_id); - if (start == m_loop_begin_block_map.end()) { - sfn_log << SfnLog::err << "End loop: Loop start for " - << loop_id << " not found\n"; - return false; - } - m_nesting_depth--; - m_block_number++; - m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number)); - LoopEndInstruction *loop = new LoopEndInstruction(start->second); - emit_instruction(loop); - - m_loop_begin_block_map.erase(start); - return true; -} - -bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt) -{ - - auto value = from_nir(if_stmt->condition, 0, 0); - AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)), - value, Value::zero, EmitInstruction::last); - pred->set_flag(alu_update_exec); - pred->set_flag(alu_update_pred); - pred->set_cf_type(cf_alu_push_before); - - append_block(1); - - IfInstruction *ir = new IfInstruction(pred); - emit_instruction(ir); - assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end()); - m_if_block_start_map[if_id] = ir; - return true; -} - -bool ShaderFromNirProcessor::emit_else_start(int if_id) -{ - auto iif = m_if_block_start_map.find(if_id); - if (iif == m_if_block_start_map.end()) { - std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n"; - return false; - } 
- - if (iif->second->type() != Instruction::cond_if) { - std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n"; - return false; - } - IfInstruction *if_instr = static_cast(iif->second); - ElseInstruction *ir = new ElseInstruction(if_instr); - m_if_block_start_map[if_id] = ir; - m_pending_else = ir; - - return true; -} - -bool ShaderFromNirProcessor::emit_ifelse_end(int if_id) -{ - auto ifelse = m_if_block_start_map.find(if_id); - if (ifelse == m_if_block_start_map.end()) { - std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n"; - return false; - } - - if (ifelse->second->type() != Instruction::cond_if && - ifelse->second->type() != Instruction::cond_else) { - std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n"; - return false; - } - /* Clear pending else, if the else branch was empty, non will be emitted */ - - m_pending_else = nullptr; - - append_block(-1); - IfElseEndInstruction *ir = new IfElseEndInstruction(); - emit_instruction(ir); - - return true; -} - -bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset) -{ - PValue src = get_temp_register(); - emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr})); - - GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); - emit_instruction(new FetchTCSIOParam(dest, src, offset)); - - return true; - -} - -bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr) -{ - auto address = varvec_from_nir(instr->src[0], instr->num_components); - auto dest_value = varvec_from_nir(instr->dest, instr->num_components); - - emit_instruction(new LDSReadInstruction(address, dest_value)); - return true; -} - -static unsigned -lds_op_from_intrinsic(nir_intrinsic_op op) { - switch (op) { - case nir_intrinsic_shared_atomic_add: - return LDS_OP2_LDS_ADD_RET; - case nir_intrinsic_shared_atomic_and: - return LDS_OP2_LDS_AND_RET; - case 
nir_intrinsic_shared_atomic_or: - return LDS_OP2_LDS_OR_RET; - case nir_intrinsic_shared_atomic_imax: - return LDS_OP2_LDS_MAX_INT_RET; - case nir_intrinsic_shared_atomic_umax: - return LDS_OP2_LDS_MAX_UINT_RET; - case nir_intrinsic_shared_atomic_imin: - return LDS_OP2_LDS_MIN_INT_RET; - case nir_intrinsic_shared_atomic_umin: - return LDS_OP2_LDS_MIN_UINT_RET; - case nir_intrinsic_shared_atomic_xor: - return LDS_OP2_LDS_XOR_RET; - case nir_intrinsic_shared_atomic_exchange: - return LDS_OP2_LDS_XCHG_RET; - case nir_intrinsic_shared_atomic_comp_swap: - return LDS_OP3_LDS_CMP_XCHG_RET; - default: - unreachable("Unsupported shared atomic opcode"); - } -} - -bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr) -{ - auto address = from_nir(instr->src[0], 0); - auto dest_value = from_nir(instr->dest, 0); - auto value = from_nir(instr->src[1], 0); - auto op = lds_op_from_intrinsic(instr->intrinsic); - - if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) { - auto value2 = from_nir(instr->src[2], 0); - emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op)); - } else { - emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op)); - } - return true; -} - - -bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr) -{ - unsigned write_mask = nir_intrinsic_write_mask(instr); - - auto address = from_nir(instr->src[1], 0); - int swizzle_base = (write_mask & 0x3) ? 
0 : 2; - write_mask |= write_mask >> 2; - - auto value = from_nir(instr->src[0], swizzle_base); - if (!(write_mask & 2)) { - emit_instruction(new LDSWriteInstruction(address, 0, value)); - } else { - auto value1 = from_nir(instr->src[0], swizzle_base + 1); - emit_instruction(new LDSWriteInstruction(address, 0, value, value1)); - } - - return true; -} - -bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr) -{ - r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - if (emit_intrinsic_instruction_override(instr)) - return true; - - if (m_ssbo_instr.emit(&instr->instr)) { - m_sel.info.writes_memory = true; - return true; - } - - switch (instr->intrinsic) { - case nir_intrinsic_load_deref: { - auto var = get_deref_location(instr->src[0]); - if (!var) - return false; - auto mode_helper = m_var_mode.find(var); - if (mode_helper == m_var_mode.end()) { - cerr << "r600-nir: variable '" << var->name << "' not found\n"; - return false; - } - switch (mode_helper->second) { - case nir_var_function_temp: - return emit_load_function_temp(var, instr); - default: - cerr << "r600-nir: Unsupported mode" << mode_helper->second - << "for src variable\n"; - return false; - } - } - case nir_intrinsic_store_scratch: - return emit_store_scratch(instr); - case nir_intrinsic_load_scratch: - return emit_load_scratch(instr); - case nir_intrinsic_load_uniform: - return load_uniform(instr); - case nir_intrinsic_discard: - case nir_intrinsic_discard_if: - return emit_discard_if(instr); - case nir_intrinsic_load_ubo_vec4: - return emit_load_ubo_vec4(instr); - case nir_intrinsic_load_tcs_in_param_base_r600: - return emit_load_tcs_param_base(instr, 0); - case nir_intrinsic_load_tcs_out_param_base_r600: - return emit_load_tcs_param_base(instr, 16); - case nir_intrinsic_load_local_shared_r600: - case nir_intrinsic_load_shared: - return emit_load_local_shared(instr); - case nir_intrinsic_store_local_shared_r600: - 
case nir_intrinsic_store_shared: - return emit_store_local_shared(instr); - case nir_intrinsic_control_barrier: - case nir_intrinsic_memory_barrier_tcs_patch: - case nir_intrinsic_memory_barrier_shared: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_group_memory_barrier: - return emit_barrier(instr); - case nir_intrinsic_memory_barrier_atomic_counter: - return true; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: - return emit_atomic_local_shared(instr); - case nir_intrinsic_shader_clock: - return emit_shader_clock(instr); - case nir_intrinsic_copy_deref: - case nir_intrinsic_load_constant: - case nir_intrinsic_load_input: - case nir_intrinsic_store_output: - - default: - fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic); - return false; - } - return false; -} - -bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr) -{ - return false; -} - -bool -ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr) -{ - return false; -} - -bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr) -{ - AluInstruction *ir = new AluInstruction(op0_group_barrier); - ir->set_flag(alu_last_instr); - emit_instruction(ir); - return true; -} - - -bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last) -{ - if (!dest.is_ssa) { - auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write}); - if (as_last) - 
ir->set_flag(alu_last_instr); - emit_instruction(ir); - } else { - inject_register(dest.ssa.index, chan, value, true); - } - return true; -} - -bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr) -{ - PValue address = from_nir(instr->src[1], 0, 0); - - auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1, - swizzle_from_comps(instr->num_components)); - - int writemask = nir_intrinsic_write_mask(instr); - int align = nir_intrinsic_align_mul(instr); - int align_offset = nir_intrinsic_align_offset(instr); - - WriteScratchInstruction *ir = nullptr; - if (address->type() == Value::literal) { - const auto& lv = static_cast(*address); - ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask); - } else { - address = from_nir_with_fetch_constant(instr->src[1], 0); - ir = new WriteScratchInstruction(address, value, align, align_offset, - writemask, m_scratch_size); - } - emit_instruction(ir); - sh_info().needs_scratch_space = 1; - return true; -} - -bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr) -{ - PValue address = from_nir_with_fetch_constant(instr->src[0], 0); - std::array dst_val; - for (int i = 0; i < 4; ++i) - dst_val[i] = from_nir(instr->dest, i < instr->num_components ? 
i : 7); - - GPRVector dst(dst_val); - auto ir = new LoadFromScratch(dst, address, m_scratch_size); - ir->prelude_append(new WaitAck(0)); - emit_instruction(ir); - sh_info().needs_scratch_space = 1; - return true; -} - -bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr) -{ - emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0), - PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write)); - emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1), - PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write)); - return true; -} - -GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, - unsigned mask, - const GPRVector::Swizzle& swizzle, - bool match) -{ - bool use_same = true; - GPRVector::Values v; - - std::array used_swizzles = {false, false, false, false}; - - /* Check whether all sources come from a GPR, and, - * if requested, whether they are swizzled as expected */ - - for (int i = 0; i < 4 && use_same; ++i) { - if ((1 << i) & mask) { - if (swizzle[i] < 4) { - v[i] = from_nir(src, swizzle[i]); - assert(v[i]); - use_same &= (v[i]->type() == Value::gpr); - if (match) { - use_same &= (v[i]->chan() == swizzle[i]); - } - used_swizzles[v[i]->chan()] = true; - } - } - } - - - /* Now check whether all inputs come from the same GPR, and fill - * empty slots in the vector with unused swizzles, bail out if - * the sources are not from the same GPR - */ - - if (use_same) { - int next_free_swizzle = 0; - while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4) - next_free_swizzle++; - - /* Find the first GPR index used */ - int i = 0; - while (!v[i] && i < 4) ++i; - assert(i < 4); - unsigned sel = v[i]->sel(); - - - for (i = 0; i < 4 && use_same; ++i) { - if (!v[i]) { - if (swizzle[i] >= 4) - v[i] = PValue(new GPRValue(sel, swizzle[i])); - else { - assert(next_free_swizzle < 4); - v[i] = PValue(new GPRValue(sel, next_free_swizzle)); - 
used_swizzles[next_free_swizzle] = true; - while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle]) - next_free_swizzle++; - } - } - else - use_same &= v[i]->sel() == sel; - } - } - - /* We can't re-use the source data because they either need re-swizzling, or - * they didn't come all from a GPR or the same GPR, so copy to a new vector - */ - if (!use_same) { - AluInstruction *ir = nullptr; - GPRVector result = get_temp_vec4(swizzle); - for (int i = 0; i < 4; ++i) { - if (swizzle[i] < 4 && (mask & (1 << i))) { - ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]), - EmitInstruction::write); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return result; - } else - return GPRVector(v);; -} - -bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) -{ - auto bufid = nir_src_as_const_value(instr->src[0]); - auto buf_offset = nir_src_as_const_value(instr->src[1]); - auto base = nir_intrinsic_base(instr); - - if (!buf_offset) { - /* TODO: if buf_offset is constant then this can also be solved by using the CF indes - * on the ALU block, and this would probably make sense when there are more then one - * loads with the same buffer ID. 
*/ - - PValue addr = from_nir_with_fetch_constant(instr->src[1], 0); - GPRVector trgt; - std::array swz = {7,7,7,7}; - for (unsigned i = 0; i < 4; ++i) { - if (i < nir_dest_num_components(instr->dest)) { - trgt.set_reg_i(i, from_nir(instr->dest, i)); - swz[i] = i + nir_intrinsic_component(instr); - } else { - trgt.set_reg_i(i, from_nir(instr->dest, 7)); - } - } - - FetchInstruction *ir; - if (bufid) { - ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base, - 1 + bufid->u32, nullptr, bim_none); - } else { - PValue bufid = from_nir(instr->src[0], 0, 0); - ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base, - 1, bufid, bim_zero); - } - ir->set_dest_swizzle(swz); - emit_instruction(ir); - m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; - return true; - } - - uint32_t offset = 512 + base + buf_offset->u32; - - if (bufid) { - int buf_cmp = nir_intrinsic_component(instr); - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - int cmp = buf_cmp + i; - assert(cmp < 4); - auto u = PValue(new UniformValue(offset, cmp, bufid->u32 + 1)); - if (instr->dest.is_ssa) - load_preloaded_value(instr->dest, i, u); - else { - ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; - - } else { - int buf_cmp = nir_intrinsic_component(instr); - AluInstruction *ir = nullptr; - auto kc_id = from_nir(instr->src[0], 0); - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - int cmp = buf_cmp + i; - auto u = PValue(new UniformValue(offset, cmp, kc_id)); - if (instr->dest.is_ssa) - load_preloaded_value(instr->dest, i, u); - else { - ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - return true; - } -} - -bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) -{ 
- r600::sfn_log << SfnLog::instr << "emit '" - << *reinterpret_cast(instr) - << "' (" << __func__ << ")\n"; - - if (instr->intrinsic == nir_intrinsic_discard_if) { - emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)), - {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr})); - - } else { - emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)), - {Value::zero, Value::zero}, {alu_last_instr})); - } - m_sh_info.uses_kill = 1; - return true; -} - -bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr) -{ - r600::sfn_log << SfnLog::instr << __func__ << ": emit '" - << *reinterpret_cast(instr) - << "'\n"; - - - /* If the target register is a SSA register and the loading is not - * indirect then we can do lazy loading, i.e. the uniform value can - * be used directly. Otherwise we have to load the data for real - * rigt away. - */ - auto literal = nir_src_as_const_value(instr->src[0]); - int base = nir_intrinsic_base(instr); - - if (literal) { - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - PValue u = PValue(new UniformValue(512 + literal->u32 + base, i)); - sfn_log << SfnLog::io << "uniform " - << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; - - if (instr->dest.is_ssa) - load_preloaded_value(instr->dest, i, u); - else { - ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), - u, {alu_write}); - emit_instruction(ir); - } - } - if (ir) - ir->set_flag(alu_last_instr); - } else { - PValue addr = from_nir(instr->src[0], 0, 0); - return load_uniform_indirect(instr, addr, 16 * base, 0); - } - return true; -} - -bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid) -{ - if (!addr) { - std::cerr << "r600-nir: don't know how uniform is addressed\n"; - return false; - } - - GPRVector trgt; - std::array swz = {7,7,7,7}; - for (int i = 0; i < 4; ++i) { - 
trgt.set_reg_i(i, from_nir(instr->dest, i)); - swz[i] = i; - } - - if (addr->type() != Value::gpr) { - emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr}); - addr = trgt.reg_i(0); - } - - auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest, - bufferid, PValue(), bim_none); - ir->set_dest_swizzle(swz); - emit_instruction(ir); - m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; - return true; -} - -AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < literal->def.num_components ; ++i) { - if (writemask & (1 << i)){ - PValue lsrc; - switch (literal->def.bit_size) { - - case 1: - sfn_log << SfnLog::reg << "Got literal of bit size 1\n"; - lsrc = literal->value[i].b ? - PValue(new LiteralValue( 0xffffffff, i)) : - Value::zero; - break; - case 32: - sfn_log << SfnLog::reg << "Got literal of bit size 32\n"; - if (literal->value[i].u32 == 0) - lsrc = Value::zero; - else if (literal->value[i].u32 == 1) - lsrc = Value::one_i; - else if (literal->value[i].f32 == 1.0f) - lsrc = Value::one_f; - else if (literal->value[i].f32 == 0.5f) - lsrc = Value::zero_dot_5; - else - lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); - break; - default: - sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size - << " falling back to 32 bit\n"; - lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); - } - ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write); - - emit_instruction(ir); - } - } - return ir; -} - -PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) -{ - PValue value = from_nir(src, component); - if (value->type() != Value::gpr && - value->type() != Value::gpr_vector && - value->type() != Value::gpr_array_value) { - PValue retval = 
get_temp_register(channel); - emit_instruction(new AluInstruction(op1_mov, retval, value, - EmitInstruction::last_write)); - value = retval; - } - return value; -} - -bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr) -{ - r600::sfn_log << SfnLog::instr << __func__ << ": emit '" - << *reinterpret_cast(instr) - << "'\n"; - - /* Give the specific shader type a chance to process this, i.e. Geometry and - * tesselation shaders need specialized deref_array, for the other shaders - * it is lowered. - */ - if (emit_deref_instruction_override(instr)) - return true; - - switch (instr->deref_type) { - case nir_deref_type_var: - set_var_address(instr); - return true; - case nir_deref_type_array: - case nir_deref_type_array_wildcard: - case nir_deref_type_struct: - case nir_deref_type_cast: - default: - fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type); - } - return false; -} - -bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest, - std::vector srcs, - const std::set& m_flags) -{ - AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags); - emit_instruction(ir); - return true; -} - -void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr) -{ - m_output_register_map[loc] = gpr; -} - -void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir) -{ - r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; - m_export_output.emit(PInstruction(ir)); -} - -const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const -{ - const GPRVector *retval = nullptr; - auto val = m_output_register_map.find(location); - if (val != m_output_register_map.end()) - retval = val->second; - return retval; -} - -void ShaderFromNirProcessor::set_input(unsigned pos, PValue var) -{ - r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n"; - m_inputs[pos] = var; -} - -void ShaderFromNirProcessor::set_output(unsigned pos, int sel) -{ - 
r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n"; - m_outputs[pos] = sel; -} - -void ShaderFromNirProcessor::append_block(int nesting_change) -{ - m_nesting_depth += nesting_change; - m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++)); -} - -void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const -{ - shader.num_arrays = m_reg_arrays.size(); - if (shader.num_arrays) { - shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array)); - for (unsigned i = 0; i < shader.num_arrays; ++i) { - shader.arrays[i].comp_mask = m_reg_arrays[i]->mask(); - shader.arrays[i].gpr_start = m_reg_arrays[i]->sel(); - shader.arrays[i].gpr_count = m_reg_arrays[i]->size(); - } - shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY); - } -} - -void ShaderFromNirProcessor::finalize() -{ - do_finalize(); - - for (auto& i : m_inputs) - m_sh_info.input[i.first].gpr = i.second->sel(); - - for (auto& i : m_outputs) - m_sh_info.output[i.first].gpr = i.second; - - m_output.push_back(m_export_output); -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h deleted file mode 100644 index 7109a10..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h +++ /dev/null @@ -1,231 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all 
copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef sfn_shader_from_nir_h -#define sfn_shader_from_nir_h - - -#include "gallium/drivers/r600/r600_shader.h" - -#include "compiler/nir/nir.h" -#include "compiler/nir_types.h" - -#include "sfn_instruction_block.h" -#include "sfn_instruction_export.h" -#include "sfn_alu_defines.h" -#include "sfn_valuepool.h" -#include "sfn_debug.h" -#include "sfn_instruction_cf.h" -#include "sfn_emittexinstruction.h" -#include "sfn_emitaluinstruction.h" -#include "sfn_emitssboinstruction.h" - -#include -#include -#include -#include - -struct nir_instr; - -namespace r600 { - -extern SfnLog sfn_log; - -class ShaderFromNirProcessor : public ValuePool { -public: - ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, - r600_shader& sh_info, int scratch_size, enum amd_gfx_level _chip_class, - int atomic_base); - virtual ~ShaderFromNirProcessor(); - - void emit_instruction(Instruction *ir); - - PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); - GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, - const GPRVector::Swizzle& swizzle, bool match = false); - - bool emit_instruction(EAluOp opcode, PValue dest, - std::vector src0, - const std::set& m_flags); - void emit_export_instruction(WriteoutInstruction *ir); - void emit_instruction(AluInstruction *ir); - - bool use_legacy_math_rules(void) { - return m_sel.nir->info.use_legacy_math_rules; - }; - 
- void split_constants(nir_alu_instr* instr); - void remap_registers(); - - const nir_variable *get_deref_location(const nir_src& src) const; - - r600_shader& sh_info() {return m_sh_info;} - void add_param_output_reg(int loc, const GPRVector *gpr); - void set_output(unsigned pos, int sel); - const GPRVector *output_register(unsigned location) const; - void evaluate_spi_sid(r600_shader_io &io); - - enum amd_gfx_level get_chip_class() const; - - int remap_atomic_base(int base) { - return m_atomic_base_map[base]; - } - - void get_array_info(r600_shader& shader) const; - - virtual bool scan_inputs_read(const nir_shader *sh); - void set_shader_info(const nir_shader *sh); - -protected: - - void set_var_address(nir_deref_instr *instr); - void set_input(unsigned pos, PValue var); - - bool scan_instruction(nir_instr *instr); - - virtual bool scan_sysvalue_access(nir_instr *instr) = 0; - - bool emit_if_start(int if_id, nir_if *if_stmt); - bool emit_else_start(int if_id); - bool emit_ifelse_end(int if_id); - - bool emit_loop_start(int loop_id); - bool emit_loop_end(int loop_id); - bool emit_jump_instruction(nir_jump_instr *instr); - - bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset); - bool emit_load_local_shared(nir_intrinsic_instr* instr); - bool emit_store_local_shared(nir_intrinsic_instr* instr); - bool emit_atomic_local_shared(nir_intrinsic_instr* instr); - - bool emit_barrier(nir_intrinsic_instr* instr); - - bool load_preloaded_value(const nir_dest& dest, int chan, PValue value, - bool as_last = true); - - void inc_atomic_file_count(); - - virtual void do_set_shader_info(const nir_shader *sh); - - enum ESlots { - es_face, - es_instanceid, - es_invocation_id, - es_patch_id, - es_pos, - es_rel_patch_id, - es_sample_mask_in, - es_sample_id, - es_sample_pos, - es_tess_factor_base, - es_vertexid, - es_tess_coord, - es_primitive_id, - es_helper_invocation, - es_last - }; - - std::bitset m_sv_values; - - bool allocate_reserved_registers(); - - -private: - 
virtual bool do_allocate_reserved_registers() = 0; - - - void emit_instruction_internal(Instruction *ir); - - bool emit_alu_instruction(nir_instr *instr); - bool emit_deref_instruction(nir_deref_instr* instr); - bool emit_intrinsic_instruction(nir_intrinsic_instr* instr); - virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr); - bool emit_tex_instruction(nir_instr* instr); - bool emit_discard_if(nir_intrinsic_instr* instr); - bool emit_load_ubo_vec4(nir_intrinsic_instr* instr); - bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr); - bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid); - - /* Code creating functions */ - bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr); - AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask); - - bool load_uniform(nir_intrinsic_instr* instr); - bool process_uniforms(nir_variable *uniform); - - void append_block(int nesting_change); - - virtual void emit_shader_start(); - virtual bool emit_deref_instruction_override(nir_deref_instr* instr); - - bool emit_store_scratch(nir_intrinsic_instr* instr); - bool emit_load_scratch(nir_intrinsic_instr* instr); - bool emit_shader_clock(nir_intrinsic_instr* instr); - virtual void do_finalize() = 0; - - void finalize(); - friend class ShaderFromNir; - - std::set m_arrays; - - std::map m_inputs; - std::map m_outputs; - - std::map m_var_derefs; - std::map m_var_mode; - - std::map m_uniform_type_map; - std::map m_if_block_start_map; - std::map m_loop_begin_block_map; - - pipe_shader_type m_processor_type; - - std::vector m_output; - unsigned m_nesting_depth; - unsigned m_block_number; - InstructionBlock m_export_output; - r600_shader& m_sh_info; - enum amd_gfx_level m_chip_class; - EmitTexInstruction m_tex_instr; - EmitAluInstruction m_alu_instr; - EmitSSBOInstruction m_ssbo_instr; - OutputRegisterMap m_output_register_map; - - IfElseInstruction 
*m_pending_else; - int m_scratch_size; - int m_next_hwatomic_loc; - - r600_pipe_shader_selector& m_sel; - int m_atomic_base ; - int m_image_count; - - std::unordered_map m_atomic_base_map; - AluInstruction *last_emitted_alu; -}; - -} - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp deleted file mode 100644 index 0977fe2..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_shader_compute.h" -#include "sfn_instruction_fetch.h" - -namespace r600 { - -ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector& sel, - UNUSED const r600_shader_key& key, - enum amd_gfx_level gfx_level): - ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader, - sh->scratch_space_needed, gfx_level, 0), - m_reserved_registers(0) -{ -} - -bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) -{ - return true; -} -bool ComputeShaderFromNir::do_allocate_reserved_registers() -{ - int thread_id_sel = m_reserved_registers++; - int wg_id_sel = m_reserved_registers++; - - for (int i = 0; i < 3; ++i) { - auto tmp = new GPRValue(thread_id_sel, i); - tmp->set_as_input(); - tmp->set_keep_alive(); - m_local_invocation_id[i] = PValue(tmp); - inject_register(tmp->sel(), i, m_local_invocation_id[i], false); - - tmp = new GPRValue(wg_id_sel, i); - tmp->set_as_input(); - tmp->set_keep_alive(); - m_workgroup_id[i] = PValue(tmp); - inject_register(tmp->sel(), i, m_workgroup_id[i], false); - } - return true; -} - -bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_local_invocation_id: - return emit_load_3vec(instr, m_local_invocation_id); - case nir_intrinsic_load_workgroup_id: - return emit_load_3vec(instr, m_workgroup_id); - case nir_intrinsic_load_num_workgroups: - return emit_load_num_workgroups(instr); - default: - return false; - } -} - -bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr, - const std::array& src) -{ - for (int i = 0; i < 3; ++i) - load_preloaded_value(instr->dest, i, src[i], i == 2); - return true; -} - -bool ComputeShaderFromNir::emit_load_num_workgroups(nir_intrinsic_instr* instr) -{ - PValue a_zero = get_temp_register(1); - emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write)); - GPRVector dest; - for (int i = 0; i < 
3; ++i) - dest.set_reg_i(i, from_nir(instr->dest, i)); - dest.set_reg_i(3, from_nir(instr->dest, 7)); - - auto ir = new FetchInstruction(vc_fetch, no_index_offset, - fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16, - false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0, - bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7}); - ir->set_flag(vtx_srf_mode); - emit_instruction(ir); - return true; -} - -void ComputeShaderFromNir::do_finalize() -{ - -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_compute.h b/src/gallium/drivers/r600/sfn/sfn_shader_compute.h deleted file mode 100644 index 98cbc63..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H -#define SFN_COMPUTE_SHADER_FROM_NIR_H - -#include "sfn_shader_base.h" -#include "sfn_shaderio.h" -#include - -namespace r600 { - -class ComputeShaderFromNir : public ShaderFromNirProcessor -{ -public: - ComputeShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector& sel, - const r600_shader_key &key, - enum amd_gfx_level gfx_level); - - bool scan_sysvalue_access(nir_instr *instr) override; - -private: - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - - bool do_allocate_reserved_registers() override; - void do_finalize() override; - - bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array& src); - bool emit_load_num_workgroups(nir_intrinsic_instr* instr); - - int m_reserved_registers; - std::array m_workgroup_id; - std::array m_local_invocation_id; -}; - -} - -#endif // SFN_COMPUTE_SHADER_FROM_NIR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp new file mode 100644 index 0000000..89aa796 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp @@ -0,0 +1,95 @@ +#include "sfn_shader_cs.h" +#include "sfn_instr_fetch.h" + + +namespace r600 { + +ComputeShader::ComputeShader(UNUSED const r600_shader_key& key): + Shader("CS") +{ + +} + +bool ComputeShader::do_scan_instruction(UNUSED nir_instr *instr) +{ + return false; +} + +int ComputeShader::do_allocate_reserved_registers() +{ + auto& vf = value_factory(); + + const int thread_id_sel = 0; + const int wg_id_sel = 1; + + for (int i = 0; i < 3; ++i) { + m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i); + m_local_invocation_id[i]->pin_live_range(true); + + m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i); + m_workgroup_id[i]->pin_live_range(true); + } + return 2; +} + +bool ComputeShader::process_stage_intrinsic(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_local_invocation_id: + 
return emit_load_3vec(instr, m_local_invocation_id); + case nir_intrinsic_load_workgroup_id: + return emit_load_3vec(instr, m_workgroup_id); + case nir_intrinsic_load_num_workgroups: + return emit_load_num_workgroups(instr); + default: + return false; + } +} + +void ComputeShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_COMPUTE; +} + +bool ComputeShader::read_prop(UNUSED std::istream& is) +{ + return true; +} + +void ComputeShader::do_print_properties(UNUSED std::ostream& os) const +{ + +} + +bool ComputeShader::emit_load_num_workgroups(nir_intrinsic_instr* instr) +{ + auto zero = value_factory().temp_register(); + + emit_instruction(new AluInstr(op1_mov, zero, value_factory().inline_const(ALU_SRC_0, 0), + AluInstr::last_write)); + auto dest = value_factory().dest_vec4(instr->dest, pin_group); + + auto ir = new LoadFromBuffer(dest, {0,1,2,7}, zero, 16, + R600_BUFFER_INFO_CONST_BUFFER, + nullptr, fmt_32_32_32_32); + + ir->set_fetch_flag(LoadFromBuffer::srf_mode); + ir->reset_fetch_flag(LoadFromBuffer::format_comp_signed); + ir->set_num_format(vtx_nf_int); + emit_instruction(ir); + return true; + +} + +bool ComputeShader::emit_load_3vec(nir_intrinsic_instr* instr, const std::array& src) +{ + auto& vf = value_factory(); + + for (int i = 0; i < 3; ++i) { + auto dest = vf.dest(instr->dest, i, pin_none); + emit_instruction(new AluInstr(op1_mov, dest, src[i], i == 2 ? 
AluInstr::last_write : AluInstr::write)); + } + return true; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.h b/src/gallium/drivers/r600/sfn/sfn_shader_cs.h new file mode 100644 index 0000000..c58e606 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.h @@ -0,0 +1,39 @@ +#ifndef COMPUTE_H +#define COMPUTE_H + +#include "sfn_shader.h" + +namespace r600 { + +class ComputeShader : public Shader +{ +public: + ComputeShader(const r600_shader_key& key); + +private: + bool do_scan_instruction(nir_instr *instr) override; + int do_allocate_reserved_registers() override; + + bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; + void do_get_shader_info(r600_shader *sh_info) override; + + bool load_input(UNUSED nir_intrinsic_instr *intr) override { + unreachable("compute shaders have bno inputs"); + }; + bool store_output(UNUSED nir_intrinsic_instr *intr) override { + unreachable("compute shaders have no outputs"); + }; + + bool read_prop(std::istream& is) override; + void do_print_properties(std::ostream& os) const override; + + bool emit_load_num_workgroups(nir_intrinsic_instr* instr); + bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array& src); + + std::array m_workgroup_id{nullptr}; + std::array m_local_invocation_id{nullptr}; +}; + +} + +#endif // COMPUTE_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp deleted file mode 100644 index 02d5add..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp +++ /dev/null @@ -1,1074 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, 
and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "pipe/p_defines.h" -#include "tgsi/tgsi_from_mesa.h" -#include "sfn_shader_fragment.h" -#include "sfn_instruction_fetch.h" - -namespace r600 { - -FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir, - r600_shader& sh, - r600_pipe_shader_selector &sel, - const r600_shader_key &key, - enum amd_gfx_level gfx_level): - ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, gfx_level, 0), - m_max_color_exports(MAX2(key.ps.nr_cbufs,1)), - m_max_counted_color_exports(0), - m_two_sided_color(key.ps.color_two_side), - m_last_pixel_export(nullptr), - m_nir(nir), - m_reserved_registers(0), - m_frag_pos_index(0), - m_need_back_color(false), - m_front_face_loaded(false), - m_depth_exports(0), - m_apply_sample_mask(key.ps.apply_sample_id_mask), - m_dual_source_blend(key.ps.dual_source_blend), - m_pos_input(nullptr) -{ - for (auto& i: m_interpolator) { - i.enabled = false; - i.ij_index= 0; - } - - sh_info().rat_base = key.ps.nr_cbufs; - sh_info().atomic_base = key.ps.first_atomic_counter; -} - -unsigned barycentric_ij_index(nir_intrinsic_instr *instr) -{ - unsigned index = 0; - switch (instr->intrinsic) { - case 
nir_intrinsic_load_barycentric_sample: - index = 0; - break; - case nir_intrinsic_load_barycentric_at_sample: - case nir_intrinsic_load_barycentric_at_offset: - case nir_intrinsic_load_barycentric_pixel: - index = 1; - break; - case nir_intrinsic_load_barycentric_centroid: - index = 2; - break; - default: - unreachable("Unknown interpolator intrinsic"); - } - - switch (nir_intrinsic_interp_mode(instr)) { - case INTERP_MODE_NONE: - case INTERP_MODE_SMOOTH: - case INTERP_MODE_COLOR: - return index; - case INTERP_MODE_NOPERSPECTIVE: - return index + 3; - case INTERP_MODE_FLAT: - case INTERP_MODE_EXPLICIT: - default: - unreachable("unknown/unsupported mode for load_interpolated"); - } - return 0; -} - -bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr, - bool interpolated) -{ - sfn_log << SfnLog::io << "Parse " << instr->instr - << "\n"; - - auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]); - assert(index); - - unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32; - auto semantic = r600_get_varying_semantic(location); - tgsi_semantic name = (tgsi_semantic)semantic.first; - unsigned sid = semantic.second; - - - if (location == VARYING_SLOT_POS) { - m_sv_values.set(es_pos); - m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, - nir_intrinsic_component(instr), - nir_dest_num_components(instr->dest), - TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER); - m_shaderio.add_input(m_pos_input); - return true; - } - - if (location == VARYING_SLOT_FACE) { - m_sv_values.set(es_face); - return true; - } - - - tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT; - tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; - - bool uses_interpol_at_centroid = false; - - if (interpolated) { - - glsl_interp_mode mode = INTERP_MODE_NONE; - auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); - mode = 
(glsl_interp_mode)nir_intrinsic_interp_mode(parent); - switch (parent->intrinsic) { - case nir_intrinsic_load_barycentric_sample: - tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE; - break; - case nir_intrinsic_load_barycentric_at_sample: - case nir_intrinsic_load_barycentric_at_offset: - case nir_intrinsic_load_barycentric_pixel: - tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; - break; - case nir_intrinsic_load_barycentric_centroid: - tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID; - uses_interpol_at_centroid = true; - break; - default: - std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of " - << nir_intrinsic_infos[instr->intrinsic].name - << " interpolator?\n"; - assert(0); - } - - switch (mode) { - case INTERP_MODE_NONE: - if (name == TGSI_SEMANTIC_COLOR) { - tgsi_interpolate = TGSI_INTERPOLATE_COLOR; - break; - } - FALLTHROUGH; - case INTERP_MODE_SMOOTH: - tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case INTERP_MODE_NOPERSPECTIVE: - tgsi_interpolate = TGSI_INTERPOLATE_LINEAR; - break; - case INTERP_MODE_FLAT: - break; - case INTERP_MODE_COLOR: - tgsi_interpolate = TGSI_INTERPOLATE_COLOR; - break; - case INTERP_MODE_EXPLICIT: - default: - assert(0); - } - - m_interpolators_used.set(barycentric_ij_index(parent)); - - } - - switch (name) { - case TGSI_SEMANTIC_COLOR: { - auto input = m_shaderio.find_varying(name, sid); - if (!input) { - m_shaderio.add_input(new ShaderInputColor(name, sid, - nir_intrinsic_base(instr) + index->u32, - nir_intrinsic_component(instr), - nir_dest_num_components(instr->dest), - tgsi_interpolate, tgsi_loc)); - } else { - if (uses_interpol_at_centroid) - input->set_uses_interpolate_at_centroid(); - - auto varying = static_cast(*input); - varying.update_mask(nir_dest_num_components(instr->dest), - nir_intrinsic_component(instr)); - } - - m_need_back_color = m_two_sided_color; - return true; - } - case TGSI_SEMANTIC_PRIMID: - sh_info().gs_prim_id_input = true; - sh_info().ps_prim_id_input = 
m_shaderio.inputs().size(); - FALLTHROUGH; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_TEXCOORD: - case TGSI_SEMANTIC_LAYER: - case TGSI_SEMANTIC_PCOORD: - case TGSI_SEMANTIC_VIEWPORT_INDEX: - case TGSI_SEMANTIC_CLIPDIST: { - auto input = m_shaderio.find_varying(name, sid); - if (!input) { - m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, - nir_intrinsic_component(instr), - nir_dest_num_components(instr->dest), - tgsi_interpolate, tgsi_loc)); - } else { - if (uses_interpol_at_centroid) - input->set_uses_interpolate_at_centroid(); - - auto varying = static_cast(*input); - varying.update_mask(nir_dest_num_components(instr->dest), - nir_intrinsic_component(instr)); - } - - return true; - } - default: - return false; - } -} - - -bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); - - switch (ii->intrinsic) { - case nir_intrinsic_load_front_face: - m_sv_values.set(es_face); - break; - case nir_intrinsic_load_sample_mask_in: - m_sv_values.set(es_sample_mask_in); - break; - case nir_intrinsic_load_sample_pos: - m_sv_values.set(es_sample_pos); - FALLTHROUGH; - case nir_intrinsic_load_sample_id: - m_sv_values.set(es_sample_id); - break; - case nir_intrinsic_load_helper_invocation: - m_sv_values.set(es_helper_invocation); - sh_info().uses_helper_invocation = true; - break; - case nir_intrinsic_load_input: - return process_load_input(ii, false); - case nir_intrinsic_load_interpolated_input: { - return process_load_input(ii, true); - } - case nir_intrinsic_store_output: - return process_store_output(ii); - - default: - ; - } - } - default: - ; - } - return true; -} - -bool FragmentShaderFromNir::do_allocate_reserved_registers() -{ - assert(!m_reserved_registers); - - int face_reg_index = -1; - int sample_id_index = -1; - // enabled interpolators based on inputs - 
for (unsigned i = 0; i < s_max_interpolators; ++i) { - if (m_interpolators_used.test(i)) { - sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; - m_interpolator[i].enabled = true; - } - } - - // sort the varying inputs - m_shaderio.sort_varying_inputs(); - - // handle interpolators - int num_baryc = 0; - for (int i = 0; i < 6; ++i) { - if (m_interpolator[i].enabled) { - sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; - - m_interpolator[i].ij_index = num_baryc; - - unsigned sel = num_baryc / 2; - unsigned chan = 2 * (num_baryc % 2); - - auto ip_i = new GPRValue(sel, chan + 1); - ip_i->set_as_input(); - m_interpolator[i].i.reset(ip_i); - inject_register(sel, chan + 1, m_interpolator[i].i, false); - - auto ip_j = new GPRValue(sel, chan); - ip_j->set_as_input(); - m_interpolator[i].j.reset(ip_j); - inject_register(sel, chan, m_interpolator[i].j, false); - - ++num_baryc; - } - } - m_reserved_registers += (num_baryc + 1) >> 1; - - if (m_sv_values.test(es_pos)) { - m_frag_pos_index = m_reserved_registers++; - assert(m_pos_input); - m_pos_input->set_gpr(m_frag_pos_index); - } - - // handle system values - if (m_sv_values.test(es_face) || m_need_back_color) { - face_reg_index = m_reserved_registers++; - m_front_face_reg = std::make_shared(face_reg_index,0); - m_front_face_reg->set_as_input(); - sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n"; - inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false); - - m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index)); - load_front_face(); - } - - if (m_sv_values.test(es_sample_mask_in)) { - if (face_reg_index < 0) - face_reg_index = m_reserved_registers++; - - m_sample_mask_reg = std::make_shared(face_reg_index,2); - m_sample_mask_reg->set_as_input(); - sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n"; - sh_info().nsys_inputs = 1; 
- m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index)); - } - - if (m_sv_values.test(es_sample_id) || - m_sv_values.test(es_sample_mask_in)) { - if (sample_id_index < 0) - sample_id_index = m_reserved_registers++; - - m_sample_id_reg = std::make_shared(sample_id_index, 3); - m_sample_id_reg->set_as_input(); - sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n"; - sh_info().nsys_inputs++; - m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index)); - } - - // The back color handling is not emmited in the code, so we have - // to add the inputs here and later we also need to inject the code to set - // the right color - if (m_need_back_color) { - size_t ninputs = m_shaderio.inputs().size(); - for (size_t k = 0; k < ninputs; ++k) { - ShaderInput& i = m_shaderio.input(k); - - if (i.name() != TGSI_SEMANTIC_COLOR) - continue; - - ShaderInputColor& col = static_cast(i); - - size_t next_pos = m_shaderio.size(); - auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos); - m_shaderio.add_input(bcol); - col.set_back_color(next_pos); - } - m_shaderio.set_two_sided(); - } - - m_shaderio.update_lds_pos(); - - set_reserved_registers(m_reserved_registers); - - return true; -} - -void FragmentShaderFromNir::emit_shader_start() -{ - if (m_sv_values.test(es_face)) - load_front_face(); - - if (m_sv_values.test(es_pos)) { - for (int i = 0; i < 4; ++i) { - auto v = new GPRValue(m_frag_pos_index, i); - v->set_as_input(); - auto reg = PValue(v); - m_frag_pos[i] = reg; - } - } - - if (m_sv_values.test(es_helper_invocation)) { - m_helper_invocation = get_temp_register(); - auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7)); - emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr})); - GPRVector dst({dummy, dummy, dummy, dummy}); - std::array swz = {7,7,7,7}; - dst.set_reg_i(m_helper_invocation->chan(), 
m_helper_invocation); - swz[m_helper_invocation->chan()] = 4; - - auto vtx = new FetchInstruction(dst, m_helper_invocation, - R600_BUFFER_INFO_CONST_BUFFER, bim_none); - vtx->set_flag(vtx_vpm); - vtx->set_flag(vtx_use_tc); - vtx->set_dest_swizzle(swz); - emit_instruction(vtx); - } -} - -bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr) -{ - - auto semantic = nir_intrinsic_io_semantics(instr); - unsigned driver_loc = nir_intrinsic_base(instr); - - if (sh_info().noutput <= driver_loc) - sh_info().noutput = driver_loc + 1; - - r600_shader_io& io = sh_info().output[driver_loc]; - tgsi_get_gl_frag_result_semantic(static_cast(semantic.location), - &io.name, &io.sid); - - unsigned component = nir_intrinsic_component(instr); - io.write_mask |= nir_intrinsic_write_mask(instr) << component; - - if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) { - sh_info().fs_write_all = true; - } - - if (semantic.location == FRAG_RESULT_COLOR || - (semantic.location >= FRAG_RESULT_DATA0 && - semantic.location <= FRAG_RESULT_DATA7)) { - ++m_max_counted_color_exports; - - /* Hack: force dual source output handling if one color output has a - * dual_source_blend_index > 0 */ - if (semantic.location == FRAG_RESULT_COLOR && - semantic.dual_source_blend_index > 0) - m_dual_source_blend = true; - - if (m_max_counted_color_exports > 1) - sh_info().fs_write_all = false; - return true; - } - - if (semantic.location == FRAG_RESULT_DEPTH || - semantic.location == FRAG_RESULT_STENCIL || - semantic.location == FRAG_RESULT_SAMPLE_MASK) { - io.write_mask = 15; - return true; - } - - return false; - - -} - -bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr) -{ - auto dest = from_nir(instr->dest, 0); - assert(m_sample_id_reg); - assert(m_sample_mask_reg); - - emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write)); - emit_instruction(new AluInstruction(op2_and_int, dest, 
dest, m_sample_mask_reg, EmitInstruction::last_write)); - return true; -} - -bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_sample_mask_in: - if (m_apply_sample_mask) { - return emit_load_sample_mask_in(instr); - } else - return load_preloaded_value(instr->dest, 0, m_sample_mask_reg); - case nir_intrinsic_load_sample_id: - return load_preloaded_value(instr->dest, 0, m_sample_id_reg); - case nir_intrinsic_load_front_face: - return load_preloaded_value(instr->dest, 0, m_front_face_reg); - case nir_intrinsic_load_sample_pos: - return emit_load_sample_pos(instr); - case nir_intrinsic_load_helper_invocation: - return load_preloaded_value(instr->dest, 0, m_helper_invocation); - case nir_intrinsic_load_input: - return emit_load_input(instr); - case nir_intrinsic_load_barycentric_sample: - case nir_intrinsic_load_barycentric_pixel: - case nir_intrinsic_load_barycentric_centroid: { - unsigned ij = barycentric_ij_index(instr); - return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) && - load_preloaded_value(instr->dest, 1, m_interpolator[ij].j); - } - case nir_intrinsic_load_barycentric_at_offset: - return load_barycentric_at_offset(instr); - case nir_intrinsic_load_barycentric_at_sample: - return load_barycentric_at_sample(instr); - - case nir_intrinsic_load_interpolated_input: { - return emit_load_interpolated_input(instr); - } - case nir_intrinsic_store_output: - return emit_store_output(instr); - - default: - return false; - } -} - -bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr) -{ - auto location = nir_intrinsic_io_semantics(instr).location; - - if (location == FRAG_RESULT_COLOR) - return emit_export_pixel(instr, m_dual_source_blend ? 
1 : m_max_color_exports); - - if ((location >= FRAG_RESULT_DATA0 && - location <= FRAG_RESULT_DATA7) || - location == FRAG_RESULT_DEPTH || - location == FRAG_RESULT_STENCIL || - location == FRAG_RESULT_SAMPLE_MASK) - return emit_export_pixel(instr, 1); - - sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n"; - return false; - -} - -bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr) -{ - unsigned loc = nir_intrinsic_io_semantics(instr).location; - switch (loc) { - case VARYING_SLOT_POS: - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - load_preloaded_value(instr->dest, i, m_frag_pos[i]); - } - return true; - case VARYING_SLOT_FACE: - return load_preloaded_value(instr->dest, 0, m_front_face_reg); - default: - ; - } - - auto param = nir_src_as_const_value(instr->src[1]); - assert(param && "Indirect PS inputs not (yet) supported"); - - auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr)); - auto dst = nir_intrinsic_component(instr) ? 
get_temp_vec4() : vec_from_nir(instr->dest, 4); - - io.set_gpr(dst.sel()); - - Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)}; - - - if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest), - nir_intrinsic_component(instr))) - return false; - - if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) { - - auto & color_input = static_cast (io); - auto& bgio = m_shaderio.input(color_input.back_color_input_index()); - - GPRVector bgcol = get_temp_vec4(); - bgio.set_gpr(bgcol.sel()); - load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0); - - load_front_face(); - - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - - AluInstruction *ir = nullptr; - if (nir_intrinsic_component(instr) != 0) { - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - return true; -} - -bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr) -{ - auto interpolator = m_interpolator[barycentric_ij_index(instr)]; - PValue dummy(new GPRValue(interpolator.i->sel(), 0)); - - GPRVector help = get_temp_vec4(); - GPRVector interp({interpolator.j, interpolator.i, dummy, dummy}); - - auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue()); - getgradh->set_dest_swizzle({0,1,7,7}); - getgradh->set_flag(TexInstruction::x_unnormalized); - getgradh->set_flag(TexInstruction::y_unnormalized); - getgradh->set_flag(TexInstruction::z_unnormalized); - getgradh->set_flag(TexInstruction::w_unnormalized); - getgradh->set_flag(TexInstruction::grad_fine); - 
emit_instruction(getgradh); - - auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue()); - getgradv->set_dest_swizzle({7,7,0,1}); - getgradv->set_flag(TexInstruction::x_unnormalized); - getgradv->set_flag(TexInstruction::y_unnormalized); - getgradv->set_flag(TexInstruction::z_unnormalized); - getgradv->set_flag(TexInstruction::w_unnormalized); - getgradv->set_flag(TexInstruction::grad_fine); - emit_instruction(getgradv); - - PValue ofs_x = from_nir(instr->src[0], 0); - PValue ofs_y = from_nir(instr->src[0], 1); - emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write})); - emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr})); - emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write})); - emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr})); - - return true; -} - -bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr) -{ - GPRVector slope = get_temp_vec4(); - - auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope, - from_nir_with_fetch_constant(instr->src[0], 0), - 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none); - fetch->set_flag(vtx_srf_mode); - emit_instruction(fetch); - - GPRVector grad = get_temp_vec4(); - - auto interpolator = m_interpolator[barycentric_ij_index(instr)]; - assert(interpolator.enabled); - PValue dummy(new GPRValue(interpolator.i->sel(), 0)); - - GPRVector src({interpolator.j, interpolator.i, dummy, dummy}); - - auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue()); - tex->set_flag(TexInstruction::grad_fine); - tex->set_flag(TexInstruction::x_unnormalized); - tex->set_flag(TexInstruction::y_unnormalized); - 
tex->set_flag(TexInstruction::z_unnormalized); - tex->set_flag(TexInstruction::w_unnormalized); - tex->set_dest_swizzle({0,1,7,7}); - emit_instruction(tex); - - tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue()); - tex->set_flag(TexInstruction::x_unnormalized); - tex->set_flag(TexInstruction::y_unnormalized); - tex->set_flag(TexInstruction::z_unnormalized); - tex->set_flag(TexInstruction::w_unnormalized); - tex->set_flag(TexInstruction::grad_fine); - tex->set_dest_swizzle({7,7,0,1}); - emit_instruction(tex); - - emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write})); - emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr})); - - emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write})); - emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr})); - - return true; -} - -bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr) -{ - unsigned loc = nir_intrinsic_io_semantics(instr).location; - auto param = nir_src_as_const_value(instr->src[0]); - assert(param && "Indirect PS inputs not (yet) supported"); - - auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr)); - - assert(nir_intrinsic_io_semantics(instr).num_slots == 1); - - unsigned num_components = nir_dest_num_components(instr->dest); - - switch (loc) { - case VARYING_SLOT_POS: - for (unsigned i = 0; i < num_components; ++i) { - load_preloaded_value(instr->dest, i, m_frag_pos[i]); - } - return true; - case VARYING_SLOT_FACE: - return load_preloaded_value(instr->dest, 0, m_front_face_reg); - default: - ; - } - - auto dst = nir_intrinsic_component(instr) ? 
get_temp_vec4() : vec_from_nir(instr->dest, 4); - - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op1_interp_load_p0, dst[i], - PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + - io.lds_pos(), i)), - EmitInstruction::write); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - - /* TODO: back color */ - if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) { - Interpolator ip = {false, 0, NULL, NULL}; - - auto & color_input = static_cast (io); - auto& bgio = m_shaderio.input(color_input.back_color_input_index()); - - GPRVector bgcol = get_temp_vec4(); - bgio.set_gpr(bgcol.sel()); - load_interpolated(bgcol, bgio, ip, num_components, 0); - - load_front_face(); - - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - if (nir_intrinsic_component(instr) != 0) { - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write}); - emit_instruction(ir); - } - if (ir) - ir->set_flag(alu_last_instr); - } - - - return true; -} - -void FragmentShaderFromNir::load_front_face() -{ - assert(m_front_face_reg); - if (m_front_face_loaded) - return; - - auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg, - Value::zero, {alu_write, alu_last_instr}); - m_front_face_loaded = true; - emit_instruction(ir); -} - -bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr) -{ - GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); - auto fetch = new FetchInstruction(vc_fetch, - no_index_offset, - fmt_32_32_32_32_float, - vtx_nf_scaled, - vtx_es_none, - m_sample_id_reg, - dest, - 0, - false, - 0xf, - R600_BUFFER_INFO_CONST_BUFFER, - 0, - bim_none, - false, 
- false, - 0, - 0, - 0, - PValue(), - {0,1,2,3}); - fetch->set_flag(vtx_srf_mode); - emit_instruction(fetch); - return true; -} - -bool FragmentShaderFromNir::load_interpolated(GPRVector &dest, - ShaderInput& io, const Interpolator &ip, - int num_components, int start_comp) -{ - // replace io with ShaderInputVarying - if (io.interpolate() > 0) { - - sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n"; - - if (num_components == 1) { - switch (start_comp) { - case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x); - case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); - case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z); - case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3); - default: - assert(0); - } - } - - if (num_components == 2) { - switch (start_comp) { - case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3); - case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc); - case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) && - load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); - default: - assert(0); - } - } - - if (num_components == 3 && start_comp == 0) - return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) && - load_interpolated_one_comp(dest, io, ip, op2_interp_z); - - int full_write_mask = ((1 << num_components) - 1) << start_comp; - - bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc); - success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3); - return success; - - } else { - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op1_interp_load_p0, dest[i], - PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), - EmitInstruction::write); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - 
} - return true; -} - -bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest, - ShaderInput& io, const Interpolator& ip, EAluOp op) -{ - for (unsigned i = 0; i < 2 ; ++i) { - int chan = i; - if (op == op2_interp_z) - chan += 2; - - - auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i, - PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), - i == 0 ? EmitInstruction::write : EmitInstruction::last); - dest.pin_to_channel(chan); - - ir->set_bank_swizzle(alu_vec_210); - emit_instruction(ir); - } - return true; -} - -bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, - const Interpolator& ip, EAluOp op, int writemask) -{ - AluInstruction *ir = nullptr; - assert(ip.j); - assert(ip.i); - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), - (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty); - dest.pin_to_channel(i); - ir->set_bank_swizzle(alu_vec_210); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - return true; -} - -bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest, - ShaderInput& io, const Interpolator& ip, - EAluOp op, UNUSED int start, int comp) -{ - AluInstruction *ir = nullptr; - for (int i = 0; i < 4 ; ++i) { - ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, - PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), - i == comp ? 
EmitInstruction::write : EmitInstruction::empty); - ir->set_bank_swizzle(alu_vec_210); - dest.pin_to_channel(i); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - return true; -} - - -bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs) -{ - std::array swizzle; - unsigned writemask = nir_intrinsic_write_mask(instr); - auto semantics = nir_intrinsic_io_semantics(instr); - unsigned driver_location = nir_intrinsic_base(instr); - - switch (semantics.location) { - case FRAG_RESULT_DEPTH: - writemask = 1; - swizzle = {0,7,7,7}; - break; - case FRAG_RESULT_STENCIL: - writemask = 2; - swizzle = {7,0,7,7}; - break; - case FRAG_RESULT_SAMPLE_MASK: - writemask = 4; - swizzle = {7,7,0,7}; - break; - default: - for (int i = 0; i < 4; ++i) { - swizzle[i] = (i < instr->num_components) ? i : 7; - } - } - - auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle); - - set_output(driver_location, value.sel()); - - if (semantics.location == FRAG_RESULT_COLOR || - (semantics.location >= FRAG_RESULT_DATA0 && - semantics.location <= FRAG_RESULT_DATA7)) { - for (int k = 0 ; k < outputs; ++k) { - - unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR) - ? 
semantics.dual_source_blend_index : driver_location) + k - m_depth_exports; - - sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n"; - - if (location >= m_max_color_exports) { - sfn_log << SfnLog::io << "Pixel output loc:" << location - << " dl:" << driver_location - << " skipped because we have only " << m_max_color_exports << " CBs\n"; - continue; - } - - m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); - - if (sh_info().ps_export_highest < location) - sh_info().ps_export_highest = location; - - sh_info().nr_ps_color_exports++; - - unsigned mask = (0xfu << (location * 4)); - sh_info().ps_color_export_mask |= mask; - - emit_export_instruction(m_last_pixel_export); - }; - } else if (semantics.location == FRAG_RESULT_DEPTH || - semantics.location == FRAG_RESULT_STENCIL || - semantics.location == FRAG_RESULT_SAMPLE_MASK) { - m_depth_exports++; - emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); - } else { - return false; - } - return true; -} - - -bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs) -{ - std::array swizzle; - unsigned writemask = nir_intrinsic_write_mask(instr); - switch (out_var->data.location) { - case FRAG_RESULT_DEPTH: - writemask = 1; - swizzle = {0,7,7,7}; - break; - case FRAG_RESULT_STENCIL: - writemask = 2; - swizzle = {7,0,7,7}; - break; - case FRAG_RESULT_SAMPLE_MASK: - writemask = 4; - swizzle = {7,7,0,7}; - break; - default: - for (int i = 0; i < 4; ++i) { - swizzle[i] = (i < instr->num_components) ? 
i : 7; - } - } - - auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle); - - set_output(out_var->data.driver_location, value.sel()); - - if (out_var->data.location == FRAG_RESULT_COLOR || - (out_var->data.location >= FRAG_RESULT_DATA0 && - out_var->data.location <= FRAG_RESULT_DATA7)) { - for (int k = 0 ; k < outputs; ++k) { - - unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR) - ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports; - - sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n"; - - if (location >= m_max_color_exports) { - sfn_log << SfnLog::io << "Pixel output loc:" << location - << " dl:" << out_var->data.location - << " skipped because we have only " << m_max_color_exports << " CBs\n"; - continue; - } - - m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); - - if (sh_info().ps_export_highest < location) - sh_info().ps_export_highest = location; - - sh_info().nr_ps_color_exports++; - - unsigned mask = (0xfu << (location * 4)); - sh_info().ps_color_export_mask |= mask; - - emit_export_instruction(m_last_pixel_export); - }; - } else if (out_var->data.location == FRAG_RESULT_DEPTH || - out_var->data.location == FRAG_RESULT_STENCIL || - out_var->data.location == FRAG_RESULT_SAMPLE_MASK) { - m_depth_exports++; - emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); - } else { - return false; - } - return true; -} - -void FragmentShaderFromNir::do_finalize() -{ - // update shader io info and set LDS etc. - sh_info().ninput = m_shaderio.inputs().size(); - - sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n"; - for (size_t i = 0; i < sh_info().ninput; ++i) { - ShaderInput& input = m_shaderio.input(i); - int ij_idx = (input.ij_index() < 6 && - input.ij_index() >= 0) ? 
input.ij_index() : 0; - input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index); - } - - sh_info().two_side = m_shaderio.two_sided(); - sh_info().nlds = m_shaderio.nlds(); - - if (!m_last_pixel_export) { - GPRVector v(0, {7,7,7,7}); - m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel); - sh_info().nr_ps_color_exports++; - sh_info().ps_color_export_mask = 0xf; - emit_export_instruction(m_last_pixel_export); - } - - m_last_pixel_export->set_last(); -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h deleted file mode 100644 index 6c6fb2b..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h +++ /dev/null @@ -1,117 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef sfn_fragment_shader_from_nir_h -#define sfn_fragment_shader_from_nir_h - -#include "sfn_shader_base.h" -#include "sfn_shaderio.h" -#include - -namespace r600 { - -class FragmentShaderFromNir : public ShaderFromNirProcessor { -public: - FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info, - r600_pipe_shader_selector &sel, const r600_shader_key &key, - enum amd_gfx_level gfx_level); - bool scan_sysvalue_access(nir_instr *instr) override; -private: - - struct Interpolator { - bool enabled; - unsigned ij_index; - PValue i; - PValue j; - }; - - void emit_shader_start() override; - bool do_allocate_reserved_registers() override; - bool process_store_output(nir_intrinsic_instr *instr); - - bool emit_store_output(nir_intrinsic_instr* instr); - - bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs); - bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs); - bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip, - int num_components, int start_comp); - bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op); - bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask); - bool load_interpolated_two_comp_for_one(GPRVector &dest, - ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp); - - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - void do_finalize() override; - - void load_front_face(); - - bool emit_load_input(nir_intrinsic_instr* instr); - bool emit_load_front_face(nir_intrinsic_instr* instr); - bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); - bool emit_load_sample_pos(nir_intrinsic_instr* instr); - bool emit_load_sample_id(nir_intrinsic_instr* instr); - - bool process_load_input(nir_intrinsic_instr *instr, bool interpolated); - bool emit_load_interpolated_input(nir_intrinsic_instr* instr); - bool 
load_barycentric_at_offset(nir_intrinsic_instr* instr); - bool load_barycentric_at_sample(nir_intrinsic_instr* instr); - - - unsigned m_max_color_exports; - unsigned m_max_counted_color_exports; - bool m_two_sided_color; - ExportInstruction *m_last_pixel_export; - const nir_shader& m_nir; - - - std::array m_interpolator; - unsigned m_reserved_registers; - unsigned m_frag_pos_index; - PGPRValue m_front_face_reg; - PGPRValue m_sample_mask_reg; - PGPRValue m_sample_id_reg; - PGPRValue m_helper_invocation; - GPRVector m_frag_pos; - bool m_need_back_color; - bool m_front_face_loaded; - ShaderIO m_shaderio; - unsigned m_depth_exports; - - std::map m_input_cache; - - static const int s_max_interpolators = 6; - - std::bitset m_interpolators_used; - - unsigned m_apply_sample_mask; - bool m_dual_source_blend; - ShaderInput *m_pos_input; - -}; - -} - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp new file mode 100644 index 0000000..62b1413 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp @@ -0,0 +1,873 @@ + + +#include "sfn_debug.h" +#include "sfn_shader_fs.h" + +#include "sfn_instr_alugroup.h" +#include "sfn_instr_tex.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_export.h" + +#include "tgsi/tgsi_from_mesa.h" + +#include + +namespace r600 { + +using std::string; + +FragmentShader::FragmentShader(const r600_shader_key& key): + Shader("FS"), + m_dual_source_blend(key.ps.dual_source_blend), + m_max_color_exports(MAX2(key.ps.nr_cbufs, 1)), + m_export_highest(0), + m_num_color_exports(0), + m_color_export_mask(0), + m_depth_exports(0), + m_last_pixel_export(nullptr), + m_pos_input(127, false), + m_fs_write_all(false), + m_apply_sample_mask(key.ps.apply_sample_id_mask), + m_rat_base(key.ps.nr_cbufs) +{ +} + +void FragmentShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_FRAGMENT; + + sh_info->ps_color_export_mask = m_color_export_mask; + 
sh_info->ps_export_highest = m_export_highest; + sh_info->nr_ps_color_exports = m_num_color_exports; + + sh_info->fs_write_all = m_fs_write_all; + + sh_info->rat_base = m_rat_base; + sh_info->uses_kill = m_uses_discard; + sh_info->gs_prim_id_input = m_gs_prim_id_input; + sh_info->ps_prim_id_input = m_ps_prim_id_input; + sh_info->nsys_inputs = m_nsys_inputs; + sh_info->uses_helper_invocation = m_helper_invocation != nullptr; +} + + +bool FragmentShader::load_input(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + AluInstr *ir = nullptr; + + auto location = nir_intrinsic_io_semantics(intr).location; + if (location == VARYING_SLOT_POS) { + for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) { + ir = new AluInstr(op1_mov, + vf.dest(intr->dest, i, pin_none), + m_pos_input[i], + AluInstr::write); + emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + set_input_gpr(nir_intrinsic_base(intr), m_pos_input[0]->sel()); + return true; + } + + if (location == VARYING_SLOT_FACE) { + ir = new AluInstr(op2_setge_dx10, + vf.dest(intr->dest, 0, pin_none), + m_face_input, + vf.inline_const(ALU_SRC_0, 0), + AluInstr::last_write); + set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel()); + + emit_instruction(ir); + return true; + } + + auto io = input(nir_intrinsic_base(intr)); + auto comp = nir_intrinsic_component(intr); + bool need_temp = comp > 0 || !intr->dest.is_ssa; + for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) { + if (need_temp) { + auto tmp = vf.temp_register(comp + i); + ir = new AluInstr(op1_interp_load_p0, + tmp, + new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp), + AluInstr::last_write); + emit_instruction(ir); + emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write)); + } else { + + ir = new AluInstr(op1_interp_load_p0, + vf.dest(intr->dest, i, pin_chan), + new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i), + AluInstr::write); + 
emit_instruction(ir); + } + + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +bool FragmentShader::store_output(nir_intrinsic_instr *intr) +{ + auto location = nir_intrinsic_io_semantics(intr).location; + + if (location == FRAG_RESULT_COLOR) { + if (!m_dual_source_blend) { + m_fs_write_all = true; + } + + return emit_export_pixel(*intr, m_dual_source_blend ? 1 : m_max_color_exports); + } + + if ((location >= FRAG_RESULT_DATA0 && + location <= FRAG_RESULT_DATA7) || + location == FRAG_RESULT_DEPTH || + location == FRAG_RESULT_STENCIL || + location == FRAG_RESULT_SAMPLE_MASK) + return emit_export_pixel(*intr, 1); + + sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n"; + return false; +} + +unsigned +barycentric_ij_index(nir_intrinsic_instr *intr) +{ + unsigned index = 0; + switch (intr->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + index = 0; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + index = 1; + break; + case nir_intrinsic_load_barycentric_centroid: + index = 2; + break; + default: + unreachable("Unknown interpolator intrinsic"); + } + + switch (nir_intrinsic_interp_mode(intr)) { + case INTERP_MODE_NONE: + case INTERP_MODE_SMOOTH: + case INTERP_MODE_COLOR: + return index; + case INTERP_MODE_NOPERSPECTIVE: + return index + 3; + case INTERP_MODE_FLAT: + case INTERP_MODE_EXPLICIT: + default: + unreachable("unknown/unsupported mode for load_interpolated"); + } + return 0; +} + +bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + switch (intr->intrinsic) { + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_sample: { + unsigned ij = barycentric_ij_index(intr); + vf.inject_value(intr->dest, 0, m_interpolator[ij].i); + vf.inject_value(intr->dest, 1, 
m_interpolator[ij].j); + return true; + } + case nir_intrinsic_load_input: + return load_input(intr); + case nir_intrinsic_load_barycentric_at_offset: + return load_barycentric_at_offset(intr); + case nir_intrinsic_load_barycentric_at_sample: + return load_barycentric_at_sample(intr); + case nir_intrinsic_load_interpolated_input: + return load_interpolated_input(intr); + case nir_intrinsic_discard_if: + m_uses_discard = true; + emit_instruction(new AluInstr(op2_killne_int, nullptr, + value_factory().src(intr->src[0], 0), + value_factory().zero(), + {AluInstr::last})); + start_new_block(0); + return true; + case nir_intrinsic_discard: + m_uses_discard = true; + emit_instruction(new AluInstr(op2_kille_int, nullptr, + value_factory().zero(), + value_factory().zero(), + {AluInstr::last})); + return true; + case nir_intrinsic_load_sample_mask_in: + if (m_apply_sample_mask) { + return emit_load_sample_mask_in(intr); + } else + return emit_simple_mov(intr->dest, 0, m_sample_mask_reg); + case nir_intrinsic_load_sample_id: + return emit_simple_mov(intr->dest, 0, m_sample_id_reg); + case nir_intrinsic_load_helper_invocation: + return emit_load_helper_invocation(intr); + case nir_intrinsic_load_sample_pos: + return emit_load_sample_pos(intr); + default: + return false; + } +} + +bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + unsigned loc = nir_intrinsic_io_semantics(intr).location; + switch (loc) { + case VARYING_SLOT_POS: + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) + vf.inject_value(intr->dest, i, m_pos_input[i]); + return true; + case VARYING_SLOT_FACE: + return false; + default: + ; + } + + auto param = nir_src_as_const_value(intr->src[1]); + assert(param && "Indirect PS inputs not (yet) supported"); + + int dest_num_comp = nir_dest_num_components(intr->dest); + int start_comp = nir_intrinsic_component(intr); + bool need_temp = start_comp > 0 || !intr->dest.is_ssa; + + auto dst = need_temp 
? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan); + + InterpolateParams params; + + params.i = vf.src(intr->src[0], 0); + params.j = vf.src(intr->src[0], 1); + params.base = input(nir_intrinsic_base(intr)).lds_pos(); + + if (!load_interpolated(dst, params, dest_num_comp, start_comp)) + return false; + + if (need_temp) { + AluInstr *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + auto real_dst = vf.dest(intr->dest, i, pin_chan); + ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write); + emit_instruction(ir); + } + assert(ir); + ir->set_alu_flag(alu_last_instr); + } + + return true; +} + +bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params, + int num_dest_comp, int start_comp) +{ + sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n"; + + if (num_dest_comp == 1) { + switch (start_comp) { + case 0: return load_interpolated_one_comp(dest, params, op2_interp_x); + case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1); + case 2: return load_interpolated_one_comp(dest, params, op2_interp_z); + case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3); + default: + assert(0); + } + } + + if (num_dest_comp == 2) { + switch (start_comp) { + case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3); + case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc); + case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) && + load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1); + default: + assert(0); + } + } + + if (num_dest_comp == 3 && start_comp == 0) + return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) && + load_interpolated_one_comp(dest, params, op2_interp_z); + + int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp; + + bool success = load_interpolated_two_comp(dest, 
params, op2_interp_zw, full_write_mask & 0xc); + success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3); + return success; +} + +bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op) +{ + auto group = new AluGroup(); + bool success = true; + + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2 && success; ++i) { + int chan = i; + if (op == op2_interp_z) + chan += 2; + + + ir = new AluInstr(op, dest[chan], + i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan), + i == 0 ? AluInstr::write : AluInstr::last); + + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + return success; +} + +bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask) +{ + auto group = new AluGroup(); + bool success = true; + + AluInstr *ir = nullptr; + assert(params.j); + assert(params.i); + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), + (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty); + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + return success; +} + +bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, + UNUSED int start, int comp) +{ + auto group = new AluGroup(); + bool success = true; + AluInstr *ir = nullptr; + + for (int i = 0; i < 4 ; ++i) { + ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), + i == comp ? 
AluInstr::write : AluInstr::empty); + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + + return success; +} + +int FragmentShader::do_allocate_reserved_registers() +{ + for (unsigned i = 0; i < s_max_interpolators; ++i) { + if (m_interpolators_used.test(i)) { + sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; + m_interpolator[i].enabled = true; + } + } + + int num_baryc = 0; + for (int i = 0; i < 6; ++i) { + if (m_interpolator[i].enabled) { + sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; + unsigned sel = num_baryc / 2; + unsigned chan = 2 * (num_baryc % 2); + + m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1); + m_interpolator[i].i->pin_live_range(true, false); + + m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan); + m_interpolator[i].j->pin_live_range(true, false); + + m_interpolator[i].ij_index = num_baryc++; + } + } + + int next_register = (num_baryc + 1) >> 1; + + if (m_sv_values.test(es_pos)) { + m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false); + for (int i = 0; i < 4; ++i) + m_pos_input[i]->pin_live_range(true); + } + + int face_reg_index = -1; + if (m_sv_values.test(es_face)) { + face_reg_index = next_register++; + m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0); + m_face_input->pin_live_range(true); + } + + if (m_sv_values.test(es_sample_mask_in)) { + if (face_reg_index < 0) + face_reg_index = next_register++; + m_sample_mask_reg = value_factory().allocate_pinned_register(face_reg_index, 2); + m_sample_mask_reg->pin_live_range(true); + sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n"; + m_nsys_inputs = 1; + ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEMASK); + input.set_gpr(face_reg_index); + add_input(input); + } + + if 
(m_sv_values.test(es_sample_id) || + m_sv_values.test(es_sample_mask_in)) { + int sample_id_reg = next_register++; + m_sample_id_reg = value_factory().allocate_pinned_register(sample_id_reg, 3); + m_sample_id_reg->pin_live_range(true); + sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n"; + m_nsys_inputs++; + ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEID); + input.set_gpr(sample_id_reg); + add_input(input); + } + + if (m_sv_values.test(es_helper_invocation)) { + m_helper_invocation = value_factory().allocate_pinned_register(next_register++, 0); + } + + return next_register; +} + +bool FragmentShader::do_scan_instruction(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_centroid: + m_interpolators_used.set(barycentric_ij_index(intr)); + break; + case nir_intrinsic_load_front_face: + m_sv_values.set(es_face); + break; + case nir_intrinsic_load_sample_mask_in: + m_sv_values.set(es_sample_mask_in); + break; + case nir_intrinsic_load_sample_pos: + m_sv_values.set(es_sample_pos); + FALLTHROUGH; + case nir_intrinsic_load_sample_id: + m_sv_values.set(es_sample_id); + break; + case nir_intrinsic_load_helper_invocation: + m_sv_values.set(es_helper_invocation); + break; + case nir_intrinsic_load_input: + return scan_input(intr, 0); + case nir_intrinsic_load_interpolated_input: + return scan_input(intr, 1); + default: + return false; + } + return true; +} + +bool FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + auto dest = vf.dest(instr->dest, 0, pin_free); + auto tmp = vf.temp_register(); + assert(m_sample_id_reg); + assert(m_sample_mask_reg); + + 
emit_instruction(new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write)); + emit_instruction(new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write)); + return true; +} + +bool FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr* instr) +{ + assert(m_helper_invocation); + auto& vf = value_factory(); + emit_instruction(new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write)); + RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group}; + + auto vtx = new LoadFromBuffer(destvec, {4,7,7,7}, m_helper_invocation, 0, + R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float); + vtx->set_fetch_flag(FetchInstr::vpm); + vtx->set_fetch_flag(FetchInstr::use_tc); + vtx->set_always_keep(); + auto dst = value_factory().dest(instr->dest, 0, pin_free); + auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write); + ir->add_required_instr(vtx); + emit_instruction(vtx); + emit_instruction(ir); + + return true; +} + +bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id) +{ + auto index = nir_src_as_const_value(intr->src[index_src_id]); + assert(index); + + bool uses_interpol_at_centroid = false; + + unsigned location = nir_intrinsic_io_semantics(intr).location + index->u32; + unsigned driver_location = nir_intrinsic_base(intr) + index->u32; + auto semantic = r600_get_varying_semantic(location); + tgsi_semantic name = (tgsi_semantic)semantic.first; + unsigned sid = semantic.second; + + if (location == VARYING_SLOT_POS) { + m_sv_values.set(es_pos); + ShaderInput pos_input(driver_location, name); + pos_input.set_sid(sid); + pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER, false); + add_input(pos_input); + return true; + } + + if (location == VARYING_SLOT_FACE) { + m_sv_values.set(es_face); + ShaderInput face_input(driver_location, name); + face_input.set_sid(sid); + add_input(face_input); + return 
true; + } + + tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT; + tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + + if (index_src_id > 0) { + glsl_interp_mode mode = INTERP_MODE_NONE; + auto parent = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); + mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent); + switch (parent->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + break; + case nir_intrinsic_load_barycentric_centroid: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID; + uses_interpol_at_centroid = true; + break; + default: + std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of " + << nir_intrinsic_infos[intr->intrinsic].name + << " interpolator?\n"; + assert(0); + } + + switch (mode) { + case INTERP_MODE_NONE: + if (name == TGSI_SEMANTIC_COLOR || + name == TGSI_SEMANTIC_BCOLOR) { + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + } + FALLTHROUGH; + case INTERP_MODE_SMOOTH: + tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case INTERP_MODE_NOPERSPECTIVE: + tgsi_interpolate = TGSI_INTERPOLATE_LINEAR; + break; + case INTERP_MODE_FLAT: + break; + case INTERP_MODE_COLOR: + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + case INTERP_MODE_EXPLICIT: + default: + assert(0); + } + } + + switch (name) { + case TGSI_SEMANTIC_PRIMID: + std::cerr << "Found primid input\n"; + m_gs_prim_id_input = true; + m_ps_prim_id_input = ninputs(); + FALLTHROUGH; + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_LAYER: + case TGSI_SEMANTIC_PCOORD: + case TGSI_SEMANTIC_VIEWPORT_INDEX: + case TGSI_SEMANTIC_CLIPDIST: { + auto iinput = 
find_input(driver_location); + if (iinput == input_not_found()) { + ShaderInput input(driver_location, name); + input.set_sid(sid); + input.set_need_lds_pos(); + input.set_interpolator(tgsi_interpolate, tgsi_loc, uses_interpol_at_centroid); + add_input(input); + assert(find_input(input.location()) != input_not_found()); + } else { + if (uses_interpol_at_centroid) { + iinput->second.set_uses_interpolate_at_centroid(); + } + } + + return true; + } + default: + return false; + } +} + +bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + RegisterVec4 slope = vf.temp_vec4(pin_group); + auto src = emit_load_to_register(vf.src(instr->src[0], 0)); + auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0, + R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float); + + fetch->set_fetch_flag(FetchInstr::srf_mode); + emit_instruction(fetch); + + auto grad = vf.temp_vec4(pin_group); + + auto interpolator = m_interpolator[barycentric_ij_index(instr)]; + assert(interpolator.enabled); + + RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); + + auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0); + tex->set_tex_flag(TexInstr::grad_fine); + tex->set_tex_flag(TexInstr::x_unnormalized); + tex->set_tex_flag(TexInstr::y_unnormalized); + tex->set_tex_flag(TexInstr::z_unnormalized); + tex->set_tex_flag(TexInstr::w_unnormalized); + emit_instruction(tex); + + tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0); + tex->set_tex_flag(TexInstr::x_unnormalized); + tex->set_tex_flag(TexInstr::y_unnormalized); + tex->set_tex_flag(TexInstr::z_unnormalized); + tex->set_tex_flag(TexInstr::w_unnormalized); + tex->set_tex_flag(TexInstr::grad_fine); + emit_instruction(tex); + + auto tmp0 = vf.temp_register(); + auto tmp1 = vf.temp_register(); + + emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write})); + 
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); + + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr})); + + return true; +} + +bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + auto interpolator = m_interpolator[barycentric_ij_index(instr)]; + + auto help = vf.temp_vec4(pin_group); + RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); + + auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0); + getgradh->set_tex_flag(TexInstr::x_unnormalized); + getgradh->set_tex_flag(TexInstr::y_unnormalized); + getgradh->set_tex_flag(TexInstr::z_unnormalized); + getgradh->set_tex_flag(TexInstr::w_unnormalized); + getgradh->set_tex_flag(TexInstr::grad_fine); + emit_instruction(getgradh); + + auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0); + getgradv->set_tex_flag(TexInstr::x_unnormalized); + getgradv->set_tex_flag(TexInstr::y_unnormalized); + getgradv->set_tex_flag(TexInstr::z_unnormalized); + getgradv->set_tex_flag(TexInstr::w_unnormalized); + getgradv->set_tex_flag(TexInstr::grad_fine); + emit_instruction(getgradv); + + auto ofs_x = vf.src(instr->src[0], 0); + auto ofs_y = vf.src(instr->src[0], 1); + auto tmp0 = vf.temp_register(); + auto tmp1 = vf.temp_register(); + emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, 
pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr})); + + return true; +} + + + +bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs) +{ + RegisterVec4::Swizzle swizzle; + auto semantics = nir_intrinsic_io_semantics(&intr); + unsigned driver_location = nir_intrinsic_base(&intr); + unsigned write_mask = nir_intrinsic_write_mask(&intr); + + switch (semantics.location) { + case FRAG_RESULT_DEPTH: + swizzle = {0,7,7,7}; + break; + case FRAG_RESULT_STENCIL: + swizzle = {7,0,7,7}; + break; + case FRAG_RESULT_SAMPLE_MASK: + swizzle = {7,7,0,7}; + break; + default: + for (int i = 0; i < 4; ++i) { + swizzle[i] = (1 << i) & write_mask ? i : 7; + } + } + + auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle); + + if (semantics.location == FRAG_RESULT_COLOR || + (semantics.location >= FRAG_RESULT_DATA0 && + semantics.location <= FRAG_RESULT_DATA7)) { + + ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask); + add_output(output); + + for (int k = 0 ; k < num_outputs; ++k) { + + unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR) + ? 
semantics.dual_source_blend_index : driver_location) + k - m_depth_exports; + + sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n"; + + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output loc:" << location + << " dl:" << driver_location + << " skipped because we have only " << m_max_color_exports << " CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstr(ExportInstr::pixel, location, value); + + if (m_export_highest < location) + m_export_highest = location; + + m_num_color_exports++; + + /* Hack: force dual source output handling if one color output has a + * dual_source_blend_index > 0 */ + if (semantics.location == FRAG_RESULT_COLOR && + semantics.dual_source_blend_index > 0) + m_dual_source_blend = true; + + if (m_num_color_exports > 1) + m_fs_write_all = false; + unsigned mask = (0xfu << (location * 4)); + m_color_export_mask |= mask; + + emit_instruction(m_last_pixel_export); + + }; + } else if (semantics.location == FRAG_RESULT_DEPTH || + semantics.location == FRAG_RESULT_STENCIL || + semantics.location == FRAG_RESULT_SAMPLE_MASK) { + m_depth_exports++; + emit_instruction(new ExportInstr(ExportInstr::pixel, 61, value)); + int semantic = TGSI_SEMANTIC_POSITION; + if (semantics.location == FRAG_RESULT_STENCIL) + semantic = TGSI_SEMANTIC_STENCIL; + else if (semantics.location == FRAG_RESULT_SAMPLE_MASK) + semantic = TGSI_SEMANTIC_SAMPLEMASK; + + ShaderOutput output(driver_location, semantic, write_mask); + add_output(output); + + } else { + return false; + } + return true; +} + +bool FragmentShader::emit_load_sample_pos(nir_intrinsic_instr* instr) +{ + auto dest = value_factory().dest_vec4(instr->dest, pin_group); + + + auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, m_sample_id_reg, 0, + R600_BUFFER_INFO_CONST_BUFFER, + nullptr, fmt_32_32_32_32_float); + fetch->set_fetch_flag(FetchInstr::srf_mode); + emit_instruction(fetch); + return true; +} + +void FragmentShader::do_finalize() +{ + if 
(!m_last_pixel_export) { + RegisterVec4 value(0, false, {7,7,7,7}); + m_last_pixel_export = new ExportInstr(ExportInstr::pixel, 0, value); + emit_instruction(m_last_pixel_export); + m_num_color_exports++; + m_color_export_mask |= 0xf; + } + m_last_pixel_export->set_is_last_export(true); +} + +bool FragmentShader::read_prop(std::istream& is) +{ + string value; + is >> value; + + auto splitpos = value.find(':'); + assert(splitpos != string::npos); + + std::istringstream ival(value); + string name; + string val; + + std::getline(ival, name, ':'); + + if (name == "MAX_COLOR_EXPORTS") + ival >> m_max_color_exports; + else if (name == "COLOR_EXPORTS") + ival >> m_num_color_exports; + else if (name == "COLOR_EXPORT_MASK") + ival >> m_color_export_mask; + else if (name == "WRITE_ALL_COLORS") + ival >> m_fs_write_all; + else + return false; + return true; +} + +void FragmentShader::do_print_properties(std::ostream& os) const +{ + os << "PROP MAX_COLOR_EXPORTS:" << m_max_color_exports << "\n"; + os << "PROP COLOR_EXPORTS:" << m_num_color_exports << "\n"; + os << "PROP COLOR_EXPORT_MASK:" << m_color_export_mask << "\n"; + os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n"; +} + + +FragmentShader::Interpolator::Interpolator(): + enabled(false) +{ +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.h b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h new file mode 100644 index 0000000..e207fe4 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h @@ -0,0 +1,88 @@ +#ifndef R600_SFN_SHADER_FS_H +#define R600_SFN_SHADER_FS_H + +#include "sfn_shader.h" + +namespace r600 { + +class FragmentShader : public Shader { +public: + FragmentShader(const r600_shader_key& key); + bool load_input(nir_intrinsic_instr *intr) override; + bool store_output(nir_intrinsic_instr *intr) override; + + bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; + +private: + class Interpolator { + public: + Interpolator(); + bool enabled : 1; + unsigned ij_index : 4; + 
PRegister i; + PRegister j; + }; + + struct InterpolateParams { + PVirtualValue i,j; + int base; + }; + + static const int s_max_interpolators = 6; + + bool do_scan_instruction(nir_instr *instr) override; + int do_allocate_reserved_registers() override; + + void do_get_shader_info(r600_shader *sh_info) override; + + bool scan_input(nir_intrinsic_instr *instr, int index_src_id); + + bool load_barycentric_pixel(nir_intrinsic_instr *intr); + bool load_barycentric_at_sample(nir_intrinsic_instr* instr); + bool load_barycentric_at_offset(nir_intrinsic_instr* instr); + bool load_interpolated_input(nir_intrinsic_instr *intr); + bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params, + int num_dest_comp, int start_comp); + + bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op); + bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask); + bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, + int start, int dest_slot); + + bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs); + bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); + bool emit_load_helper_invocation(nir_intrinsic_instr* instr); + bool emit_load_sample_pos(nir_intrinsic_instr* instr); + void do_finalize() override; + + bool read_prop(std::istream& is) override; + + void do_print_properties(std::ostream& os) const override; + + bool m_dual_source_blend; + unsigned m_max_color_exports; + unsigned m_export_highest; + unsigned m_num_color_exports; + unsigned m_color_export_mask; + unsigned m_depth_exports; + ExportInstr *m_last_pixel_export; + + std::bitset m_interpolators_used; + std::array m_interpolator; + RegisterVec4 m_pos_input; + Register *m_face_input{nullptr}; + bool m_fs_write_all; + bool m_uses_discard{false}; + bool m_gs_prim_id_input{false}; + int m_ps_prim_id_input{0}; + Register *m_sample_id_reg{nullptr}; + 
Register *m_sample_mask_reg{nullptr}; + Register *m_helper_invocation{nullptr}; + int m_nsys_inputs{0}; + bool m_apply_sample_mask{false}; + int m_rat_base{0}; +}; + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp deleted file mode 100644 index a0ab219..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_shader_geometry.h" -#include "sfn_instruction_misc.h" -#include "sfn_instruction_fetch.h" -#include "sfn_shaderio.h" - -namespace r600 { - -GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector &sel, - const r600_shader_key &key, - enum amd_gfx_level gfx_level): - VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader, - sh->scratch_space_needed, gfx_level, key.gs.first_atomic_counter), - m_pipe_shader(sh), - m_so_info(&sel.so), - m_first_vertex_emitted(false), - m_offset(0), - m_next_input_ring_offset(0), - m_key(key), - m_clip_dist_mask(0), - m_cur_ring_output(0), - m_gs_tri_strip_adj_fix(false), - m_input_mask(0) -{ - sh_info().atomic_base = key.gs.first_atomic_counter; -} - -bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr) -{ - auto location = nir_intrinsic_io_semantics(instr).location; - auto index = nir_src_as_const_value(instr->src[1]); - assert(index); - auto driver_location = nir_intrinsic_base(instr) + index->u32; - - uint32_t write_mask = nir_intrinsic_write_mask(instr); - GPRVector::Swizzle swz = swizzle_from_mask(write_mask); - - auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true); - - sh_info().output[driver_location].write_mask = write_mask; - - auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value, - 4 * driver_location, - instr->num_components, m_export_base[0]); - streamout_data[location] = ir; - - return true; -} - -bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) -{ - if (instr->type != nir_instr_type_intrinsic) - return true; - - nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); - - switch (ii->intrinsic) { - case nir_intrinsic_store_output: - return process_store_output(ii); - case nir_intrinsic_load_input: - case nir_intrinsic_load_per_vertex_input: - return process_load_input(ii); - default: - return true; - } -} - -bool 
GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr) -{ - auto location = nir_intrinsic_io_semantics(instr).location; - auto index = nir_src_as_const_value(instr->src[1]); - assert(index); - - auto driver_location = nir_intrinsic_base(instr) + index->u32; - - if (location == VARYING_SLOT_COL0 || - location == VARYING_SLOT_COL1 || - (location >= VARYING_SLOT_VAR0 && - location <= VARYING_SLOT_VAR31) || - (location >= VARYING_SLOT_TEX0 && - location <= VARYING_SLOT_TEX7) || - location == VARYING_SLOT_BFC0 || - location == VARYING_SLOT_BFC1 || - location == VARYING_SLOT_PNTC || - location == VARYING_SLOT_CLIP_VERTEX || - location == VARYING_SLOT_CLIP_DIST0 || - location == VARYING_SLOT_CLIP_DIST1 || - location == VARYING_SLOT_PRIMITIVE_ID || - location == VARYING_SLOT_POS || - location == VARYING_SLOT_PSIZ || - location == VARYING_SLOT_LAYER || - location == VARYING_SLOT_VIEWPORT || - location == VARYING_SLOT_FOGC) { - r600_shader_io& io = sh_info().output[driver_location]; - - auto semantic = r600_get_varying_semantic(location); - io.name = semantic.first; - io.sid = semantic.second; - - evaluate_spi_sid(io); - - if (sh_info().noutput <= driver_location) - sh_info().noutput = driver_location + 1; - - if (location == VARYING_SLOT_CLIP_DIST0 || - location == VARYING_SLOT_CLIP_DIST1) { - m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0); - } - - if (location == VARYING_SLOT_VIEWPORT) { - sh_info().vs_out_viewport = 1; - sh_info().vs_out_misc_write = 1; - } - return true; - } - return false; -} - -bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr) -{ - auto location = nir_intrinsic_io_semantics(instr).location; - auto index = nir_src_as_const_value(instr->src[1]); - assert(index); - - auto driver_location = nir_intrinsic_base(instr) + index->u32; - - if (location == VARYING_SLOT_POS || - location == VARYING_SLOT_PSIZ || - location == VARYING_SLOT_FOGC || - location == VARYING_SLOT_CLIP_VERTEX || - location == 
VARYING_SLOT_CLIP_DIST0 || - location == VARYING_SLOT_CLIP_DIST1 || - location == VARYING_SLOT_COL0 || - location == VARYING_SLOT_COL1 || - location == VARYING_SLOT_BFC0 || - location == VARYING_SLOT_BFC1 || - location == VARYING_SLOT_PNTC || - (location >= VARYING_SLOT_VAR0 && - location <= VARYING_SLOT_VAR31) || - (location >= VARYING_SLOT_TEX0 && - location <= VARYING_SLOT_TEX7)) { - - uint64_t bit = 1ull << location; - if (!(bit & m_input_mask)) { - r600_shader_io& io = sh_info().input[driver_location]; - auto semantic = r600_get_varying_semantic(location); - io.name = semantic.first; - io.sid = semantic.second; - - io.ring_offset = 16 * driver_location; - ++sh_info().ninput; - m_next_input_ring_offset += 16; - m_input_mask |= bit; - } - return true; - } - return false; -} - -bool GeometryShaderFromNir::do_allocate_reserved_registers() -{ - const int sel[6] = {0, 0 ,0, 1, 1, 1}; - const int chan[6] = {0, 1 ,3, 0, 1, 2}; - - increment_reserved_registers(); - increment_reserved_registers(); - - /* Reserve registers used by the shaders (should check how many - * components are actually used */ - for (int i = 0; i < 6; ++i) { - auto reg = new GPRValue(sel[i], chan[i]); - reg->set_as_input(); - m_per_vertex_offsets[i].reset(reg); - inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false); - } - auto reg = new GPRValue(0, 2); - reg->set_as_input(); - m_primitive_id.reset(reg); - inject_register(0, 2, m_primitive_id, false); - - reg = new GPRValue(1, 3); - reg->set_as_input(); - m_invocation_id.reset(reg); - inject_register(1, 3, m_invocation_id, false); - - m_export_base[0] = get_temp_register(0); - m_export_base[1] = get_temp_register(0); - m_export_base[2] = get_temp_register(0); - m_export_base[3] = get_temp_register(0); - emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr})); - emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr})); - 
emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr})); - emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr})); - - sh_info().ring_item_sizes[0] = m_next_input_ring_offset; - - if (m_key.gs.tri_strip_adj_fix) - emit_adj_fix(); - - return true; -} - -void GeometryShaderFromNir::emit_adj_fix() -{ - PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1)); - emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr}); - - int reg_indices[6]; - int reg_chanels[6] = {1, 2, 3, 1, 2, 3}; - - int rotate_indices[6] = {4, 5, 0, 1, 2, 3}; - - reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel(); - reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel(); - - std::array adjhelp; - - AluInstruction *ir = nullptr; - for (int i = 0; i < 6; i++) { - adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i])); - ir = new AluInstruction(op3_cnde_int, adjhelp[i], - {adjhelp0, m_per_vertex_offsets[i], - m_per_vertex_offsets[rotate_indices[i]]}, - {alu_write}); - if ((get_chip_class() == CAYMAN && i == 2) || (i == 3)) - ir->set_flag(alu_last_instr); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - - for (int i = 0; i < 6; i++) - m_per_vertex_offsets[i] = adjhelp[i]; -} - - -bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_emit_vertex: - return emit_vertex(instr, false); - case nir_intrinsic_end_primitive: - return emit_vertex(instr, true); - case nir_intrinsic_load_primitive_id: - return load_preloaded_value(instr->dest, 0, m_primitive_id); - case nir_intrinsic_load_invocation_id: - return load_preloaded_value(instr->dest, 0, m_invocation_id); - case nir_intrinsic_store_output: - return emit_store(instr); - case nir_intrinsic_load_per_vertex_input: - return emit_load_per_vertex_input(instr); 
- default: - ; - } - return false; -} - -bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut) -{ - int stream = nir_intrinsic_stream_id(instr); - assert(stream < 4); - - for(auto v: streamout_data) { - if (stream == 0 || v.first != VARYING_SLOT_POS) { - v.second->patch_ring(stream, m_export_base[stream]); - emit_instruction(v.second); - } else - delete v.second; - } - streamout_data.clear(); - emit_instruction(new EmitVertex(stream, cut)); - - if (!cut) - emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream], - PValue(new LiteralValue(sh_info().noutput)), - {alu_write, alu_last_instr})); - - return true; -} - -bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr) -{ - auto dest = vec_from_nir(instr->dest, 4); - - std::array swz = {7,7,7,7}; - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - swz[i] = i + nir_intrinsic_component(instr); - } - - auto literal_index = nir_src_as_const_value(instr->src[0]); - - if (!literal_index) { - sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n"; - return false; - } - assert(literal_index->u32 < 6); - assert(nir_intrinsic_io_semantics(instr).num_slots == 1); - - PValue addr = m_per_vertex_offsets[literal_index->u32]; - auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr, - 16 * nir_intrinsic_base(instr), - R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true); - fetch->set_dest_swizzle(swz); - - emit_instruction(fetch); - return true; -} - -void GeometryShaderFromNir::do_finalize() -{ - if (m_clip_dist_mask) { - int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask); - sh_info().cc_dist_mask = (1 << num_clip_dist) - 1; - sh_info().clip_dist_write = (1 << num_clip_dist) - 1; - } -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h b/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h deleted file mode 100644 index de7501c..0000000 --- 
a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#ifndef SFN_GEOMETRYSHADERFROMNIR_H -#define SFN_GEOMETRYSHADERFROMNIR_H - -#include "sfn_vertexstageexport.h" - -namespace r600 { - -class GeometryShaderFromNir : public VertexStage -{ -public: - GeometryShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum amd_gfx_level gfx_level); - - bool scan_sysvalue_access(nir_instr *instr) override; - PValue primitive_id() override {return m_primitive_id;} - -private: - - bool do_allocate_reserved_registers() override; - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - - bool emit_vertex(nir_intrinsic_instr* instr, bool cut); - void emit_adj_fix(); - - bool process_store_output(nir_intrinsic_instr* instr); - bool process_load_input(nir_intrinsic_instr* instr); - - bool emit_store(nir_intrinsic_instr* instr); - bool emit_load_per_vertex_input(nir_intrinsic_instr* instr); - - void do_finalize() override; - - r600_pipe_shader *m_pipe_shader; - const pipe_stream_output_info *m_so_info; - - std::array m_per_vertex_offsets; - PValue m_primitive_id; - PValue m_invocation_id; - PValue m_export_base[4]; - bool m_first_vertex_emitted; - - int m_offset; - int m_next_input_ring_offset; - r600_shader_key m_key; - int m_clip_dist_mask; - unsigned m_cur_ring_output; - bool m_gs_tri_strip_adj_fix; - uint64_t m_input_mask; - - std::map streamout_data; -}; - -} - -#endif // SFN_GEOMETRYSHADERFROMNIR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp new file mode 100644 index 0000000..e7db5a8 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp @@ -0,0 +1,370 @@ +#include "sfn_shader_gs.h" +#include "sfn_instr_fetch.h" +#include "sfn_debug.h" + +namespace r600 { + +GeometryShader::GeometryShader(const r600_shader_key& key): + Shader("GS"), + m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix) +{ + +} + +bool GeometryShader::do_scan_instruction(nir_instr *instr) +{ + if (instr->type != 
nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch (ii->intrinsic) { + case nir_intrinsic_store_output: + return process_store_output(ii); + case nir_intrinsic_load_per_vertex_input: + return process_load_input(ii); + default: + return false; + } +} + +bool GeometryShader::process_store_output(nir_intrinsic_instr *instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + if (location == VARYING_SLOT_COL0 || + location == VARYING_SLOT_COL1 || + (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) || + (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7) || + location == VARYING_SLOT_BFC0 || + location == VARYING_SLOT_BFC1 || + location == VARYING_SLOT_PNTC || + location == VARYING_SLOT_CLIP_VERTEX || + location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1 || + location == VARYING_SLOT_PRIMITIVE_ID || + location == VARYING_SLOT_POS || + location == VARYING_SLOT_PSIZ || + location == VARYING_SLOT_LAYER || + location == VARYING_SLOT_VIEWPORT || + location == VARYING_SLOT_FOGC) { + + auto semantic = r600_get_varying_semantic(location); + tgsi_semantic name = (tgsi_semantic)semantic.first; + auto write_mask = nir_intrinsic_write_mask(instr); + ShaderOutput output(driver_location, name, write_mask); + output.set_sid(semantic.second); + add_output(output); + + if (location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1) { + m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0); + } + + if (location == VARYING_SLOT_VIEWPORT) { + m_out_viewport = true; + m_out_misc_write = true; + + } + if (m_noutputs <= driver_location) + m_noutputs = driver_location + 1; + + return true; + } + return false; +} + +bool GeometryShader::process_load_input(nir_intrinsic_instr *instr) +{ + auto 
location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + if (location == VARYING_SLOT_POS || + location == VARYING_SLOT_PSIZ || + location == VARYING_SLOT_FOGC || + location == VARYING_SLOT_CLIP_VERTEX || + location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1 || + location == VARYING_SLOT_COL0 || + location == VARYING_SLOT_COL1 || + location == VARYING_SLOT_BFC0 || + location == VARYING_SLOT_BFC1 || + location == VARYING_SLOT_PNTC || + (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) || + (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7)) { + + uint64_t bit = 1ull << location; + if (!(bit & m_input_mask)) { + auto semantic = r600_get_varying_semantic(location); + ShaderInput input(driver_location, semantic.first); + input.set_sid(semantic.second); + input.set_ring_offset(16 * driver_location); + add_input(input); + m_next_input_ring_offset += 16; + m_input_mask |= bit; + } + return true; + } + return false; +} + +int GeometryShader::do_allocate_reserved_registers() +{ + const int sel[6] = {0, 0 ,0, 1, 1, 1}; + const int chan[6] = {0, 1 ,3, 0, 1, 2}; + + /* Reserve registers used by the shaders (should check how many + * components are actually used */ + for (int i = 0; i < 6; ++i) { + m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]); + m_per_vertex_offsets[i]->pin_live_range(true); + } + + m_primitive_id = value_factory().allocate_pinned_register(0, 2); + m_primitive_id->pin_live_range(true); + m_invocation_id = value_factory().allocate_pinned_register(1, 3); + m_invocation_id->pin_live_range(true); + + value_factory().set_virtual_register_base(2); + + auto zero = value_factory().inline_const(ALU_SRC_0, 0); + + for (int i = 0; i < 4; ++i) { + m_export_base[i] = value_factory().temp_register(0, false); + emit_instruction(new 
AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write)); + } + + m_ring_item_sizes[0] = m_next_input_ring_offset; + + if (m_tri_strip_adj_fix) + emit_adj_fix(); + + return value_factory().next_register_index(); +} + +bool GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_emit_vertex: + return emit_vertex(intr, false); + case nir_intrinsic_end_primitive: + return emit_vertex(intr, true); + case nir_intrinsic_load_primitive_id: + return emit_simple_mov(intr->dest, 0, m_primitive_id); + case nir_intrinsic_load_invocation_id: + return emit_simple_mov(intr->dest, 0, m_invocation_id); + case nir_intrinsic_load_per_vertex_input: + return emit_load_per_vertex_input(intr); + default: + ; + } + return false; +} + +bool GeometryShader::emit_vertex(nir_intrinsic_instr* instr, bool cut) +{ + int stream = nir_intrinsic_stream_id(instr); + assert(stream < 4); + + auto cut_instr = new EmitVertexInstr(stream, cut); + + for(auto v: m_streamout_data) { + if (stream == 0 || v.first != VARYING_SLOT_POS) { + v.second->patch_ring(stream, m_export_base[stream]); + cut_instr->add_required_instr(v.second); + emit_instruction(v.second); + } else + delete v.second; + } + m_streamout_data.clear(); + + emit_instruction(cut_instr); + start_new_block(0); + + if (!cut) { + auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream], + value_factory().literal(m_noutputs), + AluInstr::last_write); + //ir->add_required_instr(cut_instr); + emit_instruction(ir); + } + + + + return true; +} + +bool GeometryShader::store_output(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + uint32_t write_mask = nir_intrinsic_write_mask(instr); + uint32_t shift = nir_intrinsic_component(instr); + + RegisterVec4::Swizzle src_swz {7,7,7,7}; + 
for (unsigned i = shift; i < 4; ++i) { + src_swz[i] = (1 << i) & (write_mask << shift) ? i - shift: 7; + } + + auto out_value = value_factory().src_vec4(instr->src[0], pin_group, src_swz); + + AluInstr *ir = nullptr; + if (m_streamout_data[location]) { + auto value = m_streamout_data[location]->value(); + auto tmp = value_factory().temp_vec4(pin_group); + + for (unsigned i = 0; i < 4 - shift; ++i) { + if (!(write_mask & (1 << i))) + continue; + if (value[i]->chan() < 4) { + ir = new AluInstr(op1_mov, tmp[i], value[src_swz[i]], AluInstr::write); + } else if (out_value[i]->chan() < 4) { + ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write); + } else + continue; + emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp, + 4 * driver_location, + instr->num_components, m_export_base[0]); + } else { + + sfn_log << SfnLog::io << "None-streamout "; + bool need_copy = shift != 0; + if (!need_copy) { + for (int i = 0; i < 4; ++i) { + if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) { + need_copy = true; + break; + } + } + } + + if (need_copy) { + auto tmp = value_factory().temp_vec4(pin_group); + for (unsigned i = 0; i < 4 - shift; ++i) { + if (out_value[i]->chan() < 4) { + ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write); + emit_instruction(ir); + } + } + ir->set_alu_flag(alu_last_instr); + m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp, + 4 * driver_location, + instr->num_components, m_export_base[0]); + } else { + m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, out_value, + 4 * driver_location, + instr->num_components, m_export_base[0]); + } + } + + + + + return true; +} + +bool GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr* instr) +{ + auto dest = value_factory().dest_vec4(instr->dest, pin_group); + + 
RegisterVec4::Swizzle dest_swz{7,7,7,7}; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + dest_swz[i] = i + nir_intrinsic_component(instr); + } + + auto literal_index = nir_src_as_const_value(instr->src[0]); + + if (!literal_index) { + sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n"; + return false; + } + assert(literal_index->u32 < 6); + assert(nir_intrinsic_io_semantics(instr).num_slots == 1); + + auto addr = m_per_vertex_offsets[literal_index->u32]; + auto fetch = new LoadFromBuffer(dest, dest_swz, addr, + 16 * nir_intrinsic_base(instr), + R600_GS_RING_CONST_BUFFER, nullptr, + fmt_invalid); + + fetch->set_fetch_flag(FetchInstr::use_const_field); + fetch->set_num_format(vtx_nf_norm); + fetch->reset_fetch_flag(FetchInstr::format_comp_signed); + + emit_instruction(fetch); + return true; +} + +void GeometryShader::do_finalize() +{ + +} + +void GeometryShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_GEOMETRY; + sh_info->ring_item_sizes[0] = m_ring_item_sizes[0]; +} + +bool GeometryShader::read_prop(std::istream& is) +{ + (void)is; + return true; +} + +void GeometryShader::do_print_properties(std::ostream& os) const +{ + (void)os; +} + +void GeometryShader::emit_adj_fix() +{ + auto adjhelp0 = value_factory().temp_register(); + + emit_instruction(new AluInstr(op2_and_int, adjhelp0, + m_primitive_id, value_factory().one_i(), + AluInstr::last_write)); + + int reg_indices[6]; + int rotate_indices[6] = {4, 5, 0, 1, 2, 3}; + + reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel(); + reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel(); + + std::array adjhelp; + + AluInstr *ir = nullptr; + for (int i = 0; i < 6; i++) { + adjhelp[i] = value_factory().temp_register(); + ir = new AluInstr(op3_cnde_int, adjhelp[i], + adjhelp0, m_per_vertex_offsets[i], + m_per_vertex_offsets[rotate_indices[i]], + AluInstr::write); + + 
emit_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + + for (int i = 0; i < 6; i++) + m_per_vertex_offsets[i] = adjhelp[i]; +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h new file mode 100644 index 0000000..f08c2e1 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h @@ -0,0 +1,66 @@ +#ifndef GEOMETRYSHADER_H +#define GEOMETRYSHADER_H + + +#include "sfn_shader.h" +#include "sfn_instr_export.h" + +namespace r600 { + +class GeometryShader : public Shader { +public: + GeometryShader(const r600_shader_key& key); + + + +private: + bool do_scan_instruction(nir_instr *instr) override; + int do_allocate_reserved_registers() override; + + bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; + + bool process_store_output(nir_intrinsic_instr *intr); + bool process_load_input(nir_intrinsic_instr *intr); + + void do_finalize() override; + + void do_get_shader_info(r600_shader *sh_info) override; + + bool read_prop(std::istream& is) override; + void do_print_properties(std::ostream& os) const override; + + void emit_adj_fix(); + + bool emit_load_per_vertex_input(nir_intrinsic_instr* instr); + + bool load_input(UNUSED nir_intrinsic_instr *intr) override { + unreachable("load_input must be lowered in GS"); + }; + bool store_output(nir_intrinsic_instr* instr) override; + bool emit_vertex(nir_intrinsic_instr* instr, bool cut); + + std::array m_per_vertex_offsets{nullptr}; + PRegister m_primitive_id{nullptr}; + PRegister m_invocation_id{nullptr}; + std::array m_export_base{nullptr}; + + unsigned m_ring_item_sizes[4]{0}; + + bool m_tri_strip_adj_fix{false}; + bool m_first_vertex_emitted{false}; + int m_offset{0}; + int m_next_input_ring_offset{0}; + int m_clip_dist_mask{0}; + int m_cur_ring_output{0}; + bool m_gs_tri_strip_adj_fix{false}; + uint64_t m_input_mask{0}; + unsigned m_noutputs{0}; + bool m_out_viewport{false}; + bool m_out_misc_write{false}; + + std::map m_streamout_data; 
+}; + +} + +#endif // GEOMETRYSHADER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp deleted file mode 100644 index d26f24d..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "sfn_shader_tcs.h" -#include "sfn_instruction_gds.h" -#include "tgsi/tgsi_from_mesa.h" - -namespace r600 { - -TcsShaderFromNir::TcsShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector& sel, - const r600_shader_key& key, - enum amd_gfx_level gfx_level): - ShaderFromNirProcessor (PIPE_SHADER_TESS_CTRL, sel, sh->shader, - sh->scratch_space_needed, gfx_level, key.tcs.first_atomic_counter), - m_reserved_registers(0) -{ - sh_info().tcs_prim_mode = key.tcs.prim_mode; -} - -bool TcsShaderFromNir::scan_sysvalue_access(nir_instr *instr) -{ - if (instr->type != nir_instr_type_intrinsic) - return true; - - auto intr = nir_instr_as_intrinsic(instr); - - switch (intr->intrinsic) { - case nir_intrinsic_load_primitive_id: - m_sv_values.set(es_primitive_id); - break; - case nir_intrinsic_load_invocation_id: - m_sv_values.set(es_invocation_id); - break; - case nir_intrinsic_load_tcs_rel_patch_id_r600: - m_sv_values.set(es_rel_patch_id); - break; - case nir_intrinsic_load_tcs_tess_factor_base_r600: - m_sv_values.set(es_tess_factor_base); - break; - default: - - ; - } - return true; -} - -bool TcsShaderFromNir::do_allocate_reserved_registers() -{ - if (m_sv_values.test(es_primitive_id)) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,0); - gpr->set_as_input(); - m_primitive_id.reset(gpr); - } - - if (m_sv_values.test(es_invocation_id)) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,2); - gpr->set_as_input(); - m_invocation_id.reset(gpr); - } - - if (m_sv_values.test(es_rel_patch_id)) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,1); - gpr->set_as_input(); - m_rel_patch_id.reset(gpr); - } - - if (m_sv_values.test(es_tess_factor_base)) { - 
m_reserved_registers = 1; - auto gpr = new GPRValue(0,3); - gpr->set_as_input(); - m_tess_factor_base.reset(gpr); - } - - set_reserved_registers(m_reserved_registers); - - return true; -} - -bool TcsShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_tcs_rel_patch_id_r600: - return load_preloaded_value(instr->dest, 0, m_rel_patch_id); - case nir_intrinsic_load_invocation_id: - return load_preloaded_value(instr->dest, 0, m_invocation_id); - case nir_intrinsic_load_primitive_id: - return load_preloaded_value(instr->dest, 0, m_primitive_id); - case nir_intrinsic_load_tcs_tess_factor_base_r600: - return load_preloaded_value(instr->dest, 0, m_tess_factor_base); - case nir_intrinsic_store_tf_r600: - return store_tess_factor(instr); - default: - return false; - } -} - -bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr) -{ - const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ? 
- GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7}); - auto val = vec_from_nir_with_fetch_constant(instr->src[0], - (1 << instr->src[0].ssa->num_components) - 1, swizzle); - emit_instruction(new GDSStoreTessFactor(val)); - return true; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h b/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h deleted file mode 100644 index 886791e..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef TCSSHADERFROMNIR_H -#define TCSSHADERFROMNIR_H - -#include "sfn_shader_base.h" - -namespace r600 { - -class TcsShaderFromNir : public ShaderFromNirProcessor -{ -public: - TcsShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum amd_gfx_level gfx_level); - bool scan_sysvalue_access(nir_instr *instr) override; - -private: - bool do_allocate_reserved_registers() override; - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - bool store_tess_factor(nir_intrinsic_instr* instr); - - void do_finalize() override {} - - int m_reserved_registers; - PValue m_patch_id; - PValue m_rel_patch_id; - PValue m_invocation_id; - PValue m_primitive_id; - PValue m_tess_factor_base; - - -}; - -} - -#endif // TCSSHADERFROMNIR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp new file mode 100644 index 0000000..22054bb --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp @@ -0,0 +1,275 @@ + +#include "sfn_instr_export.h" +#include "sfn_shader_tess.h" +#include "sfn_shader_vs.h" + + +#include + +namespace r600 { + +using std::string; + +TCSShader::TCSShader(const r600_shader_key& key): + Shader("TCS"), + m_tcs_prim_mode(key.tcs.prim_mode) +{ + +} + +bool TCSShader::do_scan_instruction(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch 
(ii->intrinsic) { + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_invocation_id: + m_sv_values.set(es_invocation_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_load_tcs_tess_factor_base_r600: + m_sv_values.set(es_tess_factor_base); + break; + default: + return false; + ; + } + return true; +} + +int TCSShader::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_primitive_id)) { + m_primitive_id = value_factory().allocate_pinned_register(0, 0); + m_primitive_id->pin_live_range(true); + } + + if (m_sv_values.test(es_invocation_id)) { + m_invocation_id = value_factory().allocate_pinned_register(0, 2); + m_invocation_id->pin_live_range(true); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_rel_patch_id = value_factory().allocate_pinned_register(0, 1);; + m_rel_patch_id->pin_live_range(true); + } + + if (m_sv_values.test(es_tess_factor_base)) { + m_tess_factor_base = value_factory().allocate_pinned_register(0, 3); + m_tess_factor_base->pin_live_range(true); + } + + return value_factory().next_register_index();; +} + +bool TCSShader::process_stage_intrinsic(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return emit_simple_mov(instr->dest, 0, m_rel_patch_id); + case nir_intrinsic_load_invocation_id: + return emit_simple_mov(instr->dest, 0, m_invocation_id); + case nir_intrinsic_load_primitive_id: + return emit_simple_mov(instr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_tess_factor_base_r600: + return emit_simple_mov(instr->dest, 0, m_tess_factor_base); + case nir_intrinsic_store_tf_r600: + return store_tess_factor(instr); + default: + return false; + } +} + +bool TCSShader::store_tess_factor(nir_intrinsic_instr* instr) +{ + bool two_parts = nir_src_num_components(instr->src[0]) == 4; + + auto value0 = value_factory().temp_vec4(pin_group, 
{0, 1, 7, 7}); + emit_instruction(new AluInstr(op1_mov, value0[0], value_factory().src(instr->src[0], 0), + AluInstr::write)); + emit_instruction(new AluInstr(op1_mov, value0[1], value_factory().src(instr->src[0], 1), + two_parts ? AluInstr::write : AluInstr::last_write)); + + + if (two_parts) { + auto value1 = value_factory().temp_vec4(pin_group, {2, 3, 7, 7}); + emit_instruction(new AluInstr(op1_mov, value1[0], value_factory().src(instr->src[0], 2), + AluInstr::write)); + emit_instruction(new AluInstr(op1_mov, value1[1], value_factory().src(instr->src[0], 3), + AluInstr::last_write)); + emit_instruction(new WriteTFInstr(value1)); + } + + emit_instruction(new WriteTFInstr(value0)); + return true; +} + + +void TCSShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_TESS_CTRL; + sh_info->tcs_prim_mode = m_tcs_prim_mode; +} + +bool TCSShader::read_prop(std::istream& is) +{ + string value; + is >> value; + + auto splitpos = value.find(':'); + assert(splitpos != string::npos); + + std::istringstream ival(value); + string name; + string val; + + std::getline(ival, name, ':'); + + if (name == "TCS_PRIM_MODE") + ival >> m_tcs_prim_mode; + else + return false; + return true; +} + +void TCSShader::do_print_properties(std::ostream& os) const +{ + os << "PROP TCS_PRIM_MODE:" << m_tcs_prim_mode << "\n"; +} + +TESShader::TESShader(const pipe_stream_output_info *so_info, const r600_shader *gs_shader, + const r600_shader_key& key): + VertexStageShader("TES"), + m_vs_as_gs_a(key.vs.as_gs_a), + m_tes_as_es(key.tes.as_es) +{ + if (key.tes.as_es) + m_export_processor = new VertexExportForGS(this, gs_shader); + else + m_export_processor = new VertexExportForFs(this, so_info, key); +} + +bool TESShader::do_scan_instruction(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + 
m_sv_values.set(es_tess_coord); + break; + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_store_output: { + int driver_location = nir_intrinsic_base(intr); + int location = nir_intrinsic_io_semantics(intr).location; + auto semantic = r600_get_varying_semantic(location); + tgsi_semantic name = (tgsi_semantic)semantic.first; + unsigned sid = semantic.second; + auto write_mask = nir_intrinsic_write_mask(intr); + + if (location == VARYING_SLOT_LAYER) + write_mask = 4; + + ShaderOutput output(driver_location, name, write_mask); + output.set_sid(sid); + + switch (location) { + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_POS: + case VARYING_SLOT_CLIP_VERTEX: + case VARYING_SLOT_EDGE: { + break; + } + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_VIEW_INDEX: + default: + output.set_is_param(true); + } + add_output(output); + break; + } + default: + return false; + } + return true; +} + +int TESShader::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_tess_coord)) { + m_tess_coord[0] = value_factory().allocate_pinned_register(0, 0); + m_tess_coord[0]->pin_live_range(true); + m_tess_coord[1] = value_factory().allocate_pinned_register(0, 1); + m_tess_coord[1]->pin_live_range(true); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_rel_patch_id = value_factory().allocate_pinned_register(0, 2); + m_rel_patch_id->pin_live_range(true); + } + + if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) { + m_primitive_id = value_factory().allocate_pinned_register(0, 3); + m_primitive_id->pin_live_range(true); + } + return value_factory().next_register_index(); +} + +bool TESShader::process_stage_intrinsic(nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + return 
emit_simple_mov(intr->dest, 0, m_tess_coord[0], pin_none) && + emit_simple_mov(intr->dest, 1, m_tess_coord[1], pin_none); + case nir_intrinsic_load_primitive_id: + return emit_simple_mov(intr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return emit_simple_mov(intr->dest, 0, m_rel_patch_id); + case nir_intrinsic_store_output: + return m_export_processor->store_output(*intr); + default: + return false; + } +} + +void TESShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_TESS_EVAL; + m_export_processor->get_shader_info(sh_info); +} + +void TESShader::do_finalize() +{ + m_export_processor->finalize(); +} + +bool TESShader::TESShader::read_prop(std::istream& is) +{ + (void)is; + return true; +} + +void TESShader::do_print_properties(std::ostream& os) const +{ + (void)os; +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess.h b/src/gallium/drivers/r600/sfn/sfn_shader_tess.h new file mode 100644 index 0000000..90d6a19 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_tess.h @@ -0,0 +1,76 @@ +#ifndef TCS_H +#define TCS_H + +#include "sfn_shader_vs.h" + +namespace r600 { + +class VertexExportStage; + +class TCSShader : public Shader { +public: + TCSShader(const r600_shader_key& key); +private: + bool do_scan_instruction(nir_instr *instr) override; + int do_allocate_reserved_registers() override; + + bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; + void do_get_shader_info(r600_shader *sh_info) override; + bool store_tess_factor(nir_intrinsic_instr* instr); + + bool load_input(nir_intrinsic_instr *intr) override { + unreachable("load_input must be lowered in TCS"); + }; + bool store_output(nir_intrinsic_instr *intr) override { + unreachable("load_output must be lowered in TCS"); + }; + + bool read_prop(std::istream& is) override; + void do_print_properties(std::ostream& os) const override; + + PRegister m_tess_factor_base; + PRegister m_rel_patch_id; + 
PRegister m_invocation_id; + PRegister m_primitive_id; + + unsigned m_tcs_prim_mode{0}; +}; + +class TESShader : public VertexStageShader { +public: + TESShader(const pipe_stream_output_info *so_info, const r600_shader *gs_shader, + const r600_shader_key& key); +private: + bool do_scan_instruction(nir_instr *instr) override; + int do_allocate_reserved_registers() override; + + bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; + void do_get_shader_info(r600_shader *sh_info) override; + + bool load_input(UNUSED nir_intrinsic_instr *intr) override { + unreachable("load_input must be lowered in TES"); + }; + bool store_output(UNUSED nir_intrinsic_instr *intr) override { + unreachable("load_output must be lowered in TES"); + }; + + bool read_prop(std::istream& is) override; + void do_print_properties(std::ostream& os) const override; + + void do_finalize() override; + + PRegister m_tess_coord[2]= {nullptr, nullptr}; + PRegister m_rel_patch_id{nullptr}; + PRegister m_primitive_id{nullptr}; + + VertexExportStage *m_export_processor{nullptr}; + + int m_tcs_vertices_out{0}; + bool m_vs_as_gs_a{false}; + bool m_tes_as_es{false}; +}; + + +} + +#endif // TCS_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp deleted file mode 100644 index 20a3f85..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "sfn_shader_tess_eval.h" -#include "tgsi/tgsi_from_mesa.h" - -namespace r600 { - -TEvalShaderFromNir::TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, - const r600_shader_key& key, r600_shader *gs_shader, - enum amd_gfx_level gfx_level): - VertexStage(PIPE_SHADER_TESS_EVAL, sel, sh->shader, - sh->scratch_space_needed, gfx_level, key.tes.first_atomic_counter), - m_reserved_registers(0), - m_key(key) - -{ - sh->shader.tes_as_es = key.tes.as_es; - if (key.tes.as_es) - m_export_processor.reset(new 
VertexStageExportForGS(*this, gs_shader)); - else - m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key)); -} - -bool TEvalShaderFromNir::scan_sysvalue_access(nir_instr *instr) -{ - if (instr->type != nir_instr_type_intrinsic) - return true; - - auto ir = nir_instr_as_intrinsic(instr); - - switch (ir->intrinsic) { - case nir_intrinsic_load_tess_coord_r600: - m_sv_values.set(es_tess_coord); - break; - case nir_intrinsic_load_primitive_id: - m_sv_values.set(es_primitive_id); - break; - case nir_intrinsic_load_tcs_rel_patch_id_r600: - m_sv_values.set(es_rel_patch_id); - break; - case nir_intrinsic_store_output: - m_export_processor->scan_store_output(ir); - break; - default: - ; - } - return true; -} - -void TEvalShaderFromNir::emit_shader_start() -{ - m_export_processor->emit_shader_start(); -} - -bool TEvalShaderFromNir::do_allocate_reserved_registers() -{ - if (m_sv_values.test(es_tess_coord)) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,0); - gpr->set_as_input(); - m_tess_coord[0].reset(gpr); - gpr = new GPRValue(0,1); - gpr->set_as_input(); - m_tess_coord[1].reset(gpr); - } - - if (m_sv_values.test(es_rel_patch_id)) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,2); - gpr->set_as_input(); - m_rel_patch_id.reset(gpr); - } - - if (m_sv_values.test(es_primitive_id) || - m_key.vs.as_gs_a) { - m_reserved_registers = 1; - auto gpr = new GPRValue(0,3); - gpr->set_as_input(); - m_primitive_id.reset(gpr); - if (m_key.vs.as_gs_a) - inject_register(0, 3, m_primitive_id, false); - } - set_reserved_registers(m_reserved_registers); - return true; -} - -bool TEvalShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_tess_coord_r600: - return load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && - load_preloaded_value(instr->dest, 1, m_tess_coord[1]); - case nir_intrinsic_load_primitive_id: - return load_preloaded_value(instr->dest, 0, 
m_primitive_id); - case nir_intrinsic_load_tcs_rel_patch_id_r600: - return load_preloaded_value(instr->dest, 0, m_rel_patch_id); - case nir_intrinsic_store_output: - return m_export_processor->store_output(instr); - default: - return false; - } -} - -void TEvalShaderFromNir::do_finalize() -{ - m_export_processor->finalize_exports(); -} - - -bool TEvalShaderFromNir::emit_load_tess_coord(nir_intrinsic_instr* instr) -{ - bool result = load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && - load_preloaded_value(instr->dest, 1, m_tess_coord[1]); - - m_tess_coord[2] = from_nir(instr->dest, 2); - - - emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], - m_tess_coord[0], {alu_last_instr, alu_write, alu_src0_neg})); - emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], - m_tess_coord[1], {alu_last_instr, alu_write, alu_src0_neg})); - return result; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h b/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h deleted file mode 100644 index 4ae572f..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef TEVALSHADERFROMNIR_H -#define TEVALSHADERFROMNIR_H - -#include "sfn_shader_base.h" -#include "sfn_vertexstageexport.h" - -namespace r600 { - -class TEvalShaderFromNir : public VertexStage -{ -public: - TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, - const r600_shader_key& key, r600_shader *gs_shader, - enum amd_gfx_level gfx_level); - bool scan_sysvalue_access(nir_instr *instr) override; - PValue primitive_id() override {return m_primitive_id;} - private: - void emit_shader_start() override; - bool do_allocate_reserved_registers() override; - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - bool emit_load_tess_coord(nir_intrinsic_instr* instr); - bool load_tess_z_coord(nir_intrinsic_instr* instr); - - void do_finalize() override; - - - unsigned 
m_reserved_registers; - PValue m_tess_coord[3]; - PValue m_rel_patch_id; - PValue m_primitive_id; - - std::unique_ptr m_export_processor; - const r600_shader_key& m_key; -}; - - -} - -#endif // TEVALSHADERFROMNIR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp deleted file mode 100644 index dbce6f9..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#include "pipe/p_defines.h" -#include "tgsi/tgsi_from_mesa.h" -#include "sfn_shader_vertex.h" -#include "sfn_instruction_lds.h" - -#include - - -namespace r600 { - -using std::priority_queue; - -VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector& sel, - const r600_shader_key& key, - struct r600_shader* gs_shader, - enum amd_gfx_level gfx_level): - VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader, - sh->scratch_space_needed, gfx_level, key.vs.first_atomic_counter), - m_num_clip_dist(0), - m_last_param_export(nullptr), - m_last_pos_export(nullptr), - m_pipe_shader(sh), - m_enabled_stream_buffers_mask(0), - m_so_info(&sel.so), - m_vertex_id(), - m_key(key), - m_max_attrib(0) -{ - // reg 0 is used in the fetch shader - increment_reserved_registers(); - - sh_info().atomic_base = key.vs.first_atomic_counter; - sh_info().vs_as_gs_a = m_key.vs.as_gs_a; - - if (key.vs.as_es) { - sh->shader.vs_as_es = true; - m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader)); - } else if (key.vs.as_ls) { - sh->shader.vs_as_ls = true; - sfn_log << SfnLog::trans << "Start VS for GS\n"; - m_export_processor.reset(new VertexStageExportForES(*this)); - } else { - m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key)); - } -} - -bool VertexShaderFromNir::scan_inputs_read(const nir_shader *sh) -{ - uint64_t inputs = sh->info.inputs_read; - - while (inputs) { - unsigned i = u_bit_scan64(&inputs); - if (i < VERT_ATTRIB_MAX) { - ++sh_info().ninput; - } - } - m_max_attrib = sh_info().ninput; - return true; -} - -bool VertexShaderFromNir::do_allocate_reserved_registers() -{ - /* Since the vertex ID is nearly always used, we add it here as an input so - * that the registers used for vertex attributes don't get clobbered by the - * register merge step */ - auto R0x = new GPRValue(0,0); - R0x->set_as_input(); - m_vertex_id.reset(R0x); - inject_register(0, 0, m_vertex_id, false); - - if (m_key.vs.as_gs_a || 
m_sv_values.test(es_primitive_id)) { - auto R0z = new GPRValue(0,2); - R0x->set_as_input(); - m_primitive_id.reset(R0z); - inject_register(0, 2, m_primitive_id, false); - } - - if (m_sv_values.test(es_instanceid)) { - auto R0w = new GPRValue(0,3); - R0w->set_as_input(); - m_instance_id.reset(R0w); - inject_register(0, 3, m_instance_id, false); - } - - - if (m_sv_values.test(es_rel_patch_id)) { - auto R0y = new GPRValue(0,1); - R0y->set_as_input(); - m_rel_vertex_id.reset(R0y); - inject_register(0, 1, m_rel_vertex_id, false); - } - - m_attribs.resize(4 * m_max_attrib + 4); - for (unsigned i = 0; i < m_max_attrib + 1; ++i) { - for (unsigned k = 0; k < 4; ++k) { - auto gpr = std::make_shared(i + 1, k); - gpr->set_as_input(); - m_attribs[4 * i + k] = gpr; - inject_register(i + 1, k, gpr, false); - } - } - - return true; -} - -void VertexShaderFromNir::emit_shader_start() -{ - m_export_processor->emit_shader_start(); -} - -bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); - switch (ii->intrinsic) { - case nir_intrinsic_load_vertex_id: - m_sv_values.set(es_vertexid); - break; - case nir_intrinsic_load_instance_id: - m_sv_values.set(es_instanceid); - break; - case nir_intrinsic_load_tcs_rel_patch_id_r600: - m_sv_values.set(es_rel_patch_id); - break; - case nir_intrinsic_store_output: - m_export_processor->scan_store_output(ii); - default: - ; - } - } - default: - ; - } - return true; -} - -bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_vertex_id: - return load_preloaded_value(instr->dest, 0, m_vertex_id); - case nir_intrinsic_load_tcs_rel_patch_id_r600: - return load_preloaded_value(instr->dest, 0, m_rel_vertex_id); - case nir_intrinsic_load_instance_id: - return load_preloaded_value(instr->dest, 0, m_instance_id); - case 
nir_intrinsic_store_local_shared_r600: - return emit_store_local_shared(instr); - case nir_intrinsic_store_output: - return m_export_processor->store_output(instr); - case nir_intrinsic_load_input: - return load_input(instr); - - default: - return false; - } -} - -bool VertexShaderFromNir::load_input(nir_intrinsic_instr* instr) -{ - unsigned location = nir_intrinsic_base(instr); - - if (location < VERT_ATTRIB_MAX) { - for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { - auto src = m_attribs[4 * location + i]; - - if (i == 0) - set_input(location, src); - - load_preloaded_value(instr->dest, i, src, i == (unsigned)(instr->num_components - 1)); - } - return true; - } - fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location); - return false; -} - -bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr) -{ - unsigned write_mask = nir_intrinsic_write_mask(instr); - - auto address = from_nir(instr->src[1], 0); - int swizzle_base = (write_mask & 0x3) ? 
0 : 2; - write_mask |= write_mask >> 2; - - auto value = from_nir(instr->src[0], swizzle_base); - if (!(write_mask & 2)) { - emit_instruction(new LDSWriteInstruction(address, 1, value)); - } else { - auto value1 = from_nir(instr->src[0], swizzle_base + 1); - emit_instruction(new LDSWriteInstruction(address, 1, value, value1)); - } - - return true; -} - -void VertexShaderFromNir::do_finalize() -{ - m_export_processor->finalize_exports(); -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h deleted file mode 100644 index a6577c2..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef sfn_vertex_shader_from_nir_h -#define sfn_vertex_shader_from_nir_h - -#include "sfn_shader_base.h" -#include "sfn_vertexstageexport.h" - -namespace r600 { - -class VertexShaderFromNir : public VertexStage { -public: - VertexShaderFromNir(r600_pipe_shader *sh, - r600_pipe_shader_selector &sel, - const r600_shader_key &key, r600_shader *gs_shader, - enum amd_gfx_level gfx_level); - - bool scan_sysvalue_access(nir_instr *instr) override; - - PValue primitive_id() override {return m_primitive_id;} -protected: - - // todo: encapsulate - unsigned m_num_clip_dist; - ExportInstruction *m_last_param_export; - ExportInstruction *m_last_pos_export; - r600_pipe_shader *m_pipe_shader; - unsigned m_enabled_stream_buffers_mask; - const pipe_stream_output_info *m_so_info; - void do_finalize() override; - - std::map m_param_map; - - bool scan_inputs_read(const nir_shader *sh) override; - -private: - bool load_input(nir_intrinsic_instr* instr); - - void finalize_exports(); - - void emit_shader_start() override; - bool do_allocate_reserved_registers() override; - bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - bool emit_store_local_shared(nir_intrinsic_instr* instr); - - PValue m_vertex_id; - PValue m_instance_id; - PValue m_rel_vertex_id; - PValue m_primitive_id; - std::vector m_attribs; - r600_shader_key m_key; - - std::unique_ptr m_export_processor; - unsigned m_max_attrib; -}; - -} - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp new file mode 100644 index 0000000..2d043ff --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -0,0 +1,663 @@ + +#include "sfn_debug.h" +#include "sfn_shader_vs.h" + +#include "sfn_instr_alugroup.h" +#include "sfn_instr_export.h" + +#include "tgsi/tgsi_from_mesa.h" + + +namespace r600 { + +uint32_t VertexStageShader::enabled_stream_buffers_mask() const +{ + return m_enabled_stream_buffers_mask; +} + +void 
VertexStageShader::combine_enabled_stream_buffers_mask(uint32_t mask) +{ + m_enabled_stream_buffers_mask = mask; +} + +bool VertexExportStage::store_output(nir_intrinsic_instr& intr) +{ + auto index = nir_src_as_const_value(intr.src[1]); + assert(index && "Indirect outputs not supported"); + + const store_loc store_info = { + nir_intrinsic_component(&intr), + nir_intrinsic_io_semantics(&intr).location, + (unsigned)nir_intrinsic_base(&intr) + index->u32, + 0 + }; + + return do_store_output(store_info, intr); +} + +VertexExportStage::VertexExportStage(VertexStageShader *parent): + m_parent(parent) +{ + +} + +VertexExportForFs::VertexExportForFs(VertexStageShader *parent, + const pipe_stream_output_info *so_info, + const r600_shader_key& key): + VertexExportStage(parent), + m_vs_as_gs_a(key.vs.as_gs_a), + m_vs_prim_id_out(key.vs.prim_id_out), + m_so_info(so_info) +{ +} + +bool VertexExportForFs::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) +{ + switch (store_info.location) { + + case VARYING_SLOT_PSIZ: + m_writes_point_size = true; + FALLTHROUGH; + case VARYING_SLOT_POS: + return emit_varying_pos(store_info, intr); + case VARYING_SLOT_EDGE: { + std::array swizzle_override = {7 ,0, 7, 7}; + return emit_varying_pos(store_info, intr, &swizzle_override); + } + case VARYING_SLOT_VIEWPORT: { + std::array swizzle_override = {7, 7, 7, 0}; + return emit_varying_pos(store_info, intr, &swizzle_override) && + emit_varying_param(store_info, intr); + } + case VARYING_SLOT_CLIP_VERTEX: + return emit_clip_vertices(store_info, intr); + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_num_clip_dist += 4; + return emit_varying_param(store_info, intr) && emit_varying_pos(store_info, intr); + case VARYING_SLOT_LAYER: { + m_out_misc_write = 1; + m_vs_out_layer = 1; + std::array swz = {7,7,0,7}; + return emit_varying_pos(store_info, intr, &swz) && + emit_varying_param(store_info, intr); + } + case VARYING_SLOT_VIEW_INDEX: + return 
emit_varying_pos(store_info, intr) && + emit_varying_param(store_info, intr); + + default: + return emit_varying_param(store_info, intr); + return false; + } +} + +bool VertexExportForFs::emit_clip_vertices(const store_loc &store_info, const nir_intrinsic_instr &instr) +{ + auto& vf = m_parent->value_factory(); + + m_cc_dist_mask = 0xff; + m_clip_dist_write = 0xff; + + m_clip_vertex = vf.src_vec4(instr.src[store_info.data_loc], pin_group, {0,1,2,3}); + + m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex; + + RegisterVec4 clip_dist[2] = { vf.temp_vec4(pin_group), vf.temp_vec4(pin_group)}; + + for (int i = 0; i < 8; i++) { + int oreg = i >> 2; + int ochan = i & 3; + AluInstr *ir = nullptr; + AluInstr::SrcValues src(8); + + for (int j = 0; j < 4; j++) { + src[2 * j] = m_clip_vertex[j]; + src[2 * j + 1] = vf.uniform(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER); + } + + ir = new AluInstr(op2_dot4_ieee, clip_dist[oreg][ochan], src, AluInstr::last_write, 4); + m_parent->emit_instruction(ir); + } + + m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[0]); + m_parent->emit_instruction(m_last_pos_export); + + m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[1]); + m_parent->emit_instruction(m_last_pos_export); + + return true; +} + +void VertexExportForFs::get_shader_info(r600_shader *sh_info) const +{ + sh_info->cc_dist_mask = m_cc_dist_mask; + sh_info->clip_dist_write = m_clip_dist_write; + sh_info->vs_as_gs_a = m_vs_as_gs_a; + sh_info->vs_out_edgeflag = m_out_edgeflag; + sh_info->vs_out_viewport = m_out_viewport; + sh_info->vs_out_misc_write = m_out_misc_write; + sh_info->vs_out_point_size = m_out_point_size; + sh_info->vs_out_layer = m_vs_out_layer; +} + +void VertexExportForFs::finalize() +{ + if (m_vs_as_gs_a) { + auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7}); + m_parent->emit_instruction(new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), 
AluInstr::last_write)); + int param = m_last_param_export ? m_last_param_export->location() + 1 : 0; + + m_last_param_export = new ExportInstr(ExportInstr::param, param, primid); + m_parent->emit_instruction(m_last_param_export); + + ShaderOutput output(m_parent->noutputs(), TGSI_SEMANTIC_PRIMID, 1); + output.set_sid(0); + output.override_spi_sid(m_vs_prim_id_out); + m_parent->add_output(output); + } + + if (!m_last_pos_export) { + RegisterVec4 value(0, false, {7,7,7,7}); + m_last_pos_export = new ExportInstr(ExportInstr::pos, 0, value); + m_parent->emit_instruction(m_last_pos_export); + } + + if (!m_last_param_export) { + RegisterVec4 value(0,false, {7,7,7,7}); + m_last_param_export = new ExportInstr(ExportInstr::param, 0, value); + m_parent->emit_instruction(m_last_param_export); + } + + m_last_pos_export->set_is_last_export(true); + m_last_param_export->set_is_last_export(true); + + if (m_so_info && m_so_info->num_outputs) + emit_stream(-1); +} + +void VertexShader::do_get_shader_info(r600_shader *sh_info) +{ + sh_info->processor_type = PIPE_SHADER_VERTEX; + m_export_stage->get_shader_info(sh_info); +} + +bool VertexExportForFs::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr& intr, + std::array *swizzle_override) +{ + RegisterVec4::Swizzle swizzle; + uint32_t write_mask = 0; + + write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac; + + if (!swizzle_override) { + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? 
i - store_info.frac : 7; + } else + std::copy(swizzle_override->begin(), swizzle_override->end(), swizzle.begin()); + + int export_slot = 0; + + auto in_value = m_parent->value_factory().src_vec4(intr.src[0], pin_group, swizzle); + auto& value = in_value; + RegisterVec4 out_value = m_parent->value_factory().temp_vec4(pin_group, swizzle); + + switch (store_info.location) { + case VARYING_SLOT_EDGE: { + m_out_misc_write = true; + m_out_edgeflag = true; + auto src = m_parent->value_factory().src(intr.src[0], 0); + auto clamped = m_parent->value_factory().temp_register(); + m_parent->emit_instruction(new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr})); + m_parent->emit_instruction(new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write)); + value = out_value; + } + FALLTHROUGH; + case VARYING_SLOT_PSIZ: + m_out_misc_write = true; + m_out_point_size = true; + FALLTHROUGH; + case VARYING_SLOT_LAYER: + export_slot = 1; + break; + case VARYING_SLOT_VIEWPORT: + m_out_misc_write = true; + m_out_viewport = true; + export_slot = 1; + break; + case VARYING_SLOT_POS: + break; + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_cc_dist_mask |= write_mask << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0)); + m_clip_dist_write |= write_mask << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0)); + export_slot = m_cur_clip_pos++; + break; + default: + sfn_log << SfnLog::err << __func__ << "Unsupported location " + << store_info.location << "\n"; + return false; + } + + + m_last_pos_export = new ExportInstr(ExportInstr::pos, export_slot, value); + + m_output_registers[nir_intrinsic_base(&intr)] = &m_last_pos_export->value(); + + m_parent->emit_instruction(m_last_pos_export); + + return true; +} + + +bool VertexExportForFs::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr& intr) +{ + sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n"; + + int write_mask = 
nir_intrinsic_write_mask(&intr) << store_info.frac;
+   RegisterVec4::Swizzle swizzle;
+   for (int i = 0; i < 4; ++i)
+      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
+
+   Pin pin = util_bitcount(write_mask) > 1 ? pin_group: pin_free;
+
+   int export_slot = m_parent->output(nir_intrinsic_base(&intr)).pos();
+   auto value = m_parent->value_factory().temp_vec4(pin, swizzle);
+
+   AluInstr *alu = nullptr;
+   for (int i = 0; i < 4; ++i) {
+      if (swizzle[i] < 4) {
+         alu = new AluInstr(op1_mov, value[i], m_parent->value_factory().src(intr.src[0], swizzle[i]),
+                            AluInstr::write);
+         m_parent->emit_instruction(alu);
+      }
+   }
+   if (alu)
+      alu->set_alu_flag(alu_last_instr);
+
+   m_last_param_export = new ExportInstr(ExportInstr::param, export_slot, value);
+   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value();
+
+   m_parent->emit_instruction(m_last_param_export);
+
+   return true;
+}
+
+/* Emit the stream-out writes described by m_so_info.
+ *
+ * stream selects the outputs to handle: -1 processes every output, any
+ * other value only the outputs that are assigned to that stream.
+ *
+ * Returns false if the description exceeds the supported number of outputs
+ * or buffers, or if it names a register index for which no output register
+ * has been recorded in m_output_registers. */
+bool VertexExportForFs::emit_stream(int stream)
+{
+   assert(m_so_info);
+   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+      R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+      return false;
+   }
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (m_so_info->output[i].output_buffer >= 4) {
+         R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+                  m_so_info->output[i].output_buffer);
+         return false;
+      }
+   }
+   /* Sized by the same limit num_outputs was checked against above, so
+    * every index i < num_outputs used below is in bounds. */
+   const RegisterVec4 *so_gpr[PIPE_MAX_SO_OUTPUTS];
+   unsigned start_comp[PIPE_MAX_SO_OUTPUTS];
+   /* Backing storage for the copies created below; it is sized up front and
+    * never resized, so the pointers stored in so_gpr stay valid. */
+   std::vector<RegisterVec4> tmp(m_so_info->num_outputs);
+
+   /* Initialize locations where the outputs are stored. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Emit stream " << i
+              << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
+
+
+      so_gpr[i] = output_register(m_so_info->output[i].register_index);
+
+      if (!so_gpr[i]) {
+         sfn_log << SfnLog::err << "\nERR: register index "
+                 << m_so_info->output[i].register_index
+                 << " doesn't correspond to an output register\n";
+         return false;
+      }
+      start_comp[i] = m_so_info->output[i].start_component;
+      /* Lower outputs with dst_offset < start_component.
+       *
+       * We can only output 4D vectors with a write mask, e.g. we can
+       * only output the W component at offset 3, etc. If we want
+       * to store Y, Z, or W at buffer offset 0, we need to use MOV
+       * to move it to X and output X. */
+
+      bool need_copy = m_so_info->output[i].dst_offset < m_so_info->output[i].start_component;
+
+      /* A copy is also needed when a source value does not sit in the
+       * channel that matches its component index. */
+      int sc = m_so_info->output[i].start_component;
+      for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+         if ((*so_gpr[i])[j + sc]->chan() != j + sc) {
+            need_copy = true;
+            break;
+         }
+      }
+      if (need_copy) {
+         RegisterVec4::Swizzle swizzle = {0,1,2,3};
+         for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
+            swizzle[j] = 7;
+         tmp[i] = m_parent->value_factory().temp_vec4(pin_group, swizzle);
+
+         AluInstr *alu = nullptr;
+         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+            alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write});
+            m_parent->emit_instruction(alu);
+         }
+         if (alu)
+            alu->set_alu_flag(alu_last_instr);
+
+         /* The copy starts at component 0 of the temporary. */
+         start_comp[i] = 0;
+         so_gpr[i] = &tmp[i];
+      }
+      sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
+   }
+
+   uint32_t enabled_stream_buffers_mask = 0;
+   /* Write outputs to buffers. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      /* Outputs of other streams were skipped by the loop above, so
+       * so_gpr[i] and start_comp[i] were never set for them; they must be
+       * skipped here as well instead of being read uninitialized. */
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Write output buffer " << i
+              << " with register index " << m_so_info->output[i].register_index << "\n";
+
+      auto out_stream =
+            new StreamOutInstr(*so_gpr[i],
+                               m_so_info->output[i].num_components,
+                               m_so_info->output[i].dst_offset - start_comp[i],
+                               ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+                               m_so_info->output[i].output_buffer,
+                               m_so_info->output[i].stream);
+      m_parent->emit_instruction(out_stream);
+      /* One bit per buffer, four bits per stream. */
+      enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+   }
+   m_parent->combine_enabled_stream_buffers_mask(enabled_stream_buffers_mask);
+   return true;
+}
+
+/* Look up the register vector recorded for output loc; returns nullptr if
+ * nothing was recorded for that location. */
+const RegisterVec4 *VertexExportForFs::output_register(int loc) const
+{
+   const RegisterVec4 *retval = nullptr;
+   auto val = m_output_registers.find(loc);
+   if (val != m_output_registers.end())
+      retval = val->second;
+   return retval;
+}
+
+/* Pick the export stage from the shader key: as_es selects
+ * VertexExportForGS, as_ls selects VertexExportForTCS, and everything else
+ * uses VertexExportForFs. */
+VertexShader::VertexShader(const pipe_stream_output_info *so_info, r600_shader *gs_shader, r600_shader_key& key):
+   VertexStageShader("VS"),
+   m_vs_as_gs_a(key.vs.as_gs_a)
+{
+   if (key.vs.as_es)
+      m_export_stage = new VertexExportForGS(this, gs_shader);
+   else if (key.vs.as_ls)
+      m_export_stage = new VertexExportForTCS(this);
+   else
+      m_export_stage = new VertexExportForFs(this, so_info, key);
+}
+
+/* Collect the inputs, outputs and system values the shader uses; returns
+ * false for instructions that are not handled here. */
+bool VertexShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_input: {
+      int vtx_register = nir_intrinsic_base(intr) + 1;
+      if (m_last_vertex_atribute_register < vtx_register)
+         m_last_vertex_atribute_register = vtx_register;
+      return true;
+   }
+   case nir_intrinsic_store_output: {
+      int driver_location = nir_intrinsic_base(intr);
+      int location = nir_intrinsic_io_semantics(intr).location;
+      auto semantic = r600_get_varying_semantic(location);
+
tgsi_semantic name = (tgsi_semantic)semantic.first;
+      unsigned sid = semantic.second;
+      auto write_mask = nir_intrinsic_write_mask(intr);
+
+      if (location == VARYING_SLOT_LAYER)
+         write_mask = 4;
+
+      ShaderOutput output(driver_location, name, write_mask);
+      output.set_sid(sid);
+
+      switch (location) {
+      case VARYING_SLOT_PSIZ:
+      case VARYING_SLOT_POS:
+      case VARYING_SLOT_CLIP_VERTEX:
+      case VARYING_SLOT_EDGE: {
+         break;
+      }
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+      case VARYING_SLOT_VIEWPORT:
+      case VARYING_SLOT_LAYER:
+      case VARYING_SLOT_VIEW_INDEX:
+      default:
+         output.set_is_param(true);
+      }
+      add_output(output);
+      break;
+   }
+   case nir_intrinsic_load_vertex_id:
+      m_sv_values.set(es_vertexid);
+      break;
+   case nir_intrinsic_load_instance_id:
+      m_sv_values.set(es_instanceid);
+      break;
+   case nir_intrinsic_load_primitive_id:
+      m_sv_values.set(es_primitive_id);
+      break;
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      m_sv_values.set(es_rel_patch_id);
+      break;
+   default:
+      return false;
+   }
+
+   return true;
+}
+
+/* Bind the components of a vertex attribute load to the pinned register
+ * (driver_location + 1) of that attribute and record the input.
+ *
+ * SSA destinations get the pinned register injected directly, non-SSA
+ * destinations are filled with one MOV per component.
+ *
+ * Returns false (after printing a diagnostic) for locations that are not
+ * vertex attributes, i.e. location >= VERT_ATTRIB_MAX. */
+bool VertexShader::load_input(nir_intrinsic_instr *intr)
+{
+   unsigned driver_location = nir_intrinsic_base(intr);
+   unsigned location = nir_intrinsic_io_semantics(intr).location;
+
+   if (location >= VERT_ATTRIB_MAX) {
+      /* location is unsigned, hence %u */
+      fprintf(stderr, "r600-NIR: Unimplemented load_input for %u\n", location);
+      return false;
+   }
+
+   auto& vf = value_factory();
+
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+      auto src = vf.allocate_pinned_register(driver_location + 1, i);
+      src->pin_live_range(true);
+      if (intr->dest.is_ssa)
+         vf.inject_value(intr->dest, i, src);
+      else {
+         ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none), src, {alu_write});
+         emit_instruction(ir);
+      }
+   }
+   /* Only set when MOVs were emitted: flag the last one of them. */
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   ShaderInput input(driver_location, location);
+   input.set_gpr(driver_location + 1);
+   add_input(input);
+   return true;
+}
+
+
+int
VertexShader::do_allocate_reserved_registers()
+{
+   /* Reserve one pinned register (first argument 0, second argument a
+    * distinct value 0..3) for every system value that
+    * do_scan_instruction() found in use. */
+   if (m_sv_values.test(es_vertexid)) {
+      m_vertex_id = value_factory().allocate_pinned_register(0, 0);
+      m_vertex_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_instanceid)) {
+      m_instance_id = value_factory().allocate_pinned_register(0, 3);
+      m_instance_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) {
+      auto primitive_id = value_factory().allocate_pinned_register(0, 2);
+      primitive_id->pin_live_range(true);
+      set_primitive_id(primitive_id);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1);
+      m_rel_vertex_id->pin_live_range(true);
+   }
+
+   /* The attribute registers tracked in do_scan_instruction() are
+    * reserved as well. */
+   return m_last_vertex_atribute_register + 1;
+}
+
+/* Forward the store to the export stage selected in the constructor. */
+bool VertexShader::store_output(nir_intrinsic_instr *intr)
+{
+   return m_export_stage->store_output(*intr);
+}
+
+/* Serve the system value loads from the registers reserved in
+ * do_allocate_reserved_registers(); returns false for any other intrinsic. */
+bool VertexShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      return emit_simple_mov(intr->dest, 0, m_vertex_id);
+   case nir_intrinsic_load_instance_id:
+      return emit_simple_mov(intr->dest, 0, m_instance_id);
+   case nir_intrinsic_load_primitive_id:
+      return emit_simple_mov(intr->dest, 0, primitive_id());
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return emit_simple_mov(intr->dest, 0, m_rel_vertex_id);
+   default:
+      return false;
+   }
+}
+
+void VertexShader::do_finalize()
+{
+   m_export_stage->finalize();
+}
+
+/* Nothing is read here; always returns false. */
+bool VertexShader::read_prop(std::istream& is)
+{
+   (void)is;
+   return false;
+}
+
+/* Nothing is printed here. */
+void VertexShader::do_print_properties(std::ostream& os) const
+{
+   (void)os;
+}
+
+VertexExportForGS::VertexExportForGS(VertexStageShader *parent,
+                                     const r600_shader *gs_shader):
+   VertexExportStage(parent),
+   m_gs_shader(gs_shader)
+{
+
+}
+
+/* Write one vertex shader output to the ring at the offset of the geometry
+ * shader input that has the same name and sid.
+ *
+ * VARYING_SLOT_VIEWPORT only sets the viewport/misc-write flags. An output
+ * without a matching GS input is reported in the error log and dropped.
+ * Both cases still return true. */
+bool VertexExportForGS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& instr)
+{
+   int ring_offset = -1;
+   auto out_io = m_parent->output(store_info.driver_location);
+
+   sfn_log << SfnLog::io << "check output " << store_info.driver_location
+           << " name=" << out_io.name()<< " sid=" << out_io.sid() << "\n";
+
+   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
+      auto& in_io = m_gs_shader->input[k];
+      sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
+
+      if (in_io.name == out_io.name() &&
+          in_io.sid == out_io.sid()) {
+         ring_offset = in_io.ring_offset;
+         break;
+      }
+   }
+
+   if (store_info.location == VARYING_SLOT_VIEWPORT) {
+      m_vs_out_viewport = true;
+      m_vs_out_misc_write = true;
+      return true;
+   }
+
+   if (ring_offset == -1) {
+      sfn_log << SfnLog::err << "VS defines output at "
+              << store_info.driver_location << " name=" << out_io.name()
+              << " sid=" << out_io.sid() << " that is not consumed as GS input\n";
+      return true;
+   }
+
+   /* Components that are not written are masked out with swizzle 7. */
+   RegisterVec4::Swizzle src_swz = {7,7,7,7};
+   for (int i = 0; i < 4; ++i)
+      src_swz[i] = i < instr.num_components ? i : 7;
+
+   auto value = m_parent->value_factory().temp_vec4(pin_group, src_swz);
+
+   AluInstr *ir = nullptr;
+   for (unsigned int i = 0; i < instr.num_components ; ++i) {
+      ir = new AluInstr(op1_mov, value[i],
+                        m_parent->value_factory().src(instr.src[store_info.data_loc], i),
+                        AluInstr::write);
+      m_parent->emit_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   m_parent->emit_instruction(new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write, value,
+                                                  ring_offset >> 2, 4, nullptr));
+
+   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
+       store_info.location == VARYING_SLOT_CLIP_DIST1)
+      m_num_clip_dist += 4;
+
+   return true;
+}
+
+void VertexExportForGS::finalize()
+{
+
+}
+
+void VertexExportForGS::get_shader_info(r600_shader *sh_info) const
+{
+   sh_info->vs_out_viewport = m_vs_out_viewport;
+   sh_info->vs_out_misc_write = m_vs_out_misc_write;
+   sh_info->vs_as_es = true;
+}
+
+VertexExportForTCS::VertexExportForTCS(VertexStageShader *parent):
+   VertexExportStage(parent)
+{
+
+}
+
+void VertexExportForTCS::finalize()
+{
+
+}
+
+void VertexExportForTCS::get_shader_info(r600_shader *sh_info) const
+{
+   sh_info->vs_as_ls = 1;
+}
+
+
+bool VertexExportForTCS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr)
+{
+   (void)store_info;
+   (void)intr;
+   return true;
+}
+
+
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h
new file mode 100644
index 0000000..571ebd9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h
@@ -0,0 +1,169 @@
+#ifndef SFN_SHADER_VS_H
+#define SFN_SHADER_VS_H
+
+#include "sfn_shader.h"
+
+
+
+namespace r600 {
+
+/* Shader base class that adds the primitive ID register and the mask of
+ * enabled stream-out buffers on top of Shader. */
+class VertexStageShader : public Shader {
+protected:
+   using Shader::Shader;
+public:
+   PRegister primitive_id() const { return m_primitive_id;}
+   void set_primitive_id(PRegister prim_id) { m_primitive_id = prim_id;}
+
+   void combine_enabled_stream_buffers_mask(uint32_t mask);
+   uint32_t enabled_stream_buffers_mask() const override;
+
+private:
+   PRegister m_primitive_id{nullptr};
+   uint32_t m_enabled_stream_buffers_mask{0};
+};
+
+/* Interface for emitting the output stores of a vertex shader; the
+ * VertexShader constructor picks one of the implementations below. */
+class VertexExportStage : public Allocate {
+public:
+
+   VertexExportStage(VertexStageShader *parent);
+
+   bool store_output(nir_intrinsic_instr& intr);
+
+   virtual void finalize() = 0;
+
+   virtual void get_shader_info(r600_shader *sh_info) const = 0;
+
+protected:
+   /* Description of one output store as passed to do_store_output(). */
+   struct store_loc {
+      unsigned frac;
+      unsigned location;
+      unsigned driver_location;
+      int data_loc;
+   };
+
+   virtual bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) = 0;
+
+   VertexStageShader *m_parent;
+
+private:
+};
+
+/* Export stage used when the shader key selects neither as_es nor as_ls;
+ * it also emits the stream-out writes (emit_stream). */
+class VertexExportForFs : public VertexExportStage {
+   friend VertexExportStage;
+
+public:
+
+   VertexExportForFs(VertexStageShader *parent, const pipe_stream_output_info *so_info,
+                     const r600_shader_key& key);
+
+   void finalize() override;
+
+   void get_shader_info(r600_shader *sh_info) const override;
+
+private:
+
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+
+   bool emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr& intr,
+                         std::array<uint8_t, 4> *swizzle_override = nullptr);
+   bool emit_varying_param(const store_loc &store_info, nir_intrinsic_instr& intr);
+
+   bool emit_clip_vertices(const store_loc &store_info, const nir_intrinsic_instr &instr);
+
+   bool emit_stream(int stream);
+
+   const RegisterVec4 *output_register(int loc) const;
+
+   ExportInstr *m_last_param_export{nullptr};
+   ExportInstr *m_last_pos_export{nullptr};
+
+   int m_num_clip_dist{0};
+   int m_next_param{0};
+   uint8_t m_cc_dist_mask{0};
+   uint8_t m_clip_dist_write{0};
+   int m_cur_clip_pos{1};
+   bool m_writes_point_size{false};
+   bool m_out_misc_write{false};
+   bool m_vs_out_layer{false};
+   bool m_vs_as_gs_a{false};
+   int m_vs_prim_id_out{0};
+   bool m_out_edgeflag{false};
+   bool m_out_viewport{false};
+   bool m_out_point_size{false};
+   RegisterVec4 m_clip_vertex;
+
+   const pipe_stream_output_info *m_so_info {nullptr};
+
+   /* Registers of exported outputs; queried through output_register(). */
+   std::unordered_map<int, RegisterVec4 *> m_output_registers;
+};
+
+
+/* Export stage used when the shader key sets as_es: outputs are written
+ * to the ring at the offsets of the matching inputs of gs_shader. */
+class VertexExportForGS : public VertexExportStage {
+public:
+   VertexExportForGS(VertexStageShader *parent, const r600_shader *gs_shader);
+   void finalize() override;
+
+   void get_shader_info(r600_shader *sh_info) const override;
+
+private:
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+   unsigned m_num_clip_dist{0};
+   bool m_vs_out_viewport{false};
+   bool m_vs_out_misc_write{false};
+
+   const r600_shader *m_gs_shader;
+};
+
+/* Export stage used when the shader key sets as_ls; its
+ * do_store_output() emits nothing and reports success. */
+class VertexExportForTCS : public VertexExportStage {
+public:
+   VertexExportForTCS(VertexStageShader *parent);
+   void finalize() override;
+   void get_shader_info(r600_shader *sh_info) const override;
+private:
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+};
+
+/* Vertex shader stage; output handling is delegated to m_export_stage. */
+class VertexShader : public VertexStageShader {
+public:
+   VertexShader(const pipe_stream_output_info *so_info, r600_shader *gs_shader, r600_shader_key& key);
+
+   bool load_input(nir_intrinsic_instr *intr) override;
+   bool store_output(nir_intrinsic_instr *intr) override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   void do_finalize() override;
+
+   bool read_prop(std::istream& is) override;
+
+   void do_print_properties(std::ostream& os) const override;
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   VertexExportStage *m_export_stage {nullptr};
+   int m_last_vertex_atribute_register {0};
+   PRegister m_vertex_id{nullptr};
+   PRegister m_instance_id{nullptr};
+   PRegister m_rel_vertex_id{nullptr};
+   bool m_vs_as_gs_a;
+};
+
+}
+
+#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
deleted file mode 100644
index 07cbebc..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-/* -*- mesa-c++ -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_shaderio.h" -#include "sfn_debug.h" -#include "tgsi/tgsi_from_mesa.h" - -#include - -namespace r600 { - -using std::vector; -using std::priority_queue; - -ShaderIO::ShaderIO(): - m_two_sided(false), - m_lds_pos(0) -{ - -} - -ShaderInput::ShaderInput(tgsi_semantic name): - m_name(name), - m_gpr(0), - m_uses_interpolate_at_centroid(false) -{ -} - -ShaderInput::~ShaderInput() -{ -} - -void ShaderInput::set_lds_pos(UNUSED int lds_pos) -{ -} - -int ShaderInput::ij_index() const -{ - return -1; -} - -bool ShaderInput::interpolate() const -{ - return false; -} - -int ShaderInput::lds_pos() const -{ - return 0; -} - -bool ShaderInput::is_varying() const -{ - return false; -} - -void ShaderInput::set_uses_interpolate_at_centroid() -{ - m_uses_interpolate_at_centroid = true; -} - -void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const -{ - io.name = m_name; - io.gpr = m_gpr; - io.ij_index = translated_ij_index; - io.lds_pos = lds_pos(); - io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid; - - set_specific_ioinfo(io); -} - -void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const -{ -} - -ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr): - ShaderInput(name), - m_gpr(gpr) -{ -} - -void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const -{ - io.gpr = m_gpr; - io.ij_index = 0; -} - -ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, - unsigned frac, unsigned components, - tgsi_interpolate_mode interpolate, - tgsi_interpolate_loc interp_loc): - ShaderInput(_name), - m_driver_location(driver_location), - m_location_frac(frac), - m_sid(sid), - 
m_interpolate(interpolate), - m_interpolate_loc(interp_loc), - m_ij_index(-10), - m_lds_pos(0), - m_mask(((1 << components) - 1) << frac) -{ - evaluate_spi_sid(); - - m_ij_index = interpolate == TGSI_INTERPOLATE_LINEAR ? 3 : 0; - switch (interp_loc) { - case TGSI_INTERPOLATE_LOC_CENTROID: m_ij_index += 2; break; - case TGSI_INTERPOLATE_LOC_CENTER: m_ij_index += 1; break; - default: - ; - } -} - -ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input): - ShaderInput(_name), - m_driver_location(input->data.driver_location), - m_location_frac(input->data.location_frac), - m_sid(sid), - m_ij_index(-10), - m_lds_pos(0), - m_mask(((1 << input->type->components()) - 1) << input->data.location_frac) -{ - sfn_log << SfnLog::io << __func__ - << "name:" << _name - << " sid: " << sid - << " op: " << input->data.interpolation; - - evaluate_spi_sid(); - - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(input->type)); - - switch (input->data.interpolation) { - case INTERP_MODE_NONE: - if (glsl_base_type_is_integer(base_type)) { - m_interpolate = TGSI_INTERPOLATE_CONSTANT; - break; - } - - if (name() == TGSI_SEMANTIC_COLOR) { - m_interpolate = TGSI_INTERPOLATE_COLOR; - m_ij_index = 0; - break; - } - FALLTHROUGH; - - case INTERP_MODE_SMOOTH: - assert(!glsl_base_type_is_integer(base_type)); - - m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - m_ij_index = 0; - break; - - case INTERP_MODE_NOPERSPECTIVE: - assert(!glsl_base_type_is_integer(base_type)); - - m_interpolate = TGSI_INTERPOLATE_LINEAR; - m_ij_index = 3; - break; - - case INTERP_MODE_FLAT: - m_interpolate = TGSI_INTERPOLATE_CONSTANT; - break; - - default: - m_interpolate = TGSI_INTERPOLATE_CONSTANT; - break; - } - - if (input->data.sample) { - m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE; - } else if (input->data.centroid) { - m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID; - m_ij_index += 2; - } else { - m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER; - 
m_ij_index += 1; - } - sfn_log << SfnLog::io - << " -> IP:" << m_interpolate - << " IJ:" << m_ij_index - << "\n"; -} - -bool ShaderInputVarying::is_varying() const -{ - return true; -} - -void ShaderInputVarying::update_mask(int additional_comps, int frac) -{ - m_mask |= ((1 << additional_comps) - 1) << frac; -} - -void ShaderInputVarying::evaluate_spi_sid() -{ - switch (name()) { - case TGSI_SEMANTIC_PSIZE: - case TGSI_SEMANTIC_EDGEFLAG: - case TGSI_SEMANTIC_FACE: - case TGSI_SEMANTIC_SAMPLEMASK: - assert(0 && "System value used as varying"); - break; - case TGSI_SEMANTIC_POSITION: - m_spi_sid = 0; - break; - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_TEXCOORD: - case TGSI_SEMANTIC_PCOORD: - m_spi_sid = m_sid + 1; - break; - default: - /* For non-generic params - pack name and sid into 8 bits */ - m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1; - } -} - -ShaderInputVarying::ShaderInputVarying(tgsi_semantic name, - const ShaderInputVarying& orig, size_t location): - ShaderInput(name), - m_driver_location(location), - m_location_frac(orig.location_frac()), - - m_sid(orig.m_sid), - m_spi_sid(orig.m_spi_sid), - m_interpolate(orig.m_interpolate), - m_interpolate_loc(orig.m_interpolate_loc), - m_ij_index(orig.m_ij_index), - m_lds_pos(0), - m_mask(0) -{ - evaluate_spi_sid(); -} - -bool ShaderInputVarying::interpolate() const -{ - return m_interpolate > 0; -} - -int ShaderInputVarying::ij_index() const -{ - return m_ij_index; -} - -void ShaderInputVarying::set_lds_pos(int lds_pos) -{ - m_lds_pos = lds_pos; -} - -int ShaderInputVarying::lds_pos() const -{ - return m_lds_pos; -} - -void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const -{ - io.interpolate = m_interpolate; - io.interpolate_location = m_interpolate_loc; - io.sid = m_sid; - io.spi_sid = m_spi_sid; - set_color_ioinfo(io); -} - -void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const -{ - sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n"; -} - 
-ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input): - ShaderInputVarying(name, sid, input), - m_back_color_input_idx(0) -{ - sfn_log << SfnLog::io << __func__ << "name << " << name << " sid << " << sid << "\n"; -} - -ShaderInputColor::ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location, - unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, - tgsi_interpolate_loc interp_loc): - ShaderInputVarying(_name, sid, driver_location,frac, components, interpolate, interp_loc), - m_back_color_input_idx(0) -{ - sfn_log << SfnLog::io << __func__ << "name << " << _name << " sid << " << sid << "\n"; -} - -void ShaderInputColor::set_back_color(unsigned back_color_input_idx) -{ - sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n"; - m_back_color_input_idx = back_color_input_idx; -} - -void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const -{ - sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n"; - io.back_color_input = m_back_color_input_idx; -} - -size_t ShaderIO::add_input(ShaderInput *input) -{ - m_inputs.push_back(PShaderInput(input)); - return m_inputs.size() - 1; -} - -PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid) -{ - for (auto& a : m_inputs) { - if (a->name() == name) { - assert(a->is_varying()); - auto& v = static_cast(*a); - if (v.sid() == sid) - return a; - } - } - return nullptr; -} - -struct VaryingShaderIOLess { - bool operator () (PShaderInput lhs, PShaderInput rhs) const - { - const ShaderInputVarying& l = static_cast(*lhs); - const ShaderInputVarying& r = static_cast(*rhs); - return l.location() > r.location(); - } -}; - -void ShaderIO::sort_varying_inputs() -{ - priority_queue, VaryingShaderIOLess> q; - - vector idx; - - for (auto i = 0u; i < m_inputs.size(); ++i) { - if (m_inputs[i]->is_varying()) { - q.push(m_inputs[i]); - idx.push_back(i); - } - } - - auto next_index = idx.begin(); - while 
(!q.empty()) { - auto si = q.top(); - q.pop(); - m_inputs[*next_index++] = si; - } -} - -void ShaderIO::update_lds_pos() -{ - m_lds_pos = -1; - m_ldspos.resize(m_inputs.size()); - for (auto& i : m_inputs) { - if (!i->is_varying()) - continue; - - auto& v = static_cast(*i); - /* There are shaders that miss an input ...*/ - if (m_ldspos.size() <= static_cast(v.location())) - m_ldspos.resize(v.location() + 1); - } - - std::fill(m_ldspos.begin(), m_ldspos.end(), -1); - for (auto& i : m_inputs) { - if (!i->is_varying()) - continue; - - auto& v = static_cast(*i); - if (v.name() == TGSI_SEMANTIC_POSITION) - continue; - - if (m_ldspos[v.location()] < 0) { - ++m_lds_pos; - m_ldspos[v.location()] = m_lds_pos; - } - v.set_lds_pos(m_lds_pos); - } - ++m_lds_pos; -} - -std::vector &ShaderIO::inputs() -{ - return m_inputs; -} - -ShaderInput& ShaderIO::input(size_t k) -{ - assert(k < m_inputs.size()); - return *m_inputs[k]; -} - -ShaderInput& ShaderIO::input(size_t driver_loc, int frac) -{ - for (auto& i: m_inputs) { - if (!i->is_varying()) - continue; - - auto& v = static_cast(*i); - if (v.location() == driver_loc) - return v; - } - return input(driver_loc); -} - -void ShaderIO::set_two_sided() -{ - m_two_sided = true; -} - -std::pair -r600_get_varying_semantic(unsigned varying_location) -{ - std::pair result; - tgsi_get_gl_varying_semantic(static_cast(varying_location), - true, &result.first, &result.second); - - if (result.first == TGSI_SEMANTIC_GENERIC) { - result.second += 9; - } else if (result.first == TGSI_SEMANTIC_PCOORD) { - result.second = 8; - } - return result; -} - - - -} - diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/src/gallium/drivers/r600/sfn/sfn_shaderio.h deleted file mode 100644 index 855bbe1..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_shaderio.h +++ /dev/null @@ -1,176 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining 
a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef SFN_SHADERIO_H -#define SFN_SHADERIO_H - -#include "compiler/nir/nir.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "gallium/drivers/r600/r600_shader.h" - -#include -#include - -namespace r600 { - -class ShaderInput { -public: - ShaderInput(); - virtual ~ShaderInput(); - - ShaderInput(tgsi_semantic name); - tgsi_semantic name() const {return m_name;} - - void set_gpr(int gpr) {m_gpr = gpr;} - int gpr() const {return m_gpr;} - void set_ioinfo(r600_shader_io& io, int translated_ij_index) const; - - virtual void set_lds_pos(int lds_pos); - virtual int ij_index() const; - virtual bool interpolate() const; - virtual int lds_pos() const; - void set_uses_interpolate_at_centroid(); - - virtual bool is_varying() const; - -private: - virtual void set_specific_ioinfo(r600_shader_io& io) const; - - tgsi_semantic m_name; - int m_gpr; - bool m_uses_interpolate_at_centroid; -}; - -using PShaderInput = std::shared_ptr; - -class ShaderInputSystemValue: public ShaderInput { -public: - ShaderInputSystemValue(tgsi_semantic name, int gpr); - void set_specific_ioinfo(r600_shader_io& io) const; - int m_gpr; -}; - -class ShaderInputVarying : public ShaderInput { -public: - ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, - unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, - tgsi_interpolate_loc interp_loc); - ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input); - ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig, - size_t location); - - void set_lds_pos(int lds_pos) override; - - int ij_index() const override; - - bool interpolate() const override; - - int lds_pos() const override; - - int sid() const {return m_sid;} - - void update_mask(int additional_comps, int frac); - - size_t location() const {return m_driver_location;} - int location_frac() const {return m_location_frac;} - - bool is_varying() const override; - -private: - void evaluate_spi_sid(); - - 
virtual void set_color_ioinfo(r600_shader_io& io) const; - void set_specific_ioinfo(r600_shader_io& io) const override; - size_t m_driver_location; - int m_location_frac; - int m_sid; - int m_spi_sid; - tgsi_interpolate_mode m_interpolate; - tgsi_interpolate_loc m_interpolate_loc; - int m_ij_index; - int m_lds_pos; - int m_mask; -}; - -class ShaderInputColor: public ShaderInputVarying { -public: - ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location, - unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, - tgsi_interpolate_loc interp_loc); - ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input); - void set_back_color(unsigned back_color_input_idx); - unsigned back_color_input_index() const { - return m_back_color_input_idx; - } -private: - void set_color_ioinfo(UNUSED r600_shader_io& io) const override; - unsigned m_back_color_input_idx; - -}; - -class ShaderIO -{ -public: - ShaderIO(); - - size_t add_input(ShaderInput *input); - - std::vector& inputs(); - ShaderInput& input(size_t k); - - ShaderInput& input(size_t driver_loc, int frac); - - void set_two_sided(); - bool two_sided() {return m_two_sided;} - - int nlds() const { - return m_lds_pos; - } - - void sort_varying_inputs(); - - size_t size() const {return m_inputs.size();} - - PShaderInput find_varying(tgsi_semantic name, int sid); - - void update_lds_pos(); - -private: - std::vector m_inputs; - std::vector m_ldspos; - bool m_two_sided; - int m_lds_pos; - -}; - -std::pair -r600_get_varying_semantic(unsigned varying_location); - - -} - -#endif // SFN_SHADERIO_H diff --git a/src/gallium/drivers/r600/sfn/sfn_value.cpp b/src/gallium/drivers/r600/sfn/sfn_value.cpp deleted file mode 100644 index 3228b75..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_value.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * 
copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_value.h" -#include "util/macros.h" - -#include -#include -#include - -namespace r600 { - -using std::unique_ptr; -using std::make_shared; - -const char *Value::component_names = "xyzw01?_!"; - -Value::Value(): - m_type(gpr), - m_chan(0) -{ -} - -Value::Value(Type type, uint32_t chan): - m_type(type), - m_chan(chan) -{ - -} - - - -Value::Value(Type type): - Value(type, 0) -{ -} - -Value::Type Value::type() const -{ - return m_type; -} - -void Value::set_chan(uint32_t chan) -{ - m_chan = chan; -} - -void Value::print(std::ostream& os) const -{ - do_print(os); -} - -void Value::print(std::ostream& os, const PrintFlags& flags) const -{ - if (flags.flags & PrintFlags::has_neg) os << '-'; - if (flags.flags & PrintFlags::has_abs) os << '|'; - do_print(os, flags); - if (flags.flags & PrintFlags::has_abs) os << '|'; -} - -void Value::do_print(std::ostream& os, const PrintFlags& flags) const -{ - (void)flags; - do_print(os); -} - -bool Value::operator < (const Value& lhs) const -{ - return sel() < lhs.sel() || - (sel() == lhs.sel() && chan() < lhs.chan()); -} - - -LiteralValue::LiteralValue(float value, uint32_t chan): - Value(Value::literal, chan) -{ - m_value.f=value; -} - - -LiteralValue::LiteralValue(uint32_t value, uint32_t chan): - Value(Value::literal, chan) -{ - m_value.u=value; -} - -LiteralValue::LiteralValue(int value, uint32_t chan): - Value(Value::literal, chan) -{ - m_value.u=value; -} - -uint32_t LiteralValue::sel() const -{ - return ALU_SRC_LITERAL; -} - -uint32_t LiteralValue::value() const -{ - return m_value.u; -} - -float LiteralValue::value_float() const -{ - return m_value.f; -} - -void LiteralValue::do_print(std::ostream& os) const -{ - os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10) - << m_value.f << "]."; - os << component_names[chan()]; -} - -void LiteralValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const -{ - os << "[0x" << std::setbase(16) << m_value.u << " " - << std::setbase(10); 
- - os << m_value.f << "f"; - - os<< "]"; -} - -bool LiteralValue::is_equal_to(const Value& other) const -{ - assert(other.type() == Value::Type::literal); - const auto& rhs = static_cast(other); - return (sel() == rhs.sel() && - value() == rhs.value()); -} - -InlineConstValue::InlineConstValue(int value, int chan): - Value(Value::cinline, chan), - m_value(static_cast(value)) -{ -} - -uint32_t InlineConstValue::sel() const -{ - return m_value; -} - -void InlineConstValue::do_print(std::ostream& os) const -{ - auto sv_info = alu_src_const.find(m_value); - if (sv_info != alu_src_const.end()) { - os << sv_info->second.descr; - if (sv_info->second.use_chan) - os << '.' << component_names[chan()]; - else if (chan() > 0) - os << "." << component_names[chan()] - << " (W: Channel ignored)"; - } else { - if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32) - os << " Param" << m_value - ALU_SRC_PARAM_BASE; - else - os << " E: unknown inline constant " << m_value; - } -} - -bool InlineConstValue::is_equal_to(const Value& other) const -{ - assert(other.type() == Value::Type::cinline); - const auto& rhs = static_cast(other); - return sel() == rhs.sel(); -} - -PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0)); -PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0)); -PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0)); -PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0)); - -UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank): - Value(Value::kconst, chan) -{ - m_index = sel; - m_kcache_bank = kcache_bank; -} - -UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr): - Value(Value::kconst, chan), - m_index(sel), - m_kcache_bank(1), - m_addr(addr) -{ - -} - -uint32_t UniformValue::sel() const -{ - const int bank_base[4] = {128, 160, 256, 288}; - return m_index < 512 ? 
m_index + bank_base[m_kcache_bank] : m_index; -} - -uint32_t UniformValue::kcache_bank() const -{ - return m_kcache_bank; -} - -bool UniformValue::is_equal_to(const Value& other) const -{ - const UniformValue& o = static_cast(other); - return sel() == o.sel() && - m_kcache_bank == o.kcache_bank(); -} - -void UniformValue::do_print(std::ostream& os) const -{ - if (m_index < 512) - os << "KC" << m_kcache_bank << "[" << m_index; - else if (m_addr) - os << "KC[" << *m_addr << "][" << m_index; - else - os << "KCx[" << m_index; - os << "]." << component_names[chan()]; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_value.h b/src/gallium/drivers/r600/sfn/sfn_value.h deleted file mode 100644 index 7bc4528..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_value.h +++ /dev/null @@ -1,194 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SFN_VALUE_H -#define SFN_VALUE_H - -#include "sfn_alu_defines.h" -#include "nir.h" - -#include -#include -#include -#include - -namespace r600 { - -class Value { -public: - using Pointer=std::shared_ptr; - - struct PrintFlags { - PrintFlags():index_mode(0), - flags(0) - { - } - PrintFlags(int im, int f):index_mode(im), - flags(f) - { - } - int index_mode; - int flags; - static const int is_rel = 1; - static const int has_abs = 2; - static const int has_neg = 4; - static const int literal_is_float = 8; - static const int index_ar = 16; - static const int index_loopidx = 32; - }; - - enum Type { - gpr, - kconst, - literal, - cinline, - lds_direct, - gpr_vector, - gpr_array_value, - unknown - }; - - static const char *component_names; - - using LiteralFlags=std::bitset<4>; - - Value(); - - Value(Type type); - - virtual ~Value(){} - - Type type() const; - virtual uint32_t sel() const = 0; - uint32_t chan() const {return m_chan;} - - void set_chan(uint32_t chan); - virtual void set_pin_to_channel() { assert(0 && "Only GPRs can be pinned to a channel ");} - void print(std::ostream& os, const PrintFlags& flags) const; - - void print(std::ostream& os) const; - - bool operator < (const Value& lhs) const; - - static Value::Pointer zero; - static Value::Pointer one_f; - static Value::Pointer zero_dot_5; - static Value::Pointer one_i; - -protected: - Value(Type type, uint32_t chan); - -private: - virtual void do_print(std::ostream& os) const = 0; - virtual void do_print(std::ostream& os, const PrintFlags& flags) const; - - virtual bool is_equal_to(const Value& other) const = 0; - - Type m_type; - uint32_t m_chan; - - friend bool operator == (const Value& lhs, const Value& rhs); -}; - - 
-inline std::ostream& operator << (std::ostream& os, const Value& v) -{ - v.print(os); - return os; -} - - -inline bool operator == (const Value& lhs, const Value& rhs) -{ - if (lhs.type() == rhs.type()) - return lhs.is_equal_to(rhs); - return false; -} - -inline bool operator != (const Value& lhs, const Value& rhs) -{ - return !(lhs == rhs); -} - -using PValue=Value::Pointer; - -struct value_less { - inline bool operator () (PValue lhs, PValue rhs) const { - return *lhs < *rhs; - } -}; - -using ValueSet = std::set; - - -class LiteralValue: public Value { -public: - LiteralValue(float value, uint32_t chan= 0); - LiteralValue(uint32_t value, uint32_t chan= 0); - LiteralValue(int value, uint32_t chan= 0); - uint32_t sel() const override final; - uint32_t value() const; - float value_float() const; -private: - void do_print(std::ostream& os) const override; - void do_print(std::ostream& os, const PrintFlags& flags) const override; - bool is_equal_to(const Value& other) const override; - union { - uint32_t u; - float f; - } m_value; -}; - -class InlineConstValue: public Value { -public: - InlineConstValue(int value, int chan); - uint32_t sel() const override final; -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Value& other) const override; - AluInlineConstants m_value; -}; - -class UniformValue: public Value { -public: - UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0); - UniformValue(uint32_t sel, uint32_t chan, PValue addr); - uint32_t sel() const override; - uint32_t kcache_bank() const; - PValue addr() const {return m_addr;} - void reset_addr(PValue v) {m_addr = v;} -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Value& other) const override; - - uint32_t m_index; - uint32_t m_kcache_bank; - PValue m_addr; -}; - -} // end ns r600 - -#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp deleted file mode 100644 
index c53b325..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp +++ /dev/null @@ -1,380 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018-2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "sfn_value_gpr.h" -#include "sfn_valuepool.h" -#include "sfn_debug.h" -#include "sfn_liverange.h" - -namespace r600 { - -using std::vector; -using std::array; - -GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset): - Value(Value::gpr, chan), - m_sel(sel), - m_base_offset(base_offset), - m_input(false), - m_pin_to_channel(false), - m_keep_alive(false) -{ -} - -GPRValue::GPRValue(uint32_t sel, uint32_t chan): - Value(Value::gpr, chan), - m_sel(sel), - m_base_offset(0), - m_input(false), - m_pin_to_channel(false), - m_keep_alive(false) -{ -} - -uint32_t GPRValue::sel() const -{ - return m_sel; -} - -void GPRValue::do_print(std::ostream& os) const -{ - os << 'R'; - os << m_sel; - os << '.' << component_names[chan()]; -} - -bool GPRValue::is_equal_to(const Value& other) const -{ - assert(other.type() == Value::Type::gpr); - const auto& rhs = static_cast(other); - return (sel() == rhs.sel() && - chan() == rhs.chan()); -} - -void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const -{ - os << 'R'; - os << m_sel; - os << '.' 
<< component_names[chan()]; -} - -GPRVector::GPRVector(const GPRVector& orig): - Value(gpr_vector), - m_elms(orig.m_elms), - m_valid(orig.m_valid) -{ -} - -GPRVector::GPRVector(std::array elms): - Value(gpr_vector), - m_elms(elms), - m_valid(false) -{ - for (unsigned i = 0; i < 4; ++i) - if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) { - assert(0 && "GPR vector not valid because element missing or nit a GPR"); - return; - } - unsigned sel = m_elms[0]->sel(); - for (unsigned i = 1; i < 4; ++i) - if (m_elms[i]->sel() != sel) { - assert(0 && "GPR vector not valid because sel is not equal for all elements"); - return; - } - m_valid = true; -} - -GPRVector::GPRVector(uint32_t sel, std::array swizzle): - Value (gpr_vector), - m_valid(true) -{ - for (int i = 0; i < 4; ++i) - m_elms[i] = PValue(new GPRValue(sel, swizzle[i])); -} - -GPRVector::GPRVector(const GPRVector& orig, const std::array& swizzle) -{ - for (int i = 0; i < 4; ++i) - m_elms[i] = orig.reg_i(swizzle[i]); - m_valid = orig.m_valid; -} - -void GPRVector::validate() const -{ - assert(m_elms[0]); - uint32_t sel = m_elms[0]->sel(); - if (sel >= 124) - return; - - for (unsigned i = 1; i < 4; ++i) { - assert(m_elms[i]); - if (sel != m_elms[i]->sel()) - return; - } - - m_valid = true; -} - -uint32_t GPRVector::sel() const -{ - validate(); - assert(m_valid); - return m_elms[0] ? m_elms[0]->sel() : 999; -} - -void GPRVector::set_reg_i(int i, PValue reg) -{ - m_elms[i] = reg; -} - -void GPRVector::pin_to_channel(int i) -{ - auto& v = static_cast(*m_elms[i]); - v.set_pin_to_channel(); -} - -void GPRVector::pin_all_to_channel() -{ - for (auto& v: m_elms) { - auto& c = static_cast(*v); - c.set_pin_to_channel(); - } -} - -void GPRVector::do_print(std::ostream& os) const -{ - os << "R" << sel() << "."; - for (int i = 0; i < 4; ++i) - os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? 
m_elms[i]->chan() : 8] : '?'); -} - -void GPRVector::swizzle(const Swizzle& swz) -{ - Values v(m_elms); - for (uint32_t i = 0; i < 4; ++i) - if (i != swz[i]) { - assert(swz[i] < 4); - m_elms[i] = v[swz[i]]; - } -} - -bool GPRVector::is_equal_to(const Value& other) const -{ - if (other.type() != gpr_vector) { - std::cerr << "t"; - return false; - } - - const GPRVector& o = static_cast(other); - - for (int i = 0; i < 4; ++i) { - if (*m_elms[i] != *o.m_elms[i]) { - std::cerr << "elm" << i; - return false; - } - } - return true; -} - - -GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array): - Value(gpr_array_value, value->chan()), - m_value(value), - m_addr(addr), - m_array(array) -{ -} - -GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array): - Value(gpr_array_value, value->chan()), - m_value(value), - m_array(array) -{ -} - -static const char *swz_char = "xyzw01_"; - -void GPRArrayValue::do_print(std::ostream& os) const -{ - assert(m_array); - os << "R" << m_value->sel(); - if (m_addr) { - os << "[" << *m_addr << "] "; - } - os << swz_char[m_value->chan()]; - - os << "(" << *m_array << ")"; -} - -bool GPRArrayValue::is_equal_to(const Value& other) const -{ - const GPRArrayValue& v = static_cast(other); - - return *m_value == *v.m_value && - *m_array == *v.m_array; -} - -void GPRArrayValue::record_read(LiverangeEvaluator& ev) const -{ - if (m_addr) { - ev.record_read(*m_addr); - unsigned chan = m_value->chan(); - assert(m_array); - m_array->record_read(ev, chan); - } else - ev.record_read(*m_value); -} - -void GPRArrayValue::record_write(LiverangeEvaluator& ev) const -{ - if (m_addr) { - ev.record_read(*m_addr); - unsigned chan = m_value->chan(); - assert(m_array); - m_array->record_write(ev, chan); - } else - ev.record_write(*m_value); -} - -void GPRArrayValue::reset_value(PValue new_value) -{ - m_value = new_value; -} - -void GPRArrayValue::reset_addr(PValue new_addr) -{ - m_addr = new_addr; -} - - -GPRArray::GPRArray(int base, int size, int 
mask, int frac): - Value (gpr_vector), - m_base_index(base), - m_component_mask(mask), - m_frac(frac) -{ - m_values.resize(size); - for (int i = 0; i < size; ++i) { - for (int j = 0; j < 4; ++j) { - if (mask & (1 << j)) { - auto gpr = new GPRValue(base + i, j); - /* If we want to use sb, we have to keep arrays - * alife for the whole shader range, otherwise the sb scheduler - * thinks is not capable to rename non-array uses of these registers */ - gpr->set_as_input(); - gpr->set_keep_alive(); - m_values[i].set_reg_i(j, PValue(gpr)); - - } - } - } -} - -uint32_t GPRArray::sel() const -{ - return m_base_index; -} - -static const char *compchar = "xyzw"; -void GPRArray::do_print(std::ostream& os) const -{ - os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size() - 1 << "]."; - for (int j = 0; j < 4; ++j) { - if (m_component_mask & (1 << j)) - os << compchar[j]; - } -} - -bool GPRArray::is_equal_to(const Value& other) const -{ - const GPRArray& o = static_cast(other); - return o.sel() == sel() && - o.m_values.size() == m_values.size() && - o.m_component_mask == m_component_mask; -} - -uint32_t GPRArrayValue::sel() const -{ - return m_value->sel(); -} - -PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component) -{ - assert(index < m_values.size()); - assert(m_component_mask & (1 << (component + m_frac))); - - sfn_log << SfnLog::reg << "Create indirect register from " << *this; - - PValue v = m_values[index].reg_i(component + m_frac); - assert(v); - - sfn_log << SfnLog::reg << " -> " << *v; - - if (indirect) { - sfn_log << SfnLog::reg << "[" << *indirect << "]"; - switch (indirect->type()) { - case Value::literal: { - const LiteralValue& lv = static_cast(*indirect); - v = m_values[lv.value()].reg_i(component + m_frac); - break; - } - case Value::gpr: { - v = PValue(new GPRArrayValue(v, indirect, this)); - sfn_log << SfnLog::reg << "(" << *v << ")"; - break; - } - default: - assert(0 && !"Indirect addressing must be literal value or GPR"); - 
} - } - sfn_log << SfnLog::reg <<" -> " << *v << "\n"; - return v; -} - -void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const -{ - for (auto& v: m_values) - ev.record_read(*v.reg_i(chan), true); -} - -void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const -{ - for (auto& v: m_values) - ev.record_write(*v.reg_i(chan), true); -} - -void GPRArray::collect_registers(ValueMap& output) const -{ - for (auto& v: m_values) { - for (int i = 0; i < 4; ++i) { - auto vv = v.reg_i(i); - if (vv) - output.insert(vv); - } - } -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h deleted file mode 100644 index 7893488..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h +++ /dev/null @@ -1,208 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2019 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SFN_GPRARRAY_H -#define SFN_GPRARRAY_H - -#include "sfn_value.h" -#include -#include - -namespace r600 { - -class ValuePool; -class ValueMap; -class LiverangeEvaluator; - -class GPRValue : public Value { -public: - GPRValue() = default; - GPRValue(GPRValue&& orig) = default; - GPRValue(const GPRValue& orig) = default; - - GPRValue(uint32_t sel, uint32_t chan, int base_offset); - - GPRValue(uint32_t sel, uint32_t chan); - - GPRValue& operator = (const GPRValue& orig) = default; - GPRValue& operator = (GPRValue&& orig) = default; - - uint32_t sel() const override final; - - void set_as_input(){ m_input = true; } - bool is_input() const {return m_input; } - void set_keep_alive() { m_keep_alive = true; } - bool keep_alive() const {return m_keep_alive; } - void set_pin_to_channel() override { m_pin_to_channel = true;} - bool pin_to_channel() const { return m_pin_to_channel;} - -private: - void do_print(std::ostream& os) const override; - void do_print(std::ostream& os, const PrintFlags& flags) const override; - bool is_equal_to(const Value& other) const override; - uint32_t m_sel; - bool m_base_offset; - bool m_input; - bool m_pin_to_channel; - bool m_keep_alive; -}; - -using PGPRValue = std::shared_ptr; - -class GPRVector : public Value { -public: - using Swizzle = std::array; - using Values = std::array; - GPRVector() = default; - GPRVector(GPRVector&& orig) = default; - GPRVector(const GPRVector& orig); - - GPRVector(const GPRVector& orig, const std::array& swizzle); - GPRVector(std::array elms); - GPRVector(uint32_t sel, std::array swizzle); - - GPRVector& operator = (const GPRVector& orig) = default; - GPRVector& operator = (GPRVector&& orig) = default; - - void swizzle(const 
Swizzle& swz); - - uint32_t sel() const override final; - - void set_reg_i(int i, PValue reg); - - unsigned chan_i(int i) const {return m_elms[i]->chan();} - PValue reg_i(int i) const {return m_elms[i];} - PValue operator [] (int i) const {return m_elms[i];} - PValue& operator [] (int i) {return m_elms[i];} - - void pin_to_channel(int i); - void pin_all_to_channel(); - - PValue x() const {return m_elms[0];} - PValue y() const {return m_elms[1];} - PValue z() const {return m_elms[2];} - PValue w() const {return m_elms[3];} - - Values& values() { return m_elms;} - -private: - void do_print(std::ostream& os) const override; - bool is_equal_to(const Value& other) const override; - void validate() const; - - Values m_elms; - mutable bool m_valid; -}; - - -class GPRArray : public Value -{ -public: - using Pointer = std::shared_ptr; - - GPRArray(int base, int size, int comp_mask, int frac); - - uint32_t sel() const override; - - uint32_t mask() const { return m_component_mask; }; - - size_t size() const {return m_values.size();} - - PValue get_indirect(unsigned index, PValue indirect, unsigned component); - - void record_read(LiverangeEvaluator& ev, int chan)const; - void record_write(LiverangeEvaluator& ev, int chan)const; - - void collect_registers(ValueMap& output) const; - -private: - void do_print(std::ostream& os) const override; - - bool is_equal_to(const Value& other) const override; - - int m_base_index; - int m_component_mask; - int m_frac; - - std::vector m_values; -}; - -using PGPRArray = GPRArray::Pointer; - -class GPRArrayValue :public Value { -public: - GPRArrayValue(PValue value, GPRArray *array); - GPRArrayValue(PValue value, PValue index, GPRArray *array); - - void record_read(LiverangeEvaluator& ev) const; - void record_write(LiverangeEvaluator& ev) const; - - size_t array_size() const; - uint32_t sel() const override; - - PValue value() {return m_value;} - - void reset_value(PValue new_value); - void reset_addr(PValue new_addr); - - Value::Pointer 
indirect() const {return m_addr;} - -private: - - void do_print(std::ostream& os) const override; - - bool is_equal_to(const Value& other) const override; - - PValue m_value; - PValue m_addr; - GPRArray *m_array; -}; - -inline size_t GPRArrayValue::array_size() const -{ - return m_array->size(); -} - -inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp) -{ - GPRVector::Swizzle swz = {0,1,2,3}; - for (int i = ncomp; i < 4; ++i) - swz[i] = 7; - return swz; -} - -inline GPRVector::Swizzle swizzle_from_mask(unsigned mask) -{ - GPRVector::Swizzle swz; - for (int i = 0; i < 4; ++i) - swz[i] = ((1 << i) & mask) ? i : 7; - return swz; -} - - -} - -#endif // SFN_GPRARRAY_H diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp new file mode 100644 index 0000000..74fca92 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp @@ -0,0 +1,959 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "sfn_valuefactory.h" +#include "sfn_instr.h" +#include "sfn_debug.h" + +#include "gallium/drivers/r600/r600_shader.h" + +#include +#include +#include +#include + +namespace r600 { + +using std::istringstream; +using std::string; + + +ValueFactory::ValueFactory(): + m_next_register_index(VirtualValue::virtual_register_base), + m_nowrite_idx(0) +{ +} + +void ValueFactory::set_virtual_register_base(int base) +{ + m_next_register_index = base; +} + +bool ValueFactory::allocate_registers(const exec_list *registers) +{ + bool has_arrays = false; + struct array_entry { + unsigned index; + unsigned length; + unsigned ncomponents; + + bool operator ()(const array_entry& a, const array_entry& b) const { + return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents); + } + }; + + using array_list = std::priority_queue, + array_entry>; + + array_list arrays; + + + foreach_list_typed(nir_register, reg, node, registers) { + if (reg->num_array_elems) { + array_entry ae = {reg->index, reg->num_array_elems, reg->bit_size / 32 * reg->num_components}; + arrays.push(ae); + has_arrays = true; + } + } + + int ncomponents = 0; + int sel = m_next_register_index; + unsigned length = 0; + + while (!arrays.empty()) { + auto a = arrays.top(); + arrays.pop(); + + /* This is a bit hackish, return an id that encodes the array merge. 
To make sure + * that the mapping doesn't go wrong we have to make sure the arrays is longer than + * the number of instances in this arrays slot */ + if (a.ncomponents + ncomponents > 4 || + a.length > length) { + sel = m_next_register_index; + ncomponents = 0; + length = 0; + } + + if (ncomponents == 0) + m_next_register_index += a.length; + + uint32_t frac = ncomponents; + auto array = new LocalArray( sel, a.ncomponents, a.length, frac); + + for (unsigned i = 0; i < a.ncomponents; ++i) { + RegisterKey key(a.index, i, vp_array); + m_channel_counts.inc_count(i); + m_registers[key] = array; + sfn_log << SfnLog::reg << __func__ << ": Allocate array " << key << ":" << *array << "\n"; + } + + ncomponents += a.ncomponents; + length = a.length; + } + + foreach_list_typed(nir_register, reg, node, registers) { + if (!reg->num_array_elems) { + uint32_t sel = m_next_register_index++; + unsigned num_components = reg->num_components * reg->bit_size / 32; + for (auto chan = 0u; chan < num_components; ++chan) { + RegisterKey key(reg->index, chan, vp_register); + m_channel_counts.inc_count(chan); + m_registers[key] = new Register( sel, chan, num_components > 1 ? 
pin_none : pin_free); + sfn_log << SfnLog::reg << "allocate register " << key << ":" << *m_registers[key] << "\n"; + } + } + } + return has_arrays; +} + +PRegister ValueFactory::allocate_pinned_register(int sel, int chan) +{ + if (m_next_register_index <= sel) + m_next_register_index = sel + 1; + + auto reg = new Register(sel, chan, pin_fully); + m_pinned_registers.push_back(reg); + return reg; +} + +RegisterVec4 ValueFactory::allocate_pinned_vec4(int sel, bool is_ssa) +{ + if (m_next_register_index <= sel) + m_next_register_index = sel + 1; + + RegisterVec4 retval(sel, is_ssa, {0,1,2,3}, pin_fully); + for (int i = 0; i < 4; ++i) + m_pinned_registers.push_back(retval[i]); + return retval; +} + + +void ValueFactory::inject_value(const nir_dest& dest, int chan, PVirtualValue value) +{ + assert(dest.is_ssa); + RegisterKey key(dest.ssa.index, chan, vp_ssa); + sfn_log << SfnLog::reg << "Inject value with key " << key << "\n"; + assert(m_values.find(key) == m_values.end()); + m_values[key] = value; +} + +PRegister ValueFactory::dest(const nir_alu_dest& dst, int chan, + Pin pin_channel) +{ + sfn_log << SfnLog::reg << "Search (ref) " << &dst << "\n"; + return dest(dst.dest, chan, pin_channel); +} + +class TranslateRegister: public RegisterVisitor { +public: + void visit(VirtualValue& value) {(void)value;} + void visit(Register& value) {(void)value;}; + void visit(LocalArray& value) { m_value = value.element(m_offset, m_addr, m_chan);} + void visit(LocalArrayValue& value) {(void)value;} + void visit(UniformValue& value) {(void)value;} + void visit(LiteralConstant& value) {(void)value;} + void visit(InlineConstant& value) {(void)value;} + + TranslateRegister(int offset, PVirtualValue addr, int chan): m_addr(addr), + m_value(nullptr), m_offset(offset), m_chan(chan) {} + + PVirtualValue m_addr; + PRegister m_value; + int m_offset; + int m_chan; +}; + +PRegister ValueFactory::resolve_array(nir_register *reg, nir_src *indirect, + int base_offset, int chan) +{ + PVirtualValue 
addr = nullptr; + auto type = reg->num_array_elems ? vp_array : vp_register; + RegisterKey key(reg->index, chan, type); + auto ireg = m_registers.find(key); + if (ireg == m_registers.end()) { + std::cerr << "Key " << key << " not found\n"; + assert(0); + } + + if (reg->num_array_elems) { + + if (indirect) + addr = src(*indirect, 0); + + TranslateRegister array_resolution(base_offset, addr, chan); + + ireg->second->accept(array_resolution); + assert(array_resolution.m_value); + return array_resolution.m_value; + } else { + return ireg->second; + } +} + +PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel) +{ + if (dst.is_ssa) { + return dest(dst.ssa, chan, pin_channel); + } else { + return resolve_array(dst.reg.reg, dst.reg.indirect, + dst.reg.base_offset, chan); + } +} + +void ValueFactory::allocate_const(nir_load_const_instr *load_const) +{ + assert(load_const->def.bit_size == 32); + for (int i = 0; i < load_const->def.num_components; ++i) { + RegisterKey key(load_const->def.index, i, vp_ssa); + m_values[key] = literal(load_const->value[i].i32); + sfn_log << SfnLog::reg << "Add const with key " << key << " as " << m_values[key] << "\n"; + } +} + +PVirtualValue ValueFactory::uniform(nir_intrinsic_instr *load_uniform, int chan) +{ + auto literal = nir_src_as_const_value(load_uniform->src[0]); + assert(literal); + + int index = nir_intrinsic_base(load_uniform) + + literal->u32 + 512; + + return uniform(index, chan, 0); +} + +PVirtualValue ValueFactory::uniform(uint32_t index, int chan, int kcache) +{ + return new UniformValue( index, chan, kcache); +} + +PRegister ValueFactory::temp_register(int pinned_channel, bool is_ssa) +{ + int sel = m_next_register_index++; + int chan = (pinned_channel >= 0) ? + pinned_channel : m_channel_counts.least_used(); + + auto reg = new Register( sel, chan, + pinned_channel >= 0 ? 
pin_chan : pin_free); + m_channel_counts.inc_count(chan); + + reg->set_is_ssa(is_ssa); + m_registers[RegisterKey(sel, chan, vp_temp)] = reg; + return reg; +} + +RegisterVec4 ValueFactory::temp_vec4(Pin pin, const RegisterVec4::Swizzle &swizzle) +{ + int sel = m_next_register_index++; + + if (pin == pin_free) + pin = pin_chan; + + PRegister vec4[4]; + + for (int i = 0; i < 4; ++i) { + vec4[i] = new Register( sel, swizzle[i], pin); + vec4[i]->set_is_ssa(true); + m_registers[RegisterKey(sel, swizzle[i], vp_temp)] = vec4[i]; + } + return RegisterVec4(vec4[0], vec4[1], vec4[2], vec4[3], pin); +} + +RegisterVec4 ValueFactory::dest_vec4(const nir_dest& dst, Pin pin) +{ + if (pin != pin_group && pin != pin_chgr) + pin = pin_chan; + if (dst.is_ssa) { + PRegister x = dest(dst, 0, pin); + PRegister y = dest(dst, 1, pin); + PRegister z = dest(dst, 2, pin); + PRegister w = dest(dst, 3, pin); + return RegisterVec4(x, y, z, w, pin); + } else { + assert(!dst.reg.indirect); + PRegister v[4]; + int sel = -1; + for (int i = 0; i < 4; ++i) { + RegisterKey key(dst.reg.reg->index, i, vp_register); + v[i] = m_registers[key]; + assert(sel >= 0 || v[i]); + if (sel < 0) + sel = v[i]->sel(); + + if (!v[i]) { + v[i] = m_registers[key] = new Register(sel, i, pin_group); + } + } + return RegisterVec4(v[0], v[1], v[2], v[3], pin); + } + unreachable("unsupported"); +} + +PVirtualValue ValueFactory::src(const nir_alu_src& alu_src, int chan) +{ + return src(alu_src.src, alu_src.swizzle[chan]); +} + +PVirtualValue ValueFactory::src64(const nir_alu_src& alu_src, int chan, int comp) +{ + return src(alu_src.src, 2 * alu_src.swizzle[chan] + comp); +} + +PVirtualValue ValueFactory::src(const nir_src& src, int chan) +{ + sfn_log << SfnLog::reg << "search (ref) " << (void *)&src << "\n"; + + if (src.is_ssa) { + sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan << " got "; + auto val = ssa_src(*src.ssa, chan); + sfn_log << *val << "\n"; + return val; + } else { + sfn_log << 
SfnLog::reg << "search reg " << src.reg.reg->index << "\n"; + return local_register(src.reg, chan); + } +} + +PVirtualValue ValueFactory::src(const nir_tex_src& tex_src, int chan) +{ + return src(tex_src.src, chan); +} + +PRegister ValueFactory::dummy_dest(unsigned chan) +{ + assert(chan < 4); + return m_dummy_dest_pinned[chan]; +} + +PRegister +ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel) +{ + RegisterKey key(ssa.index, chan, vp_ssa); + + /* dirty workaround for Cayman trans ops, because we may request + * the same sa reg more than once, but only write to it once. */ + auto ireg = m_registers.find(key); + if (ireg != m_registers.end()) + return ireg->second; + + auto isel = m_ssa_index_to_sel.find(ssa.index); + int sel; + if (isel != m_ssa_index_to_sel.end()) + sel = isel->second; + else { + sel = m_next_register_index++; + m_ssa_index_to_sel[ssa.index] = sel; + } + + if (pin_channel == pin_free) + chan = m_channel_counts.least_used(); + + auto vreg = new Register( sel, chan, pin_channel); + m_channel_counts.inc_count(chan); + vreg->set_is_ssa(true); + m_registers[key] = vreg; + sfn_log << SfnLog::reg << "allocate Ssa " << key << ":" << *vreg << "\n"; + return vreg; +} + +PVirtualValue ValueFactory::zero() +{ + return inline_const(ALU_SRC_0, 0); +} + +PVirtualValue ValueFactory::one() +{ + return inline_const(ALU_SRC_1, 0); +} + +PVirtualValue ValueFactory::one_i() +{ + return inline_const(ALU_SRC_1_INT, 0); +} + +PRegister ValueFactory::undef(int index, int chan) +{ + RegisterKey key(index, chan, vp_ssa); + PRegister reg = new Register(m_next_register_index++, 0, pin_free); + reg->set_is_ssa(true); + m_registers[key] = reg; + return reg; +} + +PVirtualValue +ValueFactory::ssa_src(const nir_ssa_def& ssa, int chan) +{ + RegisterKey key(ssa.index, chan, vp_ssa); + sfn_log << SfnLog::reg << "search src with key" << key << "\n"; + + auto ireg = m_registers.find(key); + if (ireg != m_registers.end()) + return ireg->second; + + auto ival = 
m_values.find(key); + if (ival != m_values.end()) + return ival->second; + + std::cerr << "Didn't find source with key " << key << "\n"; + unreachable("Source values should always exist"); +} + +PRegister ValueFactory::local_register(const nir_reg_dest& dst, int chan) +{ + return resolve_array(dst.reg, dst.indirect, + dst.base_offset, chan); +} + +PRegister ValueFactory::local_register(const nir_reg_src& src, int chan) +{ + return resolve_array(src.reg, src.indirect, + src.base_offset, chan); +} + +PVirtualValue ValueFactory::literal(uint32_t value) +{ + auto iv = m_literal_values.find(value); + if (iv != m_literal_values.end()) + return iv->second; + + auto v = new LiteralConstant( value); + m_literal_values[value] = v; + return v; +} + +PInlineConstant ValueFactory::inline_const(AluInlineConstants sel, int chan) +{ + int hash = (sel << 3) | chan; + auto iv = m_inline_constants.find(hash); + if (iv != m_inline_constants.end()) + return iv->second; + auto v = new InlineConstant( sel, chan); + m_inline_constants[hash] = v; + return v; +} + + std::vector> ValueFactory::src_vec(const nir_src& source, int components) +{ + std::vector> retval; + retval.reserve(components); + for (int i = 0; i < components; ++i) + retval.push_back(src(source, i)); + return retval; +} + +std::vector> +ValueFactory::dest_vec(const nir_dest& dst, int num_components) +{ + std::vector> retval; + retval.reserve(num_components); + for (int i = 0; i < num_components; ++i) + retval.push_back(dest(dst, i, num_components > 1 ? pin_chan : pin_free)); + return retval; +} + +RegisterVec4 ValueFactory::src_vec4(const nir_src& source, Pin pin, const RegisterVec4::Swizzle& swz) +{ + auto sx = swz[0] < 4 ? src(source, swz[0])->as_register() : nullptr; + auto sy = swz[1] < 4 ? src(source, swz[1])->as_register() : nullptr; + auto sz = swz[2] < 4 ? src(source, swz[2])->as_register() : nullptr; + auto sw = swz[3] < 4 ? 
src(source, swz[3])->as_register() : nullptr; + + assert(sx || sy || sz || sw); + + int sel = sx ? sx->sel() : (sy ? sy->sel() : (sz ? sz->sel() : sw ? sw->sel() : -1)); + if (sel < 0) + unreachable("source vector without valid components"); + + if (!sx) sx = new Register(sel, 7, pin); + if (!sy) sy = new Register(sel, 7, pin); + if (!sz) sz = new Register(sel, 7, pin); + if (!sw) sw = new Register(sel, 7, pin); + + return RegisterVec4(sx, sy, sz, sw, pin); +} + +static Pin +pin_from_string(const std::string& pinstr) +{ + if (pinstr == "chan") + return pin_chan; + if (pinstr == "array") + return pin_array; + if (pinstr == "fully") + return pin_fully; + if (pinstr == "group") + return pin_group; + if (pinstr == "chgr") + return pin_chgr; + if (pinstr == "free") + return pin_free; + return pin_none; +} + +static int chan_from_char(char chan) +{ + switch (chan) { + case 'x' : return 0; + case 'y' : return 1; + case 'z' : return 2; + case 'w' : return 3; + case '0' : return 4; + case '1' : return 5; + case '_' : return 7; + } + unreachable("Unknown swizzle char"); +} + +static int +str_to_int(const string& s) +{ + istringstream ss(s); + int retval; + ss >> retval; + return retval; +} + +static bool +split_register_string(const string& s, + string& index_str, + string& size_str, + string& swizzle_str, + string& pin_str) +{ + int type = 0; + for (unsigned i = 1; i < s.length(); ++i) { + if (s[i] == '.' 
&& type != 3) { + type = 1; + continue; + } else if (s[i] == '@' && type != 3) { + type = 2; + continue; + } else if (s[i] == '[') { + type = 3; + continue; + } else if (s[i] == ']') { + if (type != 3) + std::cerr << "s=" << s + << ": type=" << type + << ": i=" << i + << "\n"; + assert(type == 3); + + type = 4; + continue; + } + + switch (type) { + case 0: index_str.append(1, s[i]); break; + case 1: swizzle_str.append(1, s[i]); break; + case 2: pin_str.append(1, s[i]); break; + case 3: size_str.append(1, s[i]); break; + default: + unreachable("Malformed Array allocation string"); + } + } + return true; +} + +PRegister ValueFactory::dest_from_string(const std::string& s) +{ + assert(s.length() >= 4); + + assert(strchr("ARS_", s[0])); + + string index_str; + string size_str; + string swizzle_str; + string pin_str; + + split_register_string(s, index_str, size_str, + swizzle_str, pin_str); + + int sel = 0; + if (s[0] == '_') { + /* Since these instructions still may use or switch to a different channel + * we have to create a new instance for each occurance */ + sel = std::numeric_limits::max() - m_nowrite_idx++; + } else { + std::istringstream n(index_str); + n >> sel; + } + + auto p = pin_from_string(pin_str); + char chan = chan_from_char(swizzle_str[0]); + + EValuePool pool = vp_temp; + switch (s[0]) { + case 'A': pool = vp_array; break; + case 'R': pool = vp_register; break; + case '_': pool = vp_ignore; break; + case 'S': pool = vp_ssa; break; + default: + unreachable("Unknown value type"); + } + + bool is_ssa = s[0] == 'S'; + + RegisterKey key (sel, chan, pool); + + sfn_log << SfnLog::reg << "Search register with key " << key << "\n"; + + auto ireg = m_registers.find(key); + if ( ireg == m_registers.end()) { + auto reg = new Register(sel, chan, p); + reg->set_is_ssa(is_ssa); + if (p == pin_fully) + reg->pin_live_range(true); + m_registers[key] = reg; + return reg; + } else if (pool == vp_ignore) { + assert(ireg->second->sel() == std::numeric_limits::max()); + 
return ireg->second; + } else { + assert(!is_ssa || s[0] == '_'); + + if (size_str.length()) { + auto array = static_cast(ireg->second); + PVirtualValue addr = nullptr; + int offset = 0; + if (size_str[0] == 'S' || size_str[0] == 'R') { + addr = src_from_string(size_str); + } else { + istringstream num_str(size_str); + num_str >> offset; + } + + return array->element(offset, addr, chan - array->frac()); + } else + return ireg->second; + } +} + +PVirtualValue ValueFactory::src_from_string(const std::string& s) +{ + switch (s[0]) { + case 'A': + case 'S': + case 'R': break; + case 'L': return LiteralConstant::from_string(s); + case 'K': return UniformValue::from_string(s); + case 'P': return InlineConstant::param_from_string(s); + case 'I': return InlineConstant::from_string(s); + + default: + std::cerr << "'" << s << "'"; + unreachable("Unknown register type"); + } + + assert(strchr("ARS_", s[0])); + + string index_str; + string size_str; + string swizzle_str; + string pin_str; + + split_register_string(s, index_str, size_str, + swizzle_str, pin_str); + + int sel = 0; + if (s[0] == '_') { + sel = std::numeric_limits::max(); + } else { + std::istringstream n(index_str); + n >> sel; + } + + auto p = pin_from_string(pin_str); + char chan = chan_from_char(swizzle_str[0]); + + EValuePool pool = vp_temp; + switch (s[0]) { + case 'A': pool = vp_array; break; + case 'R': pool = vp_register; break; + case '_': pool = vp_ignore; break; + case 'S': pool = vp_ssa; break; + default: + unreachable("Unknown value type"); + } + + RegisterKey key (sel, chan, pool); + + auto ireg = m_registers.find(key); + if (ireg != m_registers.end()) { + if (pool != vp_ssa && size_str.length()) { + auto array = static_cast(ireg->second); + PVirtualValue addr = nullptr; + int offset = 0; + if (size_str[0] == 'S' || size_str[0] == 'R') { + addr = src_from_string(size_str); + } else { + istringstream num_str(size_str); + num_str >> offset; + } + return array->element(offset, addr, chan - 
array->frac()); + } else { + return ireg->second; + } + } else { + if (sel != std::numeric_limits::max()) { + std::cerr << "register " << key << "not found \n"; + unreachable("Source register should exist"); + } else { + auto reg = new Register( sel, chan, p); + m_registers[key] = reg; + return reg; + } + } +} + +RegisterVec4 ValueFactory::dest_vec4_from_string(const std::string& s, + RegisterVec4::Swizzle& swz, + Pin pin) +{ + bool is_ssa = false; + int sel = sel_and_szw_from_string(s, swz, is_ssa); + + PRegister v[4]; + + for (int i = 0; i < 4; ++i) { + auto pool = is_ssa ? vp_ssa : vp_register; + if (swz[i] > 3) + pool = vp_ignore; + + RegisterKey key(sel, i, pool); + auto ireg = m_registers.find(key); + if (ireg != m_registers.end()) { + v[i] = ireg->second; + assert(!is_ssa || pool == vp_ignore); + } else { + v[i] = new Register( sel, i, pin); + v[i]->set_is_ssa(is_ssa); + m_registers[key] = v[i]; + } + } + return RegisterVec4(v[0], v[1], v[2], v[3], pin); +} + +RegisterVec4 ValueFactory::src_vec4_from_string(const std::string& s) +{ + RegisterVec4::Swizzle swz; + bool is_ssa = false; + int sel = sel_and_szw_from_string(s, swz, is_ssa); + + PRegister v[4]; + + PRegister used_reg = nullptr; + for (int i = 0; i < 4; ++i) { + if (swz[i] < 4) { + RegisterKey key(sel, swz[i], is_ssa ? vp_ssa : vp_register); + auto ireg = m_registers.find(key); + if (ireg == m_registers.end()) { + std::cerr << s << ": Register with key " << key << " not found\n"; + assert(0); + } + used_reg = v[i] = ireg->second; + } else { + v[i] = nullptr; + } + } + sel = used_reg ? used_reg->sel() : 0; + Pin pin = used_reg ? 
used_reg->pin() : pin_group; + + for (int i = 0; i < 4; ++i) { + if (!v[i]) { + v[i] = new Register( sel, swz[i], pin); + v[i]->set_is_ssa(is_ssa); + } else { + if (v[i]->pin() == pin_none) + v[i]->set_pin(pin_group); + } + } + return RegisterVec4(v[0], v[1], v[2], v[3], pin); +} + +LocalArray *ValueFactory::array_from_string(const std::string& s) +{ + assert(s[0] == 'A'); + string index_str; + string size_str; + string swizzle_str; + string pin_str; + + + int type = 0; + for (unsigned i = 1; i < s.length(); ++i) { + if (s[i] == '.') { + type = 1; + continue; + } else if (s[i] == '@') { + type = 2; + continue; + } else if (s[i] == '[') { + type = 3; + continue; + } else if (s[i] == ']') { + assert(type == 3); + type = 4; + continue; + } + + switch (type) { + case 0: index_str.append(1, s[i]); break; + case 1: swizzle_str.append(1, s[i]); break; + case 2: pin_str.append(1, s[i]); break; + case 3: size_str.append(1, s[i]); break; + default: + unreachable("Malformed Array allocation string"); + } + } + + int sel = str_to_int(index_str); + int size = str_to_int(size_str); + int ncomp = swizzle_str.length(); + + if (ncomp > 4 || ncomp <= 0) { + std::cerr << "Error reading array from '" << s << ": "; + std::cerr << "index:'" << index_str << "' -> '" << sel + << "' size:'" << size_str << "' -> '" << size + << " swizzle:'" << swizzle_str << "' -> '" << ncomp << "'\n"; + assert(0); + } + + const char *swz = "xyzw"; + const char *first_swz = strchr(swz, swizzle_str[0]); + long frac = first_swz - swz; + assert(frac >= 0 && frac <= 4 - ncomp); + + auto array = new LocalArray( sel, ncomp, size, frac); + + for (int i = 0; i < ncomp; ++i) { + RegisterKey key(sel, i + frac, vp_array); + m_registers[key] = array; + } + return array; +} + +void LiveRangeMap::append_register(Register *reg) +{ + sfn_log << SfnLog::merge << __func__ << ": " << *reg << "\n"; + + auto chan = reg->chan(); + auto& ranges = m_life_ranges[chan]; + + LiveRangeEntry entry(reg); + ranges.emplace_back(entry); +} 
+
+/* Number of live range entries currently stored for each of the four
+ * channels, in channel order. */
+std::array<size_t, 4> LiveRangeMap::sizes() const
+{
+   std::array<size_t, 4> result;
+   /* Take the channel vector by reference: a by-value lambda parameter
+    * copies the whole vector only to read its size. */
+   std::transform(m_life_ranges.begin(), m_life_ranges.end(),
+                  result.begin(), [](const auto& lr) {return lr.size(); });
+   return result;
+}
+
+/* Build the per-channel live range table from the registers known to the
+ * factory.
+ *
+ *  - entries of the vp_ignore pool are skipped,
+ *  - an array contributes every element it yields when iterated,
+ *  - any other register is added when its channel is below 4,
+ *  - the pinned registers are added afterwards.
+ *
+ * Each channel is then sorted by sel and every register gets its position
+ * within its channel as index, which is what LiveRangeMap::set_life_range()
+ * uses to find the entry again. */
+LiveRangeMap ValueFactory::prepare_live_range_map()
+{
+   LiveRangeMap result;
+
+   for (const auto& [key, reg] : m_registers) {
+      if (key.value.pool == vp_ignore)
+         continue;
+
+      if (key.value.pool == vp_array) {
+         /* An array is stored once per channel key, so it is expanded only
+          * for one of them.
+          * NOTE(review): array_from_string() registers the keys
+          * frac .. frac + ncomp - 1; an array with frac > 0 has no chan 0
+          * key and would not be visited here - confirm this is intended. */
+         if (key.value.chan == 0) {
+            auto array = static_cast<LocalArray *>(reg);
+            for (auto& a : *array) {
+               result.append_register(a);
+            }
+         }
+      } else {
+         if (reg->chan() < 4)
+            result.append_register(reg);
+      }
+   }
+
+   for (auto r : m_pinned_registers) {
+      result.append_register(r);
+   }
+
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = result.component(i);
+      std::sort(comp.begin(), comp.end(),
+                [](const LiveRangeEntry& lhs, const LiveRangeEntry& rhs) {
+         return lhs.m_register->sel() < rhs.m_register->sel();
+      });
+      for (size_t j = 0; j < comp.size(); ++j)
+         comp[j].m_register->set_index(j);
+   }
+
+   return result;
+}
+
+/* Reset the pin of every register in m_registers and of every pinned
+ * register to pin_none. */
+void ValueFactory::clear_pins()
+{
+   for (const auto& [key, reg] : m_registers)
+      reg->set_pin(pin_none);
+
+   for (auto reg : m_pinned_registers)
+      reg->set_pin(pin_none);
+}
+
+/* Empty the register, value, literal, inline constant and
+ * ssa-index-to-sel maps. */
+void ValueFactory::clear()
+{
+   m_registers.clear();
+   m_values.clear();
+   m_literal_values.clear();
+   m_inline_constants.clear();
+   m_ssa_index_to_sel.clear();
+}
+
+/* Report the local arrays to the shader info.
+ *
+ * sh_info receives one r600_shader_array per distinct array (first GPR,
+ * GPR count and component mask), num_arrays is set accordingly and
+ * TGSI_FILE_TEMPORARY is flagged in indirect_files. sh_info is left
+ * untouched when there are no arrays. */
+void ValueFactory::get_shader_info(r600_shader *sh_info)
+{
+   /* The same array object is stored under one key per channel; the set
+    * removes the duplicates. It is ordered by pointer value, so the order
+    * of the reported entries is not tied to gpr_start. */
+   std::set<LocalArray *> arrays;
+
+   for (auto& [key, reg] : m_registers) {
+      if (key.value.pool == vp_array)
+         arrays.insert(static_cast<LocalArray *>(reg));
+   }
+
+   if (!arrays.empty()) {
+
+      sh_info->num_arrays = arrays.size();
+      /* NOTE(review): allocated with new[]; confirm that whoever releases
+       * r600_shader::arrays uses a matching deallocation. */
+      sh_info->arrays = new r600_shader_array[arrays.size()];
+
+      /* Fill one entry per array. Writing through sh_info->arrays-> for
+       * every array would hit element 0 each time and leave the remaining
+       * entries uninitialised. */
+      size_t i = 0;
+      for (auto *arr : arrays) {
+         auto& info = sh_info->arrays[i++];
+         info.gpr_start = arr->sel();
+         info.gpr_count = arr->size();
+         info.comp_mask =
+               ((1 << arr->nchannels()) - 1)
+               << arr->frac();
+      }
+      sh_info->indirect_files |= 1 << TGSI_FILE_TEMPORARY;
+   }
+}
+
+
+}
diff --git
a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h new file mode 100644 index 0000000..89a9bcf --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h @@ -0,0 +1,299 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#ifndef VALUEFACTORY_H
+#define VALUEFACTORY_H
+
+#include "sfn_virtualvalues.h"
+#include "sfn_alu_defines.h"
+
+#include "nir.h"
+/* NOTE(review): the four include targets below, and the template arguments
+ * further down in this header, are missing in this copy of the patch -
+ * presumably lost in extraction; take them from the upstream commit. */
+#include
+#include
+#include
+#include
+
+struct r600_shader;
+
+namespace r600 {
+/* Live range record of one register: the register itself plus start, end,
+ * index and color, all of which stay at -1 until they are assigned. */
+struct LiveRangeEntry {
+   enum EUse {
+      use_export,
+      use_unspecified
+   };
+
+   LiveRangeEntry (Register *reg): m_register(reg) {}
+   int m_start{-1};
+   int m_end{-1};
+   int m_index{-1};
+   int m_color{-1};
+   std::bitset m_use; /* NOTE(review): size argument missing here; presumably one bit per EUse value */
+   Register *m_register;
+
+   /* Writes "<register>(<index>, <color>) [<start>:<end>]". */
+   void print(std::ostream& os) const {
+      os << *m_register << "(" << m_index << ", " << m_color << ") ["
+         << m_start << ":" << m_end << "]";
+   }
+};
+
+inline std::ostream& operator << (std::ostream& os, const LiveRangeEntry& lre)
+{
+   lre.print(os);
+   return os;
+}
+/* Live range entries grouped by channel. Entries are added with
+ * append_register() into the list of the register's channel and are
+ * addressed by (index, channel) afterwards. */
+class LiveRangeMap {
+public:
+   using ChannelLiveRange = std::vector;
+   /* Entry `index` of channel `chan`; the index is range checked by at(). */
+   LiveRangeEntry& operator()(int index, int chan) {
+      assert(chan < 4);
+      return m_life_ranges[chan].at(index);
+   }
+
+   void append_register(Register *reg);
+   /* Store start/end on the entry selected by the register's own chan()
+    * and index(). */
+   void set_life_range(const Register& reg, int start, int end) {
+      auto& entry = m_life_ranges[reg.chan()].at(reg.index());
+      entry.m_start = start;
+      entry.m_end = end;
+   }
+
+   std::array sizes() const; /* number of entries per channel */
+
+   ChannelLiveRange& component(int i) {
+      return m_life_ranges[i];
+   }
+
+   const ChannelLiveRange& component(int i) const {
+      return m_life_ranges[i];
+   }
+
+private:
+
+   std::array m_life_ranges;
+};
+
+std::ostream& operator << (std::ostream& os, const LiveRangeMap& lrm);
+
+bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs);
+
+inline bool operator != (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
+{
+   return !(lhs == rhs);
+}
+/* Pool a RegisterKey belongs to. As used by ValueFactory: vp_ssa for NIR SSA
+ * values, vp_register for NIR registers, vp_temp for temporaries, vp_array
+ * for local arrays and vp_ignore for '_' (no-write) values. */
+enum EValuePool {
+   vp_ssa,
+   vp_register,
+   vp_temp,
+   vp_array,
+   vp_ignore
+};
+/* Lookup key for registers and values: (index, channel, pool), also
+ * readable as one 64 bit number that is used for comparison and hashing.
+ * NOTE(review): this relies on the three fields filling `hash` completely
+ * (32 + 29 + 3 bits) and on reading the union member that was not written
+ * last - confirm for the supported compilers. */
+union RegisterKey {
+   struct {
+      uint32_t index;
+      uint32_t chan : 29;
+      EValuePool pool : 3;
+   } value;
+   uint64_t hash;
+
+   RegisterKey(uint32_t index, uint32_t chan, EValuePool pool)
+   {
+      value.index = index;
+      value.chan = chan;
+      value.pool = pool;
+   }
+   /* Writes "(<index>, <chan>, <pool>)"; vp_ignore has no pool name. */
+   void print(std::ostream& os) const {
+      os << "(" << value.index
+         << ", " << value.chan
+         << ", ";
+      switch (value.pool) {
+      case vp_ssa: os << "ssa"; break;
+      case vp_register: os << "reg"; break;
+      case vp_temp: os << "temp"; break;
+      case vp_array : os << "array"; break;
+      case vp_ignore : break;
+      }
+      os << ")";
+   }
+};
+
+
+inline bool operator == (const RegisterKey& lhs, const RegisterKey& rhs) {
+   return lhs.hash == rhs.hash;
+}
+
+inline std::ostream& operator << (std::ostream& os, const RegisterKey& key) {
+   key.print(os);
+   return os;
+}
+/* Hash functor for RegisterKey: the packed 64 bit value is the hash. */
+struct register_key_hash {
+   std::size_t operator () (const RegisterKey& key) const {
+      return key.hash;
+   }
+};
+/* Usage counter for the four channels. least_used() returns the channel
+ * with the smallest count; on a tie the lowest channel wins because only a
+ * strictly smaller count replaces the current candidate. */
+class ChannelCounts {
+public:
+   void inc_count(int chan) {++m_counts[chan];}
+   int least_used() const {
+      int least_used = 0;
+      uint32_t count = m_counts[0];
+      for (int i = 1; i < 4; ++i) {
+         if (count > m_counts[i]) {
+            count = m_counts[i];
+            least_used = i;
+         }
+      }
+      return least_used;
+   }
+   void print(std::ostream& os) const {
+      os << "CC:" << m_counts[0] << " " << m_counts[1]
+         << " " << m_counts[2] << " " << m_counts[3];
+   }
+private:
+   std::array m_counts{0,0,0,0};
+};
+
+inline std::ostream& operator << (std::ostream& os, const ChannelCounts& cc)
+{
+   cc.print(os);
+   return os;
+}
+/* Creates and looks up the values used by the backend. Registers are kept
+ * in m_registers under a RegisterKey; literals and inline constants are
+ * cached, so asking for the same one again returns the same object. The
+ * *_from_string() members build values from their textual form. The class
+ * is not copyable. */
+class ValueFactory : public Allocate {
+public:
+   ValueFactory();
+
+   void clear(); /* empties the register, value, literal, inline constant and ssa-to-sel maps */
+
+   ValueFactory(const ValueFactory& orig) = delete;
+   ValueFactory& operator = (const ValueFactory& orig) = delete;
+
+   void set_virtual_register_base(int base);
+
+   bool allocate_registers(const exec_list *registers);
+   PRegister allocate_pinned_register(int sel, int chan);
+   RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa);
+
+   void inject_value(const nir_dest& dest, int chan, PVirtualValue value);
+
+   std::vector> dest_vec(const nir_dest& dest, int num_components);
+   std::vector> dest_vector(const nir_src& src,
+                            const std::vector& components);
+
+
+   PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel);
+   PRegister dest(const nir_dest& dest, int chan, Pin pin_channel);
+   PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel); /* with pin_free the least used channel replaces chan */
+
+   PRegister dummy_dest(unsigned chan); /* element chan (< 4) of m_dummy_dest_pinned */
+   PRegister temp_register(int pinned_channel = -1, bool is_ssa = true);
+   RegisterVec4 temp_vec4(Pin pin, const RegisterVec4::Swizzle& swizzle = {0,1,2,3}); /* four new SSA registers on one new sel; pin_free becomes pin_chan */
+   RegisterVec4 dest_vec4(const nir_dest& dest, Pin pin);
+
+   RegisterVec4 src_vec4(const nir_src& src, Pin pin, const RegisterVec4::Swizzle &swz = {0,1,2,3});
+
+   PVirtualValue src(const nir_alu_src& alu_src, int chan); /* applies alu_src.swizzle[chan] */
+   PVirtualValue src64(const nir_alu_src& alu_src, int chan, int comp); /* component 2 * swizzle[chan] + comp */
+   PVirtualValue src(const nir_src& src, int chan);
+   PVirtualValue src(const nir_tex_src& tex_src, int chan);
+   PVirtualValue literal(uint32_t value); /* cached per value */
+   PVirtualValue uniform(nir_intrinsic_instr *load_uniform, int chan);
+   PVirtualValue uniform(uint32_t index, int chan, int kcache);
+
+   void allocate_const(nir_load_const_instr *load_const);
+
+   PRegister dest_from_string(const std::string& s);
+   RegisterVec4 dest_vec4_from_string(const std::string& s, RegisterVec4::Swizzle &swz,
+                                      Pin pin = pin_none);
+   PVirtualValue src_from_string(const std::string& s);
+   RegisterVec4 src_vec4_from_string(const std::string& s);
+
+   LocalArray *array_from_string(const std::string& s);
+
+   std::vector> src_vec(const nir_src& src, int components);
+
+   PInlineConstant inline_const(AluInlineConstants sel, int chan); /* cached per (sel, chan) */
+
+   void get_shader_info(r600_shader *sh_info); /* reports the local arrays to sh_info */
+
+   PRegister undef(int index, int chan); /* NOTE(review): the register is always created on channel 0; chan only enters the key */
+   PVirtualValue zero();  /* inline constant ALU_SRC_0 */
+   PVirtualValue one();   /* inline constant ALU_SRC_1 */
+   PVirtualValue one_i(); /* inline constant ALU_SRC_1_INT */
+
+   LiveRangeMap prepare_live_range_map(); /* registers per channel, sorted by sel, with their index assigned */
+
+   void clear_pins(); /* sets pin_none on all registers, pinned ones included */
+
+   int next_register_index() const { return m_next_register_index; }
+private:
+
+   PVirtualValue ssa_src(const nir_ssa_def &dest, int chan); /* looks in m_registers first, then in m_values */
+
+   PRegister local_register(const nir_reg_dest& dest, int chan);
+   PRegister local_register(const nir_reg_src& dest, int chan);
+   PRegister resolve_array(nir_register *reg, nir_src *indirect,
+                           int base_offset, int chan);
+
+   int m_next_register_index; /* next unused sel */
+   int m_next_temp_channel{0};
+
+   template
+   using unordered_map_alloc = std::unordered_map, std::equal_to,
+                                   Allocator>>;
+
+   template
+   using unordered_reg_map_alloc = std::unordered_map,
+                                       Allocator>>;
+
+   using RegisterMap = unordered_reg_map_alloc;
+   using ROValueMap = unordered_reg_map_alloc;
+
+   RegisterMap m_registers;
+   std::vector m_pinned_registers;
+   ROValueMap m_values;
+   unordered_map_alloc m_literal_values;
+   unordered_map_alloc m_inline_constants;
+   unordered_map_alloc m_ssa_index_to_sel; /* lets all channels of one SSA def share a sel */
+
+   uint32_t m_nowrite_idx; /* counts the '_' destinations created by dest_from_string() */
+
+   RegisterVec4 m_dummy_dest_pinned{126, pin_chan, {0,1,2,3}}; /* backing store of dummy_dest() */
+   ChannelCounts m_channel_counts; /* channel usage, consulted for pin_free SSA destinations */
+};
+
+}
+
+#endif // VALUEFACTORY_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
deleted file mode 100644
index efc9efd..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
+++ /dev/null
@@ -1,526 +0,0 @@
-/* -*- mesa-c++ -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sfn_debug.h" -#include "sfn_value_gpr.h" -#include "sfn_valuepool.h" - -#include -#include - -namespace r600 { - -using std::vector; -using std::pair; -using std::make_pair; -using std::queue; - -ValuePool::ValuePool(): - m_next_register_index(0), - current_temp_reg_index(0), - next_temp_reg_comp(4) -{ -} - -PValue ValuePool::m_undef = Value::zero; - -GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components) -{ - std::array result; - for (int i = 0; i < 4; ++i) - result[i] = from_nir(dst, i < num_components ? i : 7); - return GPRVector(result); -} - -std::vector ValuePool::varvec_from_nir(const nir_dest& dst, int num_components) -{ - std::vector result(num_components); - for (int i = 0; i < num_components; ++i) - result[i] = from_nir(dst, i); - return result; -} - - -std::vector ValuePool::varvec_from_nir(const nir_src& src, int num_components) -{ - std::vector result(num_components); - int i; - for (i = 0; i < num_components; ++i) - result[i] = from_nir(src, i); - - return result; -} - - -PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled) -{ - sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ") - << (v.is_ssa ? v.ssa->index : v.reg.reg->index); - - if (!v.is_ssa) { - int idx = lookup_register_index(v); - sfn_log << SfnLog::reg << " -> got index " << idx << "\n"; - if (idx >= 0) { - auto reg = lookup_register(idx, swizzled, false); - if (reg) { - if (reg->type() == Value::gpr_vector) { - auto& array = static_cast(*reg); - reg = array.get_indirect(v.reg.base_offset, - v.reg.indirect ? 
- from_nir(*v.reg.indirect, 0, 0) : nullptr, - component); - } - return reg; - } - } - assert(0 && "local registers should always be found"); - } - - unsigned index = v.ssa->index; - /* For undefs we use zero and let ()yet to be implemeneted dce deal with it */ - if (m_ssa_undef.find(index) != m_ssa_undef.end()) - return Value::zero; - - - int idx = lookup_register_index(v); - sfn_log << SfnLog::reg << " -> got index " << idx << "\n"; - if (idx >= 0) { - auto reg = lookup_register(idx, swizzled, false); - if (reg) - return reg; - } - - auto literal_val = nir_src_as_const_value(v); - if (literal_val) { - assert(v.is_ssa); - switch (v.ssa->bit_size) { - case 1: - return PValue(new LiteralValue(literal_val[swizzled].b ? 0xffffffff : 0, component)); - case 32: - return literal(literal_val[swizzled].u32); - default: - sfn_log << SfnLog::reg << "Unsupported bit size " << v.ssa->bit_size - << " fall back to 32\n"; - return PValue(new LiteralValue(literal_val[swizzled].u32, component)); - } - } - - return PValue(); -} - -PValue ValuePool::from_nir(const nir_src& v, unsigned component) -{ - return from_nir(v, component, component); -} - -PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component) -{ - return from_nir(v.src, component, component); -} - -PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component) -{ - return from_nir(v.src, component, v.swizzle[component]); -} - -PGPRValue ValuePool::get_temp_register(int channel) -{ - /* Skip to next register to get the channel we want */ - if (channel >= 0) { - if (next_temp_reg_comp <= channel) - next_temp_reg_comp = channel; - else - next_temp_reg_comp = 4; - } - - if (next_temp_reg_comp > 3) { - current_temp_reg_index = allocate_temp_register(); - next_temp_reg_comp = 0; - } - return std::make_shared(current_temp_reg_index, next_temp_reg_comp++); -} - -GPRVector ValuePool::get_temp_vec4(const GPRVector::Swizzle& swizzle) -{ - int sel = allocate_temp_register(); - return GPRVector(sel, swizzle); -} - 
-PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp) -{ - int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa): - get_local_register_index(*src.reg.reg); - - auto retval = lookup_register(idx, comp, false); - if (!retval || retval->type() != Value::gpr || retval->type() != Value::gpr_array_value) - retval = create_register(idx, comp); - return retval; -} - -PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component) -{ - //assert(v->write_mask & (1 << component)); - return from_nir(v.dest, component); -} - -int ValuePool::lookup_register_index(const nir_dest& dst) -{ - return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa): - get_local_register_index(*dst.reg.reg); -} - -int ValuePool::lookup_register_index(const nir_src& src) const -{ - int index = 0; - - index = src.is_ssa ? - get_ssa_register_index(*src.ssa) : - get_local_register_index(*src.reg.reg); - - sfn_log << SfnLog::reg << " LIDX:" << index; - - auto r = m_register_map.find(index); - if (r == m_register_map.end()) { - return -1; - } - return static_cast(r->second.index); -} - - -int ValuePool::allocate_temp_register() -{ - return m_next_register_index++; -} - - -PValue ValuePool::from_nir(const nir_dest& v, unsigned component) -{ - int idx = lookup_register_index(v); - sfn_log << SfnLog::reg << __func__ << ": "; - if (v.is_ssa) - sfn_log << "ssa_" << v.ssa.index; - else - sfn_log << "r" << v.reg.reg->index; - sfn_log << " -> " << idx << "\n"; - - auto retval = lookup_register(idx, component, false); - if (!retval) - retval = create_register(idx, component); - - if (retval->type() == Value::gpr_vector) { - assert(!v.is_ssa); - auto& array = static_cast(*retval); - retval = array.get_indirect(v.reg.base_offset, - v.reg.indirect ? 
- from_nir(*v.reg.indirect, 0, 0) : nullptr, - component); - } - - return retval; -} - -ValueMap ValuePool::get_temp_registers() const -{ - ValueMap result; - - for (auto& v : m_registers) { - if (v.second->type() == Value::gpr) - result.insert(v.second); - else if (v.second->type() == Value::gpr_vector) { - auto& array = static_cast(*v.second); - array.collect_registers(result); - } - } - return result; -} - -static const char swz[] = "xyzw01?_"; - -PValue ValuePool::create_register(unsigned sel, unsigned swizzle) -{ - sfn_log << SfnLog::reg - <<"Create register " << sel << '.' << swz[swizzle] << "\n"; - auto retval = PValue(new GPRValue(sel, swizzle)); - m_registers[(sel << 3) + swizzle] = retval; - return retval; -} - -bool ValuePool::inject_register(unsigned sel, unsigned swizzle, - const PValue& reg, bool map) -{ - uint32_t ssa_index = sel; - - if (map) { - auto pos = m_ssa_register_map.find(sel); - if (pos == m_ssa_register_map.end()) - ssa_index = m_next_register_index++; - else - ssa_index = pos->second; - } - - sfn_log << SfnLog::reg - << "Inject register " << sel << '.' << swz[swizzle] - << " at index " << ssa_index << " ..."; - - if (map) - m_ssa_register_map[sel] = ssa_index; - - allocate_with_mask(ssa_index, swizzle, true); - - unsigned idx = (ssa_index << 3) + swizzle; - auto p = m_registers.find(idx); - if ( (p != m_registers.end()) && *p->second != *reg) { - std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n"; - assert(0); - return false; - } - sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n"; - m_registers[idx] = reg; - - if (m_next_register_index <= ssa_index) - m_next_register_index = ssa_index + 1; - return true; -} - - -PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle, - bool required) -{ - - PValue retval; - sfn_log << SfnLog::reg - << "lookup register " << sel << '.' 
<< swz[swizzle] << "(" - << ((sel << 3) + swizzle) << ")..."; - - - auto reg = m_registers.find((sel << 3) + swizzle); - if (reg != m_registers.end()) { - sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n"; - retval = reg->second; - } else if (swizzle == 7) { - PValue retval = create_register(sel, swizzle); - sfn_log << SfnLog::reg << " -> Created " << *retval << "\n"; - } else if (required) { - sfn_log << SfnLog::reg << "Register (" << sel << ", " - << swizzle << ") not found but required\n"; - assert(0 && "Unallocated register value requested\n"); - } - sfn_log << SfnLog::reg << " -> Not required and not allocated\n"; - return retval; -} - -unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa) -{ - sfn_log << SfnLog::reg << __func__ << ": search dst ssa " - << ssa.index; - - auto pos = m_ssa_register_map.find(ssa.index); - if (pos == m_ssa_register_map.end()) { - sfn_log << SfnLog::reg << " Need to allocate ..."; - allocate_ssa_register(ssa); - pos = m_ssa_register_map.find(ssa.index); - assert(pos != m_ssa_register_map.end()); - } - sfn_log << SfnLog::reg << "... 
got " << pos->second << "\n"; - return pos->second; -} - -unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const -{ - sfn_log << SfnLog::reg << __func__ << ": search ssa " - << ssa.index; - - auto pos = m_ssa_register_map.find(ssa.index); - sfn_log << SfnLog::reg << " got " << pos->second<< "\n"; - if (pos == m_ssa_register_map.end()) { - sfn_log << SfnLog::reg << __func__ << ": ssa register " - << ssa.index << " lookup failed\n"; - return -1; - } - return pos->second; -} - -unsigned ValuePool::get_local_register_index(const nir_register& reg) -{ - unsigned index = reg.index | 0x80000000; - - auto pos = m_ssa_register_map.find(index); - if (pos == m_ssa_register_map.end()) { - allocate_local_register(reg); - pos = m_ssa_register_map.find(index); - assert(pos != m_ssa_register_map.end()); - } - return pos->second; -} - -unsigned ValuePool::get_local_register_index(const nir_register& reg) const -{ - unsigned index = reg.index | 0x80000000; - auto pos = m_ssa_register_map.find(index); - if (pos == m_ssa_register_map.end()) { - sfn_log << SfnLog::err << __func__ << ": local register " - << reg.index << " lookup failed"; - return -1; - } - return pos->second; -} - -void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa) -{ - sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index - << " as " << m_next_register_index << "\n"; - int index = m_next_register_index++; - m_ssa_register_map[ssa.index] = index; - allocate_with_mask(index, 0xf, true); -} - -void ValuePool::allocate_arrays(array_list& arrays) -{ - int ncomponents = 0; - int current_index = m_next_register_index; - unsigned instance = 0; - - while (!arrays.empty()) { - auto a = arrays.top(); - arrays.pop(); - - /* This is a bit hackish, return an id that encodes the array merge. 
To make sure - * that the mapping doesn't go wrong we have to make sure the arrays is longer than - * the number of instances in this arrays slot */ - if (a.ncomponents + ncomponents > 4 || - a.length < instance) { - current_index = m_next_register_index; - ncomponents = 0; - instance = 0; - } - - if (ncomponents == 0) - m_next_register_index += a.length; - - uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents; - - PGPRArray array = PGPRArray(new GPRArray(current_index, a.length, mask, ncomponents)); - - m_reg_arrays.push_back(array); - - sfn_log << SfnLog::reg << "Add array at "<< current_index - << " of size " << a.length << " with " << a.ncomponents - << " components, mask " << mask << "\n"; - - m_ssa_register_map[a.index | 0x80000000] = current_index + instance; - - for (unsigned i = 0; i < a.ncomponents; ++i) - m_registers[((current_index + instance) << 3) + i] = array; - - VRec next_reg = {current_index + instance, mask, mask}; - m_register_map[current_index + instance] = next_reg; - - ncomponents += a.ncomponents; - ++instance; - } -} - -void ValuePool::allocate_local_register(const nir_register& reg) -{ - int index = m_next_register_index++; - m_ssa_register_map[reg.index | 0x80000000] = index; - allocate_with_mask(index, 0xf, true); - - /* Create actual register and map it */; - for (int i = 0; i < 4; ++i) { - int k = (index << 3) + i; - m_registers[k] = std::make_shared(index, i); - } -} - -void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays) -{ - sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index - << " as " << m_next_register_index << "\n"; - - if (reg.num_array_elems) { - array_entry ae = {reg.index, reg.num_array_elems, reg.num_components}; - arrays.push(ae); - } - else - allocate_local_register(reg); -} - -bool ValuePool::create_undef(nir_ssa_undef_instr* instr) -{ - m_ssa_undef.insert(instr->def.index); - return true; -} - -int ValuePool::allocate_with_mask(unsigned index, unsigned 
mask, bool pre_alloc) -{ - int retval; - VRec next_register = { index, mask }; - - sfn_log << SfnLog::reg << (pre_alloc ? "Pre-alloc" : "Allocate") - << " register (" << index << ", " << mask << ")\n"; - retval = index; - auto r = m_register_map.find(index); - - if (r != m_register_map.end()) { - if ((r->second.mask & next_register.mask) && - !(r->second.pre_alloc_mask & next_register.mask)) { - std::cerr << "r600 ERR: register (" - << index << ", " << mask - << ") already allocated as (" << r->second.index << ", " - << r->second.mask << ", " << r->second.pre_alloc_mask - << ") \n"; - retval = -1; - } else { - r->second.mask |= next_register.mask; - if (pre_alloc) - r->second.pre_alloc_mask |= next_register.mask; - retval = r->second.index; - } - } else { - if (pre_alloc) - next_register.pre_alloc_mask = mask; - m_register_map[index] = next_register; - retval = next_register.index; - } - - sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R" - << retval << "\n"; - - return retval; -} - -PValue ValuePool::literal(uint32_t value) -{ - auto l = m_literals.find(value); - if (l != m_literals.end()) - return l->second; - - m_literals[value] = PValue(new LiteralValue(value)); - return m_literals[value]; -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/src/gallium/drivers/r600/sfn/sfn_valuepool.h deleted file mode 100644 index fa1e550..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_valuepool.h +++ /dev/null @@ -1,242 +0,0 @@ -/* -*- mesa-c++ -*- - * - * Copyright (c) 2018 Collabora LTD - * - * Author: Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished 
to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#ifndef SFN_VALUEPOOL_H -#define SFN_VALUEPOOL_H - -#include "sfn_value.h" -#include "sfn_value_gpr.h" - -#include -#include - -namespace r600 { - -using LiteralBuffer = std::map; - -class ValueMap { -public: - void insert(const PValue& v) { - auto idx = index_from(v->sel(), v->chan()); - m_map[idx] = v; - } - PValue get_or_inject(uint32_t index, uint32_t chan) { - auto idx = index_from(index, chan); - auto v = m_map.find(idx); - if (v == m_map.end()) { - insert(PValue(new GPRValue(index, chan))); - v = m_map.find(idx); - } - return v->second; - } - std::map::const_iterator begin() const {return m_map.begin();} - std::map::const_iterator end() const {return m_map.end();} - -private: - uint32_t index_from(uint32_t index, uint32_t chan) { - return (index << 3) + chan; - } - std::map m_map; -}; - -/** \brief Class to keep track of registers, uniforms, and literals - * This class holds the references to the uniforms and the literals - * and is responsible for allocating the registers. 
- */ -class ValuePool -{ -public: - - struct array_entry { - unsigned index; - unsigned length; - unsigned ncomponents; - - bool operator ()(const array_entry& a, const array_entry& b) const { - return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents); - } - }; - - using array_list = std::priority_queue, - array_entry>; - - ValuePool(); - - - GPRVector vec_from_nir(const nir_dest& dst, int num_components); - - std::vector varvec_from_nir(const nir_dest& src, int num_components); - std::vector varvec_from_nir(const nir_src& src, int num_components); - - PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); - - PValue from_nir(const nir_src& v, unsigned component); - /** Get a register that is used as source register in an ALU instruction - * The PValue holds one componet as specified. If the register refers to - * a GPR it must already have been allocated, uniforms and literals on - * the other hand might be pre-loaded. - */ - PValue from_nir(const nir_alu_src& v, unsigned component); - - /** Get a register that is used as source register in an Texture instruction - * The PValue holds one componet as specified. - */ - PValue from_nir(const nir_tex_src& v, unsigned component); - - /** Allocate a register that is used as destination register in an ALU - * instruction. The PValue holds one componet as specified. - */ - PValue from_nir(const nir_alu_dest& v, unsigned component); - - /** Allocate a register that is used as destination register in any - * instruction. The PValue holds one componet as specified. 
- */ - PValue from_nir(const nir_dest& v, unsigned component); - - - /** Inject a register into a given ssa index position - * This is used to redirect loads from system values and vertex attributes - * that are already loaded into registers */ - bool inject_register(unsigned sel, unsigned swizzle, const PValue ®, bool map); - - /** Reserve space for a local register */ - void allocate_local_register(const nir_register& reg); - void allocate_local_register(const nir_register ®, array_list& arrays); - - void allocate_arrays(array_list& arrays); - - - void increment_reserved_registers() { - ++m_next_register_index; - } - - void set_reserved_registers(unsigned rr) { - m_next_register_index =rr; - } - - /** Reserve a undef register, currently it uses (0,7), - * \todo should be eliminated in the final pass - */ - bool create_undef(nir_ssa_undef_instr* instr); - - /** Create a new register with the given index and store it in the - * lookup map - */ - PValue create_register_from_nir_src(const nir_src& sel, int comp); - - ValueMap get_temp_registers() const; - - PValue lookup_register(unsigned sel, unsigned swizzle, bool required); - - size_t register_count() const {return m_next_register_index;} - - PValue literal(uint32_t value); - - PGPRValue get_temp_register(int channel = -1); - - GPRVector get_temp_vec4(const GPRVector::Swizzle &swizzle = {0,1,2,3}); - -protected: - std::vector m_reg_arrays; - -private: - - /** Get the register index mapped from the NIR code to the r600 ir - * \param index NIR index of register - * \returns r600 ir inxex - */ - int lookup_register_index(const nir_src& src) const; - - /** Get the register index mapped from the NIR code to the r600 ir - * \param index NIR index of register - * \returns r600 ir inxex - */ - int lookup_register_index(const nir_dest& dst); - - /** Allocate a register that is is needed for lowering an instruction - * that requires complex calculations, - */ - int allocate_temp_register(); - - - PValue 
create_register(unsigned index, unsigned swizzle); - - unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa); - - unsigned get_ssa_register_index(const nir_ssa_def& ssa) const; - - unsigned get_local_register_index(const nir_register& reg); - - unsigned get_local_register_index(const nir_register& reg) const; - - void allocate_ssa_register(const nir_ssa_def& ssa); - - void allocate_array(const nir_register& reg); - - - /** Allocate a register index with the given component mask. - * If one of the components is already been allocated the function - * will signal an error bz returning -1, otherwise a register index is - * returned. - */ - int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc); - - /** search for a new register with the given index in the - * lookup map. - * \param sel register sel value - * \param swizzle register component, can also be 4,5, and 7 - * \param required true: in debug mode assert when register doesn't exist - * false: return nullptr on failure - */ - - std::set m_ssa_undef; - - std::map m_ssa_register_map; - - std::map m_registers; - - static PValue m_undef; - - struct VRec { - unsigned index; - unsigned mask; - unsigned pre_alloc_mask; - }; - std::map m_register_map; - - unsigned m_next_register_index; - - - std::map m_literals; - - int current_temp_reg_index; - int next_temp_reg_comp; -}; - -} - -#endif // SFN_VALUEPOOL_H diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp deleted file mode 100644 index ff49216..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp +++ /dev/null @@ -1,535 +0,0 @@ -#include "sfn_vertexstageexport.h" - -#include "sfn_shaderio.h" - -namespace r600 { - -using std::priority_queue; - -VertexStageExportBase::VertexStageExportBase(VertexStage& proc): - m_proc(proc), - m_cur_clip_pos(1) -{ - -} - -VertexStageExportBase::~VertexStageExportBase() -{ - -} - -bool 
VertexStageExportBase::do_process_outputs(nir_variable *output) -{ - return true; -} - -void VertexStageExportBase::emit_shader_start() -{ - -} - -void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr) -{ - -} - -bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr) -{ - auto index = nir_src_as_const_value(instr->src[1]); - assert(index && "Indirect outputs not supported"); - - const store_loc store_info = { - nir_intrinsic_component(instr), - nir_intrinsic_io_semantics(instr).location, - (unsigned)nir_intrinsic_base(instr) + index->u32, - 0 - }; - - return do_store_output(store_info, instr); -} - -VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc, - const pipe_stream_output_info *so_info, - r600_pipe_shader *pipe_shader, const r600_shader_key &key): - VertexStageWithOutputInfo(proc), - m_last_param_export(nullptr), - m_last_pos_export(nullptr), - m_num_clip_dist(0), - m_enabled_stream_buffers_mask(0), - m_so_info(so_info), - m_pipe_shader(pipe_shader), - m_key(key) -{ -} - -bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output) -{ - if (output->data.location == VARYING_SLOT_COL0 || - output->data.location == VARYING_SLOT_COL1 || - (output->data.location >= VARYING_SLOT_VAR0 && - output->data.location <= VARYING_SLOT_VAR31) || - (output->data.location >= VARYING_SLOT_TEX0 && - output->data.location <= VARYING_SLOT_TEX7) || - output->data.location == VARYING_SLOT_BFC0 || - output->data.location == VARYING_SLOT_BFC1 || - output->data.location == VARYING_SLOT_CLIP_VERTEX || - output->data.location == VARYING_SLOT_CLIP_DIST0 || - output->data.location == VARYING_SLOT_CLIP_DIST1 || - output->data.location == VARYING_SLOT_POS || - output->data.location == VARYING_SLOT_PSIZ || - output->data.location == VARYING_SLOT_FOGC || - output->data.location == VARYING_SLOT_LAYER || - output->data.location == VARYING_SLOT_EDGE || - output->data.location == VARYING_SLOT_VIEWPORT - ) { - - r600_shader_io& io = 
m_proc.sh_info().output[output->data.driver_location]; - auto semantic = r600_get_varying_semantic(output->data.location); - io.name = semantic.first; - io.sid = semantic.second; - - m_proc.evaluate_spi_sid(io); - io.write_mask = ((1 << glsl_get_components(output->type)) - 1) - << output->data.location_frac; - ++m_proc.sh_info().noutput; - - if (output->data.location == VARYING_SLOT_PSIZ || - output->data.location == VARYING_SLOT_EDGE || - output->data.location == VARYING_SLOT_LAYER) // VIEWPORT? - m_cur_clip_pos = 2; - - if (output->data.location != VARYING_SLOT_POS && - output->data.location != VARYING_SLOT_EDGE && - output->data.location != VARYING_SLOT_PSIZ && - output->data.location != VARYING_SLOT_CLIP_VERTEX) - m_param_driver_locations.push(output->data.driver_location); - - return true; - } - return false; -} - -bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) -{ - switch (store_info.location) { - case VARYING_SLOT_PSIZ: - m_proc.sh_info().vs_out_point_size = 1; - m_proc.sh_info().vs_out_misc_write = 1; - FALLTHROUGH; - case VARYING_SLOT_POS: - return emit_varying_pos(store_info, instr); - case VARYING_SLOT_EDGE: { - std::array swizzle_override = {7 ,0, 7, 7}; - return emit_varying_pos(store_info, instr, &swizzle_override); - } - case VARYING_SLOT_VIEWPORT: { - std::array swizzle_override = {7, 7, 7, 0}; - return emit_varying_pos(store_info, instr, &swizzle_override) && - emit_varying_param(store_info, instr); - } - case VARYING_SLOT_CLIP_VERTEX: - return emit_clip_vertices(store_info, instr); - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - m_num_clip_dist += 4; - return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr); - case VARYING_SLOT_LAYER: { - m_proc.sh_info().vs_out_misc_write = 1; - m_proc.sh_info().vs_out_layer = 1; - std::array swz = {7,7,0,7}; - return emit_varying_pos(store_info, instr, &swz) && - emit_varying_param(store_info, instr); - } - case 
VARYING_SLOT_VIEW_INDEX: - return emit_varying_pos(store_info, instr) && - emit_varying_param(store_info, instr); - - default: - return emit_varying_param(store_info, instr); - } - - fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", - store_info.location); - return false; -} - -bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr, - std::array *swizzle_override) -{ - std::array swizzle; - uint32_t write_mask = 0; - - if (swizzle_override) { - swizzle = *swizzle_override; - for (int i = 0; i < 4; ++i) { - if (swizzle[i] < 6) - write_mask |= 1 << i; - } - } else { - write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; - for (int i = 0; i < 4; ++i) - swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7; - } - - m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; - - GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle); - m_proc.set_output(store_info.driver_location, value.sel()); - - int export_slot = 0; - - switch (store_info.location) { - case VARYING_SLOT_EDGE: { - m_proc.sh_info().vs_out_misc_write = 1; - m_proc.sh_info().vs_out_edgeflag = 1; - m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); - m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr}); - m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf; - } - FALLTHROUGH; - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_LAYER: - export_slot = 1; - break; - case VARYING_SLOT_VIEWPORT: - m_proc.sh_info().vs_out_misc_write = 1; - m_proc.sh_info().vs_out_viewport = 1; - export_slot = 1; - break; - case VARYING_SLOT_POS: - break; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - export_slot = m_cur_clip_pos++; - break; - default: - sfn_log << SfnLog::err << __func__ << "Unsupported location " - << 
store_info.location << "\n"; - return false; - } - - m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos); - m_proc.emit_export_instruction(m_last_pos_export); - m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr()); - return true; -} - -bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr) -{ - assert(store_info.driver_location < m_proc.sh_info().noutput); - sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n"; - - int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; - std::array swizzle; - for (int i = 0; i < 4; ++i) - swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7; - - //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; - - GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true); - m_proc.sh_info().output[store_info.driver_location].gpr = value.sel(); - - /* This should use the registers!! 
*/ - m_proc.set_output(store_info.driver_location, value.sel()); - - m_last_param_export = new ExportInstruction(param_id(store_info.driver_location), - value, ExportInstruction::et_param); - m_proc.emit_export_instruction(m_last_param_export); - m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr()); - return true; -} - -bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr) -{ - m_proc.sh_info().cc_dist_mask = 0xff; - m_proc.sh_info().clip_dist_write = 0xff; - - m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3}); - m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex); - - for (int i = 0; i < 4; ++i) - m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i; - - GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()}; - - for (int i = 0; i < 8; i++) { - int oreg = i >> 2; - int ochan = i & 3; - AluInstruction *ir = nullptr; - for (int j = 0; j < 4; j++) { - ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j), - PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), - (j == ochan) ? 
EmitInstruction::write : EmitInstruction::empty); - m_proc.emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - } - - m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); - m_proc.emit_export_instruction(m_last_pos_export); - - m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); - m_proc.emit_export_instruction(m_last_pos_export); - - return true; -} - -VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc): - VertexStageExportBase(proc), - m_current_param(0) -{ - -} - -void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr) -{ - auto location = nir_intrinsic_io_semantics(instr).location; - auto driver_location = nir_intrinsic_base(instr); - auto index = nir_src_as_const_value(instr->src[1]); - assert(index); - - unsigned noutputs = driver_location + index->u32 + 1; - if (m_proc.sh_info().noutput < noutputs) - m_proc.sh_info().noutput = noutputs; - - r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32]; - auto semantic = r600_get_varying_semantic(location + index->u32); - io.name = semantic.first; - io.sid = semantic.second; - m_proc.evaluate_spi_sid(io); - io.write_mask = nir_intrinsic_write_mask(instr); - - if (location == VARYING_SLOT_PSIZ || - location == VARYING_SLOT_EDGE || - location == VARYING_SLOT_LAYER) // VIEWPORT? 
- m_cur_clip_pos = 2; - - if (location != VARYING_SLOT_POS && - location != VARYING_SLOT_EDGE && - location != VARYING_SLOT_PSIZ && - location != VARYING_SLOT_CLIP_VERTEX) { - m_param_driver_locations.push(driver_location + index->u32); - } -} - -unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location) -{ - auto param_loc = m_param_map.find(driver_location); - assert(param_loc != m_param_map.end()); - return param_loc->second; -} - -void VertexStageWithOutputInfo::emit_shader_start() -{ - while (!m_param_driver_locations.empty()) { - auto loc = m_param_driver_locations.top(); - m_param_driver_locations.pop(); - m_param_map[loc] = m_current_param++; - } -} - -unsigned VertexStageWithOutputInfo::current_param() const -{ - return m_current_param; -} - -void VertexStageExportForFS::finalize_exports() -{ - if (m_key.vs.as_gs_a) { - PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); - GPRVector primid({m_proc.primitive_id(), o,o,o}); - m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param); - m_proc.emit_export_instruction(m_last_param_export); - int i; - i = m_proc.sh_info().noutput++; - auto& io = m_proc.sh_info().output[i]; - io.name = TGSI_SEMANTIC_PRIMID; - io.sid = 0; - io.gpr = 0; - io.interpolate = TGSI_INTERPOLATE_CONSTANT; - io.write_mask = 0x1; - io.spi_sid = m_key.vs.prim_id_out; - m_proc.sh_info().vs_as_gs_a = 1; - } - - if (m_so_info && m_so_info->num_outputs) - emit_stream(-1); - - m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; - - if (!m_last_param_export) { - GPRVector value(0,{7,7,7,7}); - m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); - m_proc.emit_export_instruction(m_last_param_export); - } - m_last_param_export->set_last(); - - if (!m_last_pos_export) { - GPRVector value(0,{7,7,7,7}); - m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); - m_proc.emit_export_instruction(m_last_pos_export); - } - 
m_last_pos_export->set_last(); -} - -bool VertexStageExportForFS::emit_stream(int stream) -{ - assert(m_so_info); - if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) { - R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); - return false; - } - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - if (m_so_info->output[i].output_buffer >= 4) { - R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", - m_so_info->output[i].output_buffer); - return false; - } - } - const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; - unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; - std::vector tmp(m_so_info->num_outputs); - - /* Initialize locations where the outputs are stored. */ - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - if (stream != -1 && stream != m_so_info->output[i].stream) - continue; - - sfn_log << SfnLog::instr << "Emit stream " << i - << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; - - - so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index); - - if (!so_gpr[i]) { - sfn_log << SfnLog::err << "\nERR: register index " - << m_so_info->output[i].register_index - << " doesn't correspond to an output register\n"; - return false; - } - start_comp[i] = m_so_info->output[i].start_component; - /* Lower outputs with dst_offset < start_component. - * - * We can only output 4D vectors with a write mask, e.g. we can - * only output the W component at offset 3, etc. If we want - * to store Y, Z, or W at buffer offset 0, we need to use MOV - * to move it to X and output X. 
*/ - if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { - - GPRVector::Swizzle swizzle = {0,1,2,3}; - for (auto j = m_so_info->output[i].num_components; j < 4; ++j) - swizzle[j] = 7; - tmp[i] = m_proc.get_temp_vec4(swizzle); - - int sc = m_so_info->output[i].start_component; - AluInstruction *alu = nullptr; - for (int j = 0; j < m_so_info->output[i].num_components; j++) { - alu = new AluInstruction(op1_mov, tmp[i][j], so_gpr[i]->reg_i(j + sc), {alu_write}); - m_proc.emit_instruction(alu); - } - if (alu) - alu->set_flag(alu_last_instr); - - start_comp[i] = 0; - so_gpr[i] = &tmp[i]; - } - sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; - } - - /* Write outputs to buffers. */ - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - sfn_log << SfnLog::instr << "Write output buffer " << i - << " with register index " << m_so_info->output[i].register_index << "\n"; - - StreamOutIntruction *out_stream = - new StreamOutIntruction(*so_gpr[i], - m_so_info->output[i].num_components, - m_so_info->output[i].dst_offset - start_comp[i], - ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], - m_so_info->output[i].output_buffer, - m_so_info->output[i].stream); - m_proc.emit_export_instruction(out_stream); - m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; - } - return true; -} - - -VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc, - const r600_shader *gs_shader): - VertexStageWithOutputInfo(proc), - m_num_clip_dist(0), - m_gs_shader(gs_shader) -{ - -} - -bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) -{ - int ring_offset = -1; - const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location]; - - sfn_log << SfnLog::io << "check output " << store_info.driver_location - << " name=" << out_io.name<< " sid=" << out_io.sid << "\n"; - for (unsigned k = 0; k < m_gs_shader->ninput; ++k) { - auto& 
in_io = m_gs_shader->input[k]; - sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n"; - - if (in_io.name == out_io.name && - in_io.sid == out_io.sid) { - ring_offset = in_io.ring_offset; - break; - } - } - - if (store_info.location == VARYING_SLOT_VIEWPORT) { - m_proc.sh_info().vs_out_viewport = 1; - m_proc.sh_info().vs_out_misc_write = 1; - return true; - } - - if (ring_offset == -1) { - sfn_log << SfnLog::err << "VS defines output at " - << store_info.driver_location << "name=" << out_io.name - << " sid=" << out_io.sid << " that is not consumed as GS input\n"; - return true; - } - - uint32_t write_mask = (1 << instr->num_components) - 1; - - GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, - swizzle_from_comps(instr->num_components), true); - - auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value, - ring_offset >> 2, 4, PValue()); - m_proc.emit_export_instruction(ir); - - m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask; - if (store_info.location == VARYING_SLOT_CLIP_DIST0 || - store_info.location == VARYING_SLOT_CLIP_DIST1) - m_num_clip_dist += 4; - - return true; -} - -void VertexStageExportForGS::finalize_exports() -{ - -} - -VertexStageExportForES::VertexStageExportForES(VertexStage& proc): - VertexStageExportBase(proc) -{ -} - -bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) -{ - return true; -} - -void VertexStageExportForES::finalize_exports() -{ - -} - -} diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h deleted file mode 100644 index 46aee80..0000000 --- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef VERTEXSTAGEEXPORT_H -#define VERTEXSTAGEEXPORT_H - -#include "sfn_shader_base.h" -#include - -namespace r600 { - -class VertexStage : public 
ShaderFromNirProcessor { -public: - using ShaderFromNirProcessor::ShaderFromNirProcessor; - - virtual PValue primitive_id() = 0; -}; - -class VertexStageExportBase -{ -public: - VertexStageExportBase(VertexStage& proc); - virtual ~VertexStageExportBase(); - virtual void finalize_exports() = 0; - virtual bool do_process_outputs(nir_variable *output); - - virtual void emit_shader_start(); - - virtual void scan_store_output(nir_intrinsic_instr* instr); - bool store_output(nir_intrinsic_instr* instr); -protected: - - struct store_loc { - unsigned frac; - unsigned location; - unsigned driver_location; - int data_loc; - }; - virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0; - - VertexStage& m_proc; - int m_cur_clip_pos; - GPRVector m_clip_vertex; -}; - - -class VertexStageWithOutputInfo: public VertexStageExportBase -{ -protected: - VertexStageWithOutputInfo(VertexStage& proc); - void scan_store_output(nir_intrinsic_instr* instr) override; - void emit_shader_start() override; - bool do_process_outputs(nir_variable *output) override; -protected: - unsigned param_id(unsigned driver_location); - unsigned current_param() const; -private: - std::priority_queue, std::greater > m_param_driver_locations; - std::map m_param_map; - unsigned m_current_param; -}; - - -class VertexStageExportForFS : public VertexStageWithOutputInfo -{ -public: - VertexStageExportForFS(VertexStage& proc, - const pipe_stream_output_info *so_info, - r600_pipe_shader *pipe_shader, - const r600_shader_key& key); - - void finalize_exports() override; -private: - bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; - - bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr); - bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr, - std::array *swizzle_override = nullptr); - bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr); - bool emit_stream(int 
stream); - - ExportInstruction *m_last_param_export; - ExportInstruction *m_last_pos_export; - - int m_num_clip_dist; - int m_enabled_stream_buffers_mask; - const pipe_stream_output_info *m_so_info; - r600_pipe_shader *m_pipe_shader; - const r600_shader_key& m_key; - - -}; - -class VertexStageExportForGS : public VertexStageWithOutputInfo -{ -public: - VertexStageExportForGS(VertexStage& proc, - const r600_shader *gs_shader); - void finalize_exports() override; - -private: - bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; - unsigned m_num_clip_dist; - const r600_shader *m_gs_shader; -}; - -class VertexStageExportForES : public VertexStageExportBase -{ -public: - VertexStageExportForES(VertexStage& proc); - void finalize_exports() override; -private: - bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; -}; - - -} - -#endif // VERTEXSTAGEEXPORT_H diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp new file mode 100644 index 0000000..9b96d60 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp @@ -0,0 +1,1072 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_virtualvalues.h" +#include "sfn_alu_defines.h" +#include "sfn_valuefactory.h" +#include "sfn_instr.h" +#include "sfn_debug.h" + +#include "util/macros.h" + +#include +#include +#include + +namespace r600 { + +std::ostream& operator << (std::ostream& os, Pin pin) +{ +#define PRINT_PIN(X) case pin_ ## X : os << #X; break + switch (pin) { + PRINT_PIN(chan); + PRINT_PIN(array); + PRINT_PIN(fully); + PRINT_PIN(group); + PRINT_PIN(chgr); + PRINT_PIN(free); + case pin_none: + default: + ; + } +#undef PRINT_PIN + return os; +} + +VirtualValue::VirtualValue(int sel, int chan, Pin pin): + m_sel(sel), m_chan(chan), m_pins(pin) +{ +#if __cpp_exceptions >= 199711L + ASSERT_OR_THROW(m_sel < virtual_register_base || pin != pin_fully, "Register is virtual but pinned to sel"); +#endif +} + +bool VirtualValue::ready(int block, int index) const +{ + (void)block; + (void)index; + return true; +} + +bool VirtualValue::is_virtual() const +{ + return m_sel >= virtual_register_base; +} + +class ValueComparer: public ConstRegisterVisitor { +public: + ValueComparer(); + ValueComparer(const Register *value); + ValueComparer(const LocalArray *value); + ValueComparer(const LocalArrayValue *value); + ValueComparer(const UniformValue *value); + ValueComparer(const LiteralConstant *value); + ValueComparer(const InlineConstant *value); + + void visit(const Register& other) override; + void visit(const LocalArray& other) override; + void visit(const LocalArrayValue& other) override; + 
void visit(const UniformValue& value) override; + void visit(const LiteralConstant& other) override; + void visit(const InlineConstant& other) override; + + bool m_result; +private: + const Register *m_register; + const LocalArray *m_array; + const LocalArrayValue *m_array_value; + const UniformValue *m_uniform_value; + const LiteralConstant *m_literal_value; + const InlineConstant *m_inline_constant; +}; + +class ValueCompareCreater: public ConstRegisterVisitor { +public: + void visit(const Register& value) { compare = ValueComparer(&value);} + void visit(const LocalArray& value) {compare = ValueComparer(&value);} + void visit(const LocalArrayValue& value) {compare = ValueComparer(&value);} + void visit(const UniformValue& value) {compare = ValueComparer(&value);} + void visit(const LiteralConstant& value) {compare = ValueComparer(&value);} + void visit(const InlineConstant& value) {compare = ValueComparer(&value);} + + ValueComparer compare; +}; + +VirtualValue::Pointer VirtualValue::from_string(const std::string& s) +{ + switch (s[0]) { + case 'S': + case 'R': return Register::from_string(s); + case 'L': return LiteralConstant::from_string(s); + case 'K': return UniformValue::from_string(s); + case 'P': return InlineConstant::param_from_string(s); + case 'I': return InlineConstant::from_string(s); + + default: + std::cerr << "'" << s << "'"; + unreachable("Unknown register type"); + } +} + +bool VirtualValue::equal_to(const VirtualValue& other) const +{ + bool result = m_sel == other.m_sel && + m_chan == other.m_chan && + m_pins == other.m_pins; + + if (result) { + ValueCompareCreater comp_creater; + accept(comp_creater); + other.accept(comp_creater.compare); + result &= comp_creater.compare.m_result; + } + + return result; +} + +VirtualValue::Pointer VirtualValue::get_addr() const +{ + class GetAddressRegister: public ConstRegisterVisitor { + public: + void visit(const VirtualValue& value) {(void)value;} + void visit(const Register& value) {(void)value;}; + 
void visit(const LocalArray& value) {(void)value;} + void visit(const LocalArrayValue& value) {m_result = value.addr();} + void visit(const UniformValue& value) {(void)value;} + void visit(const LiteralConstant& value) {(void)value;} + void visit(const InlineConstant& value) {(void)value;} + + GetAddressRegister() : m_result(nullptr) {} + + PVirtualValue m_result; + }; + GetAddressRegister get_addr; + accept(get_addr); + return get_addr.m_result; +} + +Register::Register(int sel, int chan, Pin pin): + VirtualValue(sel, chan, pin) +{ +} + +void Register::add_parent(Instr *instr) +{ + m_parents.insert(instr); + instr->add_use(); + add_parent_to_array(instr); +} + +void Register::add_parent_to_array(Instr *instr) +{ + (void)instr; +} + +void Register::del_parent(Instr *instr) +{ + m_parents.erase(instr); + instr->dec_use(); + del_parent_from_array(instr); +} + +void Register::del_parent_from_array(Instr *instr) +{ + (void)instr; +} + + +void Register::add_use(Instr *instr) +{ + const auto& [itr, inserted] = m_uses.insert(instr); {} + + if (inserted) { + for (auto& p: m_parents) + p->add_use(); + } +} + +void Register::del_use(Instr *instr) +{ + sfn_log << SfnLog::opt << "Del use of " << *this << " in " << *instr << "\n"; + if (m_uses.find(instr) != m_uses.end()) { + m_uses.erase(instr); + if (is_ssa()) + for (auto& p: m_parents) + p->dec_use(); + } +} + +bool Register::ready(int block, int index) const +{ + for (auto p : m_parents) { + if (p->block_id() <= block) { + if (p->index() < index && !p->is_scheduled()) { + return false; + } + } + } + return true; +} + +void Register::accept(RegisterVisitor& visitor) +{ + visitor.visit(*this); +} + +void Register::accept(ConstRegisterVisitor& visitor) const +{ + visitor.visit(*this); +} + + +void Register::pin_live_range(bool start, bool end) +{ + m_pin_start = start; + m_pin_end = end; +} + +void Register::set_is_ssa(bool value) +{ + m_is_ssa = value; +} + +void Register::print(std::ostream& os) const +{ + os << (m_is_ssa ? 
"S" : "R") << sel() << "." << chanchar[chan()]; + + if (pin() != pin_none) + os << "@" << pin(); +} + +Register::Pointer Register::from_string(const std::string &s) +{ + std::string numstr; + char chan = 0; + std::string pinstr; + + assert(s[0] == 'R' || s[0] == '_' || s[0] == 'S' ); + + int type = 0; + for (unsigned i = 1; i < s.length(); ++i) { + if (s[i] == '.') { + type = 1; + continue; + } else if (s[i] == '@') { + type = 2; + continue; + } + + switch (type) { + case 0: numstr.append(1, s[i]); break; + case 1: chan = s[i]; break; + case 2: pinstr.append(1, s[i]); break; + default: + unreachable("Malformed register string"); + } + } + + int sel; + if (s[0] != '_') { + std::istringstream n(numstr); + n >> sel; + } else { + sel = std::numeric_limits::max(); + } + + auto p = pin_none; + if (pinstr == "chan") + p = pin_chan; + else if (pinstr == "array") + p = pin_array; + else if (pinstr == "fully") + p = pin_fully; + else if (pinstr == "group") + p = pin_group; + else if (pinstr == "chgr") + p = pin_chgr; + else if (pinstr == "free") + p = pin_free; + + switch (chan) { + case 'x' : chan = 0; break; + case 'y' : chan = 1; break; + case 'z' : chan = 2; break; + case 'w' : chan = 3; break; + case '0' : chan = 4; break; + case '1' : chan = 5; break; + case '_' : chan = 7; break; + } + + auto reg = new Register( sel, chan, p); + reg->set_is_ssa(s[0] == 'S'); + if (p == pin_fully || p == pin_array) + reg->pin_live_range(true); + return reg; +} + +RegisterVec4::RegisterVec4(): + m_sel(-1), + m_swz({7,7,7,7}), + m_values({nullptr, nullptr, nullptr, nullptr}) +{ +} + +RegisterVec4::RegisterVec4(int sel, bool is_ssa, const Swizzle& swz, Pin pin): + m_sel(sel), + m_swz(swz) +{ + for (int i = 0; i < 4; ++i) { + m_values[i] = new Element( *this, new Register(m_sel, swz[i], pin)); + m_values[i]->value()->set_is_ssa(is_ssa); + } +} + +RegisterVec4::RegisterVec4(const RegisterVec4& orig): + m_sel(orig.m_sel), + m_swz(orig.m_swz) +{ + for (int i = 0; i < 4; ++i) + m_values[i] = 
new Element(*this, orig.m_values[i]->value()); +} + +RegisterVec4::RegisterVec4(PRegister x, PRegister y, PRegister z, PRegister w, Pin pin) +{ + PRegister dummy = nullptr; + + if (x) { + m_sel = x->sel(); + } else if (y) { + m_sel = y->sel(); + } else if (z) { + m_sel = z->sel(); + } else if (w) { + m_sel = w->sel(); + } else + m_sel = 0; + + if (!(x && y && z && w)) + dummy = new Register (m_sel, 7, pin_none); + + m_values[0] = new Element(*this, x ? x : dummy); + m_values[1] = new Element(*this, y ? y : dummy); + m_values[2] = new Element(*this, z ? z : dummy); + m_values[3] = new Element(*this, w ? w : dummy); + + for (int i = 0; i < 4; ++i) { + if (m_values[0]->value()->pin() == pin_fully) { + pin = pin_fully; + break; + } + } + + for (int i = 0; i < 4; ++i) { + switch (m_values[i]->value()->pin()) { + case pin_none: + case pin_free: + m_values[i]->value()->set_pin(pin); + break; + case pin_chan: + if (pin == pin_group) + m_values[i]->value()->set_pin(pin_chgr); + break; + default: + ; + } + + m_swz[i] = m_values[i]->value()->chan(); + assert(m_values[i]->value()->sel() == m_sel); + } +} + +void RegisterVec4::add_use(Instr *instr) +{ + for (auto& r: m_values) { + if (r->value()->chan() < 4) + r->value()->add_use(instr); + } +} + +void RegisterVec4::del_use(Instr *instr) +{ + for (auto& r: m_values) { + r->value()->del_use(instr); + } +} + +bool RegisterVec4::has_uses() const +{ + for (auto& r: m_values) { + if (r->value()->has_uses()) + return true; + } + return false; +} + + +int RegisterVec4::sel() const +{ + int comp = 0; + while (comp < 4 && m_values[comp]->value()->chan() > 3) + ++comp; + return m_values[comp < 4 ? 
comp : 0]->value()->sel(); +} + +bool RegisterVec4::ready(int block_id, int index) const +{ + for (int i = 0; i < 4; ++i) { + if (m_values[i]->value()->chan() < 4) { + if (!m_values[i]->value()->ready(block_id, index)) + return false; + } + } + return true; +} + +void RegisterVec4::print(std::ostream& os) const +{ + os << (m_values[0]->value()->is_ssa() ? 'S' : 'R') << sel() << "."; + for (int i = 0; i < 4; ++i) + os << VirtualValue::chanchar[m_swz[i]]; +} + +bool operator == (const RegisterVec4& lhs, const RegisterVec4& rhs) +{ + for (int i = 0; i < 4; ++i) { + assert(lhs[i]); + assert(rhs[i]); + if (!lhs[i]->equal_to(*rhs[i])) { + return false; + } + } + return true; +} + +RegisterVec4::Element::Element(const RegisterVec4& parent, int chan): + m_parent(parent), + m_value(new Register(parent.m_sel, chan, pin_none)) +{ +} + +RegisterVec4::Element::Element(const RegisterVec4& parent, PRegister value): + m_parent(parent), + m_value(value) +{ +} + +LiteralConstant::LiteralConstant(uint32_t value): + VirtualValue(ALU_SRC_LITERAL, -1, pin_none), + m_value(value) +{ +} + +void LiteralConstant::accept(RegisterVisitor& vistor) +{ + vistor.visit(*this); +} + +void LiteralConstant::accept(ConstRegisterVisitor& vistor) const +{ + vistor.visit(*this); +} + +void LiteralConstant::print(std::ostream& os) const +{ + os << "L[0x" << std::hex << m_value << std::dec << "]"; +} + +LiteralConstant::Pointer LiteralConstant::from_string(const std::string& s) +{ + if (s[1] != '[') + return nullptr; + + std::string numstr; + for (unsigned i = 2; i < s.length(); ++i) { + if (s[i] == ']') + break; + + if (isxdigit(s[i])) + numstr.append(1, s[i]); + if (s[i] == 'x') + continue; + } + + std::istringstream n(numstr); + + uint32_t num; + n >> std::hex >> num; + return new LiteralConstant( num); +} + + +// Inline constants usually don't care about the channel but +// ALU_SRC_PV should be pinned, but we only emit these constants +// very late, and based on the real register they replace 
+InlineConstant::InlineConstant(int sel, int chan): + VirtualValue(sel, chan, pin_none) +{ +} + +void InlineConstant::accept(RegisterVisitor& vistor) +{ + vistor.visit(*this); +} + +void InlineConstant::accept(ConstRegisterVisitor& vistor) const +{ + vistor.visit(*this); +} + +void InlineConstant::print(std::ostream& os) const +{ + auto ivalue = alu_src_const.find(static_cast(sel())); + if (ivalue != alu_src_const.end()) { + os << "I[" << ivalue->second.descr<< "]"; + if (ivalue->second.use_chan) + os << "." << chanchar[chan()]; + } else if (sel() >= ALU_SRC_PARAM_BASE && + sel() < ALU_SRC_PARAM_BASE + 32 ) { + os << "Param" + << sel() - ALU_SRC_PARAM_BASE + << "." << chanchar[chan()]; + } else { + unreachable("Unknown inline constant"); + } +} + +std::map> InlineConstant::s_opmap; + +InlineConstant::Pointer InlineConstant::from_string(const std::string& s) +{ + std::string namestr; + char chan = 0; + + ASSERT_OR_THROW(s[1] == '[', "inline const not started with '['"); + + unsigned i = 2; + while (i < s.length()) { + if (s[i] == ']') + break; + namestr.append(1, s[i]); + ++i; + } + + ASSERT_OR_THROW(s[i] == ']', "inline const not closed with ']'"); + + auto entry = s_opmap.find(namestr); + AluInlineConstants value = ALU_SRC_UNKNOWN; + bool use_chan = false; + + if (entry == s_opmap.end()) { + for (auto& [opcode, descr] : alu_src_const) { + if (namestr == descr.descr) { + value = opcode; + use_chan = descr.use_chan; + s_opmap[namestr] = std::make_pair(opcode, use_chan); + + break; + } + } + } else { + value = entry->second.first; + use_chan = entry->second.second; + } + + ASSERT_OR_THROW(value != ALU_SRC_UNKNOWN, "Unknwon inline constant was given"); + + if (use_chan) { + ASSERT_OR_THROW(s[i + 1] == '.', "inline const channel not started with '.'"); + switch (s[i + 2]) { + case 'x': chan = 0; break; + case 'y': chan = 1; break; + case 'z': chan = 2; break; + case 'w': chan = 3; break; + case '0': chan = 4; break; + case '1': chan = 5; break; + case '_': chan = 7; 
break; + default: + ASSERT_OR_THROW(0, "invalied inline const channel "); + } + } + return new InlineConstant( value, chan); +} + +InlineConstant::Pointer InlineConstant::param_from_string(const std::string& s) +{ + assert(s.substr(0, 5) == "Param"); + + int param = 0; + int i = 5; + while (isdigit(s[i])) { + param *= 10; + param += s[i] - '0'; + ++i; + } + + int chan = 7; + assert(s[i] == '.'); + switch (s[i+1]) { + case 'x': chan = 0; break; + case 'y': chan = 1; break; + case 'z': chan = 2; break; + case 'w': chan = 3; break; + default: + unreachable("unsupported channel char"); + } + + return new InlineConstant( ALU_SRC_PARAM_BASE + param, chan); +} + +UniformValue::UniformValue(int sel, int chan, int kcache_bank): + VirtualValue(sel, chan, pin_none), + m_kcache_bank(kcache_bank), + m_buf_addr(nullptr) +{ +} + +UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr): + VirtualValue(sel, chan, pin_none), + m_kcache_bank(0), + m_buf_addr(buf_addr) +{ +} + +void UniformValue::accept(RegisterVisitor& vistor) +{ + vistor.visit(*this); +} + +void UniformValue::accept(ConstRegisterVisitor& vistor) const +{ + vistor.visit(*this); +} + +PVirtualValue UniformValue::buf_addr() const +{ + return m_buf_addr; +} + +void UniformValue::print(std::ostream& os) const +{ + os << "KC" << m_kcache_bank; + if (m_buf_addr) { + os << "[" << *m_buf_addr + << "]"; + } + os << "[" << (sel() - 512) << "]." 
<< chanchar[chan()]; +} + +bool UniformValue::equal_buf_and_cache(const UniformValue& other) const +{ + bool result = m_kcache_bank == other.m_kcache_bank; + if (result) { + if (m_buf_addr && other.m_buf_addr) { + result = m_buf_addr->equal_to(other); + } else { + result = !m_buf_addr && !other.m_buf_addr; + } + } + return result; +} + + +UniformValue::Pointer UniformValue::from_string(const std::string& s) +{ + assert(s[1] == 'C'); + std::istringstream is(s.substr(2)); + int bank; + char c; + is >> bank; + is >> c; + + assert(c == '['); + + int index; + is >> index; + + is >> c; + assert(c == ']'); + is >> c; + assert(c == '.'); + + is >> c; + int chan = 0; + switch (c) { + case 'x': chan = 0; break; + case 'y': chan = 1; break; + case 'z': chan = 2; break; + case 'w': chan = 3; break; + default: + unreachable("Unknown channle when reading uniform"); + } + return new UniformValue(index + 512, chan, bank); +} + +LocalArray::LocalArray(int base_sel, int nchannels, int size, int frac): + Register(base_sel, nchannels, pin_array), + m_base_sel(base_sel), + m_nchannels(nchannels), + m_size(size), + m_values(size * nchannels), + m_frac(frac) +{ + assert(nchannels <= 4); + assert(nchannels + frac <= 4); + + sfn_log << SfnLog::reg << "Allocate array A" << base_sel << "(" + << size << ", " << frac << ", " << nchannels << ")\n"; + + for (int c = 0; c < nchannels; ++c) { + for (unsigned i = 0; i < m_size; ++i) { + PRegister reg = new Register( base_sel + i, c + frac, pin_array); + m_values[m_size * c + i] = new LocalArrayValue(reg, *this); + + /* Pin the array register on the start, because currently we don't + * don't track the first write to an array element as write to all + * array elements, and it seems that the one can not just use registers + * that are not written to in an array for other purpouses */ + m_values[m_size * c + i]->pin_live_range(true); + } + } +} + +void LocalArray::accept(RegisterVisitor& vistor) +{ + vistor.visit(*this); +} + +void 
LocalArray::accept(ConstRegisterVisitor& vistor) const +{ + vistor.visit(*this); +} + +void LocalArray::print(std::ostream& os) const +{ + os << "A" << m_base_sel << "[0 " << ":" << m_values.size() << "]."; + for (unsigned i = 0; i < m_nchannels; ++i) { + os << chanchar[i]; + } +} + + +size_t LocalArray::size() const +{ + return m_size; +} + +uint32_t LocalArray::nchannels() const +{ + return m_nchannels; +} + +PRegister LocalArray::element(size_t offset, PVirtualValue indirect, uint32_t chan) +{ + ASSERT_OR_THROW(offset < m_size, "Array: index out of range"); + ASSERT_OR_THROW(chan < m_nchannels, "Array: channel out of range"); + + sfn_log << SfnLog::reg << "Request element A" << m_base_sel << "[" << offset; + if (indirect) + sfn_log << "+" << *indirect; + sfn_log << SfnLog::reg << "]\n"; + + if (indirect) { + class ResolveDirectArrayElement: public ConstRegisterVisitor { + public: + void visit(const Register& value) {(void) value;}; + void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");} + void visit(const LocalArrayValue& value) {(void) value;} + void visit(const UniformValue& value) {(void)value;} + void visit(const LiteralConstant& value) {offset = value.value(); is_contant = true;} + void visit(const InlineConstant& value) {(void)value;} + + ResolveDirectArrayElement(): offset(0), is_contant(false) {} + + int offset; + bool is_contant; + } addr; + + // If the address os a literal constant then update the offset + // and don't access the value indirectly + indirect->accept(addr); + if (addr.is_contant) { + offset += addr.offset; + indirect = nullptr; + ASSERT_OR_THROW(offset < m_size, "Array: indirect constant index out of range"); + } + } + + LocalArrayValue *reg = m_values[m_size * chan + offset]; + if (indirect) { + reg = new LocalArrayValue( reg, indirect, *this); + m_values_indirect.push_back(reg); + } + + sfn_log << SfnLog::reg << " got " << *reg << "\n"; + return reg; +} + +bool 
LocalArray::ready_for_direct(int block, int index, int chan) const +{ + if (!Register::ready(block, index)) + return false; + + /* For direct access to an array value we also have to take indirect + * writes on the same channels into account */ + for (LocalArrayValue *e : m_values_indirect) { + if (e->chan() == chan && !e->Register::ready(block, index)) { + return false; + } + } + + return true; +} + +bool LocalArray::ready_for_indirect(int block, int index, int chan) const +{ + int offset = (chan - m_frac) * m_size; + for (unsigned i = 0; i < m_size; ++i) { + if (!m_values[offset + i]->Register::ready(block, index)) + return false; + } + + return ready_for_direct(block, index, chan); +} + + +LocalArrayValue::LocalArrayValue(PRegister reg, PVirtualValue index, + LocalArray& array): + Register(reg->sel(), reg->chan(), pin_array), + m_addr(index), + m_array(array) +{ +} + +const Register& LocalArray::operator ()(size_t idx, size_t chan) const +{ + return *m_values[m_size * (chan - m_frac) + idx]; +} + +LocalArrayValue::LocalArrayValue(PRegister reg, LocalArray& array): + LocalArrayValue(reg, nullptr, array) +{ + +} + + +PVirtualValue LocalArrayValue::addr() const +{ + return m_addr; +} + +const LocalArray& LocalArrayValue::array() const +{ + return m_array; +} + + +void LocalArrayValue::forward_del_use(Instr *instr) +{ + if (m_addr && m_addr->as_register()) + m_addr->as_register()->del_use(instr); +} + +void LocalArrayValue::forward_add_use(Instr *instr) +{ + if (m_addr && m_addr->as_register()) + m_addr->as_register()->add_use(instr); +} + +void LocalArrayValue::accept(RegisterVisitor& vistor) +{ + vistor.visit(*this); +} + +void LocalArrayValue::accept(ConstRegisterVisitor& vistor) const +{ + vistor.visit(*this); +} + +void LocalArrayValue::add_parent_to_array(Instr *instr) +{ + m_array.add_parent(instr); +} + +void LocalArrayValue::del_parent_from_array(Instr *instr) +{ + m_array.del_parent(instr); +} + +void LocalArrayValue::print(std::ostream& os) const +{ + int 
offset = sel() - m_array.sel(); + os << "A" << m_array.sel() << "["; + if ( offset > 0 && m_addr) + os << offset << "+" << *m_addr; + else if (m_addr) + os << *m_addr; + else + os << offset; + os << "]." << chanchar[chan()]; +} + +bool LocalArrayValue::ready(int block, int index) const +{ + return m_addr ? + (m_array.ready_for_indirect(block, index, chan()) && m_addr->ready(block, index)): + m_array.ready_for_direct(block, index, chan()); +} + +ValueComparer::ValueComparer() : + m_result(false), + m_register(nullptr), + m_array(nullptr), + m_array_value(nullptr), + m_uniform_value(nullptr), + m_literal_value(nullptr), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const Register *value): + m_result(false), + m_register(value), + m_array(nullptr), + m_array_value(nullptr), + m_uniform_value(nullptr), + m_literal_value(nullptr), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const LocalArray *value): + m_result(false), + m_register(nullptr), + m_array(value), + m_array_value(nullptr), + m_uniform_value(nullptr), + m_literal_value(nullptr), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const LocalArrayValue *value): + m_result(false), + m_register(nullptr), + m_array(nullptr), + m_array_value(value), + m_uniform_value(nullptr), + m_literal_value(nullptr), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const UniformValue *value): + m_result(false), + m_register(nullptr), + m_array(nullptr), + m_array_value(nullptr), + m_uniform_value(value), + m_literal_value(nullptr), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const LiteralConstant *value): + m_result(false), + m_register(nullptr), + m_array(nullptr), + m_array_value(nullptr), + m_uniform_value(nullptr), + m_literal_value(value), + m_inline_constant(nullptr) +{} + +ValueComparer::ValueComparer(const InlineConstant *value): + m_result(false), + m_register(nullptr), + m_array(nullptr), + m_array_value(nullptr), + 
m_uniform_value(nullptr), + m_literal_value(nullptr), + m_inline_constant(value) +{} + +void ValueComparer::visit(const Register& other) +{ + (void)other; + m_result = !!m_register; +}; + +void ValueComparer::visit(const LocalArray& other) +{ + m_result = false; + if (m_array) { + m_result = m_array->size() == other.size() && + m_array->nchannels() == other.nchannels(); + } +}; + +void ValueComparer::visit(const LocalArrayValue& other) +{ + m_result = false; + if (m_array_value) { + m_result = m_array_value->array().equal_to(other.array()); + if (m_result) { + auto my_addr = m_array_value->addr(); + auto other_addr = other.addr(); + if (my_addr && other_addr) { + m_result = my_addr->equal_to(*other_addr); + } else { + m_result = !my_addr && !other_addr; + } + } + } +}; + +void ValueComparer::visit(const UniformValue& value) +{ + m_result = false; + if (m_uniform_value) { + m_result = m_uniform_value->kcache_bank() == value.kcache_bank(); + if (m_result) { + auto my_buf_addr = m_uniform_value->buf_addr(); + auto other_buf_addr = value.buf_addr(); + if (my_buf_addr && other_buf_addr) { + m_result = my_buf_addr->equal_to(*other_buf_addr); + } else { + m_result = !my_buf_addr && !other_buf_addr; + } + } + } +}; + +void ValueComparer::visit(const LiteralConstant& other) +{ + m_result = m_literal_value && (m_literal_value->value() == other.value()); +}; + +void ValueComparer::visit(const InlineConstant& other) +{ + (void)other; + m_result = !!m_inline_constant; +}; + + +} // namespace r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h new file mode 100644 index 0000000..25fd958 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h @@ -0,0 +1,460 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
/* ASSERT_OR_THROW(EXPR, ERROR): check a condition that must hold.
 *
 * With exceptions enabled a failed check throws std::invalid_argument with
 * ERROR as message, otherwise it ends in unreachable(ERROR).
 *
 * The macro expands to a single statement (do { } while (0)), so it can be
 * used as the unbraced body of an if that is followed by an else without
 * capturing that else. */
#if __cpp_exceptions >= 199711L
#include <stdexcept>
#define ASSERT_OR_THROW(EXPR, ERROR) \
   do { if (!(EXPR)) throw std::invalid_argument(ERROR); } while (0)
#else
#define ASSERT_OR_THROW(EXPR, ERROR) \
   do { if (!(EXPR)) unreachable(ERROR); } while (0)
#endif
/* Start and end of a live range plus a flag that tells whether the range is
 * pinned. A default constructed range has start and end set to -1 and is
 * not pinned. */
struct LiveRange {
   LiveRange() = default;
   LiveRange(int s, int e): start(s), end(e) {}

   int start{-1};
   int end{-1};
   int is_pinned{0};
};
/* Write the textual form of a register to a stream by forwarding to
 * Register::print(); returns the stream so that the output can be chained. */
inline std::ostream& operator << (std::ostream& os, const Register& val)
{
   val.print(os);
   return os;
}
/* Write the textual form of an inline constant to a stream by forwarding to
 * InlineConstant::print(); returns the stream so that the output can be
 * chained. */
inline std::ostream& operator << (std::ostream& os, const InlineConstant& val)
{
   val.print(os);
   return os;
}
RegisterVec4& rhs); + +inline bool operator != (const RegisterVec4& lhs, const RegisterVec4& rhs) +{ + return !(lhs == rhs); +} + +inline std::ostream& operator << (std::ostream& os, const RegisterVec4& val) +{ + val.print(os); + return os; +} + + +class LiteralConstant : public VirtualValue { +public: + using Pointer = R600_POINTER_TYPE(LiteralConstant); + + LiteralConstant(uint32_t value); + void accept(RegisterVisitor& vistor) override; + void accept(ConstRegisterVisitor& vistor) const override; + void print(std::ostream& os) const override; + uint32_t value() const {return m_value;} + static Pointer from_string(const std::string& s); + LiteralConstant *as_literal() override { return this;} + +private: + LiteralConstant(const LiteralConstant& orig) = default; + uint32_t m_value; +}; +using PLiteralVirtualValue = LiteralConstant::Pointer; + + +class UniformValue : public VirtualValue { +public: + using Pointer = R600_POINTER_TYPE(UniformValue); + + UniformValue(int sel, int chan, int kcache_bank = 0); + UniformValue(int sel, int chan, PVirtualValue buf_addr); + + void accept(RegisterVisitor& vistor) override; + void accept(ConstRegisterVisitor& vistor) const override; + void print(std::ostream& os) const override; + int kcache_bank() const { return m_kcache_bank; } + PVirtualValue buf_addr() const; + UniformValue *as_uniform() override { return this;} + + bool equal_buf_and_cache(const UniformValue& other) const; + static Pointer from_string(const std::string& s); + +private: + int m_kcache_bank; + PVirtualValue m_buf_addr; +}; +using PUniformVirtualValue = UniformValue::Pointer; + +inline std::ostream& operator << (std::ostream& os, const UniformValue& val) +{ + val.print(os); + return os; +} + +class LocalArrayValue; +class LocalArray : public Register { +public: + using Pointer = R600_POINTER_TYPE(LocalArray); + using Values = std::vector >; + + LocalArray(int base_sel, int nchannels, int size, int frac = 0); + void accept(RegisterVisitor& vistor) override; + 
void accept(ConstRegisterVisitor& vistor) const override; + void print(std::ostream& os) const override; + bool ready_for_direct(int block, int index, int chan) const; + bool ready_for_indirect(int block, int index, int chan) const; + + PRegister element(size_t offset, PVirtualValue indirect, uint32_t chan); + + size_t size() const; + uint32_t nchannels() const; + uint32_t frac() const { return m_frac;} + + void add_parent_to_elements(Instr *instr); + + const Register& operator ()(size_t idx, size_t chan) const; + + Values::iterator begin() { return m_values.begin();} + Values::iterator end() { return m_values.end();} + +private: + uint32_t m_base_sel; + uint32_t m_nchannels; + size_t m_size; + Values m_values; + Values m_values_indirect; + int m_frac; +}; + +inline std::ostream& operator << (std::ostream& os, const LocalArray & val) +{ + val.print(os); + return os; +} + +class LocalArrayValue : public Register { +public: + using Pointer = R600_POINTER_TYPE(LocalArrayValue); + + LocalArrayValue(PRegister reg, LocalArray& array); + LocalArrayValue(PRegister reg, PVirtualValue index, LocalArray &array); + + void accept(RegisterVisitor& vistor) override; + void accept(ConstRegisterVisitor& vistor) const override; + void print(std::ostream& os) const override; + bool ready(int block, int index) const override; + + VirtualValue *addr() const override; + const LocalArray& array() const; +private: + void forward_del_use(Instr *instr) override; + void forward_add_use(Instr *instr) override; + void add_parent_to_array(Instr *instr) override; + void del_parent_from_array(Instr *instr) override; + + PVirtualValue m_addr; + LocalArray& m_array; +}; + +inline std::ostream& operator << (std::ostream& os, const LocalArrayValue& val) +{ + val.print(os); + return os; +} + +template +bool sfn_value_equal(const T* lhs, const T* rhs) +{ + if (lhs) { + if (!rhs) return + false; + if ( !lhs->equal_to(*rhs)) + return false; + } else { + if (rhs) + return false; + } + return true; +} + 
+/* Visitor interface over the mutable value classes; takes one visit() overload per concrete value type (presumably dispatched from the accept(RegisterVisitor&) overrides - confirm in the .cpp). */ +class RegisterVisitor { +public: + virtual ~RegisterVisitor() = default; /* polymorphic base: keep destruction through a base pointer well-defined */ + virtual void visit(Register& value) = 0; + virtual void visit(LocalArray& value) = 0; + virtual void visit(LocalArrayValue& value) = 0; + virtual void visit(UniformValue& value) = 0; + virtual void visit(LiteralConstant& value) = 0; + virtual void visit(InlineConstant& value) = 0; +}; + +/* Read-only counterpart of RegisterVisitor: same overload set, but every value is passed as const. */ +class ConstRegisterVisitor { +public: + virtual ~ConstRegisterVisitor() = default; /* polymorphic base: keep destruction through a base pointer well-defined */ + virtual void visit(const Register& value) = 0; + virtual void visit(const LocalArray& value) = 0; + virtual void visit(const LocalArrayValue& value) = 0; + virtual void visit(const UniformValue& value) = 0; + virtual void visit(const LiteralConstant& value) = 0; + virtual void visit(const InlineConstant& value) = 0; +}; + +} + diff --git a/src/gallium/drivers/r600/sfn/tests/meson.build b/src/gallium/drivers/r600/sfn/tests/meson.build new file mode 100644 index 0000000..e256548 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/tests/meson.build @@ -0,0 +1,37 @@ + +r600_test_lib = static_library('r600_test', 'sfn_test_shaders.cpp', + cpp_args: '-std=c++17', + include_directories : [ inc_src, inc_mapi, inc_mesa, inc_include, + inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common, + inc_gallium_drivers, ], + dependencies : [idep_gtest, dep_thread, dep_llvm, idep_nir, + idep_nir_headers], + +) + +r600_test_dep = declare_dependency( + include_directories : [ inc_src, inc_mapi, inc_mesa, inc_include, + inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common, + inc_gallium_drivers, ], + link_with : [ libr600, libmesa, libgalliumvl, libgallium, libradeonwinsys, r600_test_lib], + dependencies : [idep_gtest, dep_thread, dep_llvm, idep_nir, + idep_nir_headers] +) + + + +if with_tests + foreach t : ['valuefactory', 'value', 'instr', 'instrfromstring', 'liverange', + 'optimizer', 'shaderfromstring' ] + test( + t, + executable('test-@0@-r600-sfn'.format(t), + ['sfn_@0@_test.cpp'.format(t)], + dependencies : [r600_test_dep], + cpp_args: '-std=c++17' + ), + suite : ['r600'] + ) + endforeach +endif + +diff --git 
a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp new file mode 100644 index 0000000..227f577 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp @@ -0,0 +1,798 @@ + +#include "../sfn_instr_alugroup.h" +#include "../sfn_instr_export.h" +#include "../sfn_instr_fetch.h" +#include "../sfn_instr_lds.h" +#include "../sfn_instr_tex.h" + +#include "gtest/gtest.h" + +using namespace r600; + +using std::vector; + +class InstrTest : public ::testing::Test +{ + void SetUp() override { + init_pool(); + } + + void TearDown() override { + release_pool(); + } +protected: + void check(const Instr& lhs,const Instr& rhs) const { + EXPECT_EQ(lhs, rhs); + } +}; + + +TEST_F(InstrTest, test_alu_barrier) +{ + AluInstr alu(op0_group_barrier); + + EXPECT_FALSE(alu.has_alu_flag(alu_write)); + EXPECT_EQ(alu.opcode(), op0_group_barrier); + + EXPECT_EQ(alu.dest_chan(), 0); + + EXPECT_EQ(alu, alu); +} + + +TEST_F(InstrTest, test_alu_uni_op_mov) +{ + AluInstr alu(op1_mov, + new Register( 128, 2, pin_none), + new Register( 129, 0, pin_chan), {alu_write}); + + EXPECT_TRUE(alu.has_alu_flag(alu_write)); + + EXPECT_FALSE(alu.has_alu_flag(alu_last_instr)); + EXPECT_FALSE(alu.end_group()); + EXPECT_FALSE(alu.has_alu_flag(alu_op3)); + EXPECT_FALSE(alu.has_alu_flag(alu_src0_abs)); + EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg)); + + EXPECT_EQ(alu.opcode(), op1_mov); + + EXPECT_EQ(alu.dest_chan(), 2); + auto dest = alu.dest(); + + ASSERT_TRUE(dest); + EXPECT_EQ(dest->sel(), 128); + EXPECT_EQ(dest->chan(), 2); + EXPECT_EQ(dest->pin(), pin_none); + + auto src0 = alu.psrc(0); + ASSERT_TRUE(src0); + + EXPECT_EQ(src0->sel(), 129); + EXPECT_EQ(src0->chan(), 0); + EXPECT_EQ(src0->pin(), pin_chan); + + EXPECT_EQ(alu.n_sources(), 1); + + EXPECT_FALSE(alu.psrc(1)); + EXPECT_FALSE(alu.psrc(2)); + + alu.set_alu_flag(alu_src0_abs); + EXPECT_TRUE(alu.has_alu_flag(alu_src0_abs)); + + alu.set_alu_flag(alu_src0_neg); + 
EXPECT_TRUE(alu.has_alu_flag(alu_src0_neg)); + +} + +TEST_F(InstrTest, test_alu_op2) +{ + AluInstr alu(op2_add, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + new Register( 129, 3, pin_none), + {alu_write, alu_last_instr}); + + EXPECT_TRUE(alu.has_alu_flag(alu_write)); + + EXPECT_TRUE(alu.has_alu_flag(alu_last_instr)); + EXPECT_FALSE(alu.has_alu_flag(alu_op3)); + + EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg)); + EXPECT_FALSE(alu.has_alu_flag(alu_src1_neg)); + EXPECT_FALSE(alu.has_alu_flag(alu_src2_neg)); + + EXPECT_FALSE(alu.has_alu_flag(alu_src0_rel)); + EXPECT_FALSE(alu.has_alu_flag(alu_src1_rel)); + EXPECT_FALSE(alu.has_alu_flag(alu_src2_rel)); + + EXPECT_EQ(alu.opcode(), op2_add); + + EXPECT_EQ(alu.dest_chan(), 1); + auto dest = alu.dest(); + + ASSERT_TRUE(dest); + EXPECT_EQ(dest->sel(), 130); + EXPECT_EQ(dest->chan(), 1); + EXPECT_EQ(dest->pin(), pin_none); + + EXPECT_EQ(alu.n_sources(), 2); + + auto src0 = alu.psrc(0); + ASSERT_TRUE(src0); + + EXPECT_EQ(src0->sel(), 129); + EXPECT_EQ(src0->chan(), 2); + EXPECT_EQ(src0->pin(), pin_chan); + + auto src1 = alu.psrc(1); + ASSERT_TRUE(src1); + + EXPECT_EQ(src1->sel(), 129); + EXPECT_EQ(src1->chan(), 3); + EXPECT_EQ(src1->pin(), pin_none); + + EXPECT_FALSE(alu.psrc(2)); + EXPECT_EQ(alu, alu); +} + +TEST_F(InstrTest, test_alu_op3) +{ + AluInstr alu(op3_cnde, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + new Register( 129, 3, pin_none), + new Register( 131, 1, pin_none), + {alu_write, alu_last_instr}); + + EXPECT_TRUE(alu.has_alu_flag(alu_write)); + EXPECT_TRUE(alu.has_alu_flag(alu_last_instr)); + EXPECT_TRUE(alu.end_group()); + EXPECT_TRUE(alu.has_alu_flag(alu_op3)); + + EXPECT_EQ(alu.opcode(), op3_cnde); + + EXPECT_EQ(alu.dest_chan(), 1); + auto dest = alu.dest(); + + ASSERT_TRUE(dest); + EXPECT_EQ(dest->sel(), 130); + EXPECT_EQ(dest->chan(), 1); + EXPECT_EQ(dest->pin(), pin_none); + + EXPECT_EQ(alu.n_sources(), 3); + + auto src0 = alu.psrc(0); + ASSERT_TRUE(src0); 
+ + EXPECT_EQ(src0->sel(), 129); + EXPECT_EQ(src0->chan(), 2); + EXPECT_EQ(src0->pin(), pin_chan); + + auto src1 = alu.psrc(1); + ASSERT_TRUE(src1); + + EXPECT_EQ(src1->sel(), 129); + EXPECT_EQ(src1->chan(), 3); + EXPECT_EQ(src1->pin(), pin_none); + + auto src2 = alu.psrc(2); + ASSERT_TRUE(src2); + + EXPECT_EQ(src2->sel(), 131); + EXPECT_EQ(src2->chan(), 1); + EXPECT_EQ(src2->pin(), pin_none); + + EXPECT_EQ(alu, alu); +} + +TEST_F(InstrTest, test_alu_op1_comp) +{ + auto r128z = new Register( 128, 2, pin_none); + auto r128zc = new Register( 128, 2, pin_chan); + auto r128y = new Register( 128, 1, pin_none); + auto r129x = new Register( 129, 0, pin_none); + auto r129xc = new Register( 129, 0, pin_chan); + auto r129y = new Register( 129, 1, pin_none); + auto r130x = new Register( 130, 0, pin_none); + + + AluInstr alu1(op1_mov, r128z, r129x, {alu_write}); + EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129x, {alu_write, alu_last_instr})); + EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, {alu_write})); + EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, {alu_write})); + + EXPECT_EQ(alu1, alu1); +} + +TEST_F(InstrTest, test_alu_op2_comp) +{ + auto r128x = new Register( 128, 0, pin_none); + auto r128y = new Register( 128, 1, pin_none); + auto r128z = new Register( 128, 2, pin_none); + + AluInstr alu1(op2_add, r128z, r128x, r128y, {alu_write}); + + EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 129, 2, pin_none), {alu_write})); + EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 128, 0, pin_none), {alu_write})); + EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 128, 1, pin_chan), {alu_write})); +} + +TEST_F(InstrTest, test_alu_op3_comp) +{ + auto r128x = new Register( 128, 
0, pin_none); + auto r128y = new Register( 128, 1, pin_none); + auto r128z = new Register( 128, 2, pin_none); + + AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, {alu_write}); + + EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 129, 2, pin_none), {alu_write})); + EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 128, 0, pin_none), {alu_write})); + EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 128, 1, pin_chan), {alu_write})); +} + +TEST_F(InstrTest, test_alu_op3_ne) +{ + auto R130x = new Register( 130, 0, pin_none); + auto R130y = new Register( 130, 1, pin_none); + auto R130z = new Register( 130, 2, pin_none); + auto R131z = new Register( 131, 2, pin_none); + auto R131w = new Register( 131, 3, pin_none); + + AluInstr alu(op3_cnde, R130x, R130y, R131z, R131w, {alu_write, alu_last_instr}); + + EXPECT_NE(alu, AluInstr(op3_muladd, R130x, R130y, R131z, R131w, {alu_write, alu_last_instr})); + + EXPECT_NE(alu, AluInstr(op3_cnde, R130z, R130y, R131z, R131w, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130z, R131z, R131w, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R130z, R131w, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R130z, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, {alu_write})); + + AluInstr alu_cf_changes = alu; + alu_cf_changes.set_cf_type(cf_alu_push_before); + + EXPECT_NE(alu, alu_cf_changes); + + AluInstr alu_bs_changes = alu; + alu_bs_changes.set_bank_swizzle(alu_vec_021); + + EXPECT_NE(alu, alu_bs_changes); +}; + + +TEST_F(InstrTest, test_alu_op1_ne) +{ + auto R130x = new Register( 130, 0, pin_none); + auto R130y = new Register( 130, 1, pin_none); + auto R130z = new Register( 130, 2, pin_none); + + AluInstr alu(op1_mov, R130x, R130y, {alu_write, alu_last_instr}); + + EXPECT_NE(alu, AluInstr(op1_cos, R130x, R130y, 
{alu_write, alu_last_instr})); + + EXPECT_NE(alu, AluInstr(op1_mov, R130z, R130y, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op1_mov, R130x, R130z, {alu_write, alu_last_instr})); + EXPECT_NE(alu, AluInstr(op1_mov, R130x, R130y, {alu_last_instr})); + + AluInstr alu_cf_changes = alu; + alu_cf_changes.set_cf_type(cf_alu_push_before); + + EXPECT_NE(alu, alu_cf_changes); + + AluInstr alu_bs_changes = alu; + alu_bs_changes.set_bank_swizzle(alu_vec_021); + + EXPECT_NE(alu, alu_bs_changes); +}; + +/* Splits a 4-slot DOT4 instruction into an ALU group and checks each slot against the expected per-channel instruction. */ +TEST_F(InstrTest, test_alu_dot4_grouped) +{ + auto R130x = new Register( 130, 0, pin_none); + auto R130y = new Register( 130, 1, pin_none); + auto R130z = new Register( 130, 2, pin_none); + auto R130w = new Register( 130, 3, pin_none); + + auto R131x = new Register( 131, 0, pin_none); + auto R131y = new Register( 131, 1, pin_none); + auto R131z = new Register( 131, 2, pin_none); + auto R131w = new Register( 131, 3, pin_none); + + auto R132x = new Register( 132, 0, pin_chan); + auto R132y = new Register( 132, 1, pin_chan); + auto R132z = new Register( 132, 2, pin_chan); + auto R132w = new Register( 132, 3, pin_chan); + + AluInstr::SrcValues src({R130x, R130y, R130z, R130w, + R131x, R131y, R131z, R131w}); + + AluInstr alu(op2_dot4_ieee, R132x, src, {alu_write, alu_last_instr}, 4); + + EXPECT_NE(alu, AluInstr(op1_cos, R130x, R130y, {alu_write, alu_last_instr})); + EXPECT_EQ(alu, alu); + + ValueFactory vf; + auto group = alu.split(vf); + ASSERT_TRUE(group); /* fatal check must come before the first dereference of group */ + group->fix_last_flag(); + + auto i = group->begin(); + EXPECT_NE(i, group->end()); + ASSERT_TRUE(*i); + check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, {alu_write})); + ++i; + EXPECT_NE(i, group->end()); + ASSERT_TRUE(*i); + check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, {})); + ++i; + EXPECT_NE(i, group->end()); + ASSERT_TRUE(*i); + check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, {})); + ++i; + EXPECT_NE(i, group->end()); + ASSERT_TRUE(*i); + check(**i, AluInstr(op2_dot4_ieee, R132w, 
R131z, R131w, {alu_last_instr})); + ++i; + EXPECT_NE(i, group->end()); + ASSERT_FALSE(*i); + ++i; + EXPECT_EQ(i, group->end()); +}; + + + + +#ifdef __cpp_exceptions +TEST_F(InstrTest, test_alu_wrong_source_count) +{ + EXPECT_THROW(AluInstr(op3_cnde, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + new Register( 129, 3, pin_none), + {alu_write, alu_last_instr}), std::invalid_argument); + + EXPECT_THROW(AluInstr(op3_cnde, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + {alu_write, alu_last_instr}), std::invalid_argument); + + EXPECT_THROW(AluInstr(op1_mov, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + new Register( 129, 2, pin_chan), + {alu_write, alu_last_instr}), std::invalid_argument); + + EXPECT_THROW(AluInstr(op2_add, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + {alu_write, alu_last_instr}), std::invalid_argument); + + EXPECT_THROW(AluInstr(op2_add, + new Register( 130, 1, pin_none), + new Register( 129, 2, pin_chan), + new Register( 129, 2, pin_chan), + new Register( 129, 2, pin_chan), + {alu_write, alu_last_instr}), std::invalid_argument); +} + +TEST_F(InstrTest, test_alu_write_no_dest) +{ + EXPECT_THROW(AluInstr(op2_add, + nullptr, + new Register( 129, 2, pin_chan), + new Register( 129, 2, pin_chan), + {alu_write, alu_last_instr}), std::invalid_argument); +} + +#endif + +TEST_F(InstrTest, test_tex_basic) +{ + TexInstr tex(TexInstr::sample, + RegisterVec4(129), + {0,1,2,3}, + RegisterVec4(130), + 1, + 17); + + EXPECT_EQ(tex.opcode(), TexInstr::sample); + + auto& dst = tex.dst(); + auto& src = tex.src(); + + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(*dst[i], Register(129, i, pin_group)); + EXPECT_EQ(*src[i], Register(130, i, pin_group)); + EXPECT_EQ(tex.dest_swizzle(i), i); + } + + EXPECT_EQ(tex.sampler_id(), 1); + EXPECT_EQ(tex.resource_id(), 17); + + EXPECT_TRUE(tex.end_group()); + + for (int i = 0; i < 3; ++i) + EXPECT_EQ(tex.get_offset(i), 0); + + 
EXPECT_FALSE(tex.has_tex_flag(TexInstr::x_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized)); + + tex.set_tex_flag(TexInstr::x_unnormalized); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized)); + + tex.set_tex_flag(TexInstr::y_unnormalized); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized)); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::y_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized)); + + tex.set_tex_flag(TexInstr::z_unnormalized); + tex.set_tex_flag(TexInstr::w_unnormalized); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized)); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::y_unnormalized)); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::z_unnormalized)); + EXPECT_TRUE(tex.has_tex_flag(TexInstr::w_unnormalized)); + + EXPECT_EQ(tex.inst_mode(), 0); + + EXPECT_FALSE(tex.sampler_offset()); + + tex.set_dest_swizzle({4, 7, 0, 1}); + EXPECT_EQ(tex.dest_swizzle(0), 4); + EXPECT_EQ(tex.dest_swizzle(1), 7); + EXPECT_EQ(tex.dest_swizzle(2), 0); + EXPECT_EQ(tex.dest_swizzle(3), 1); + + tex.set_dest_swizzle({7, 2, 5, 0}); + EXPECT_EQ(tex.dest_swizzle(0), 7); + EXPECT_EQ(tex.dest_swizzle(1), 2); + EXPECT_EQ(tex.dest_swizzle(2), 5); + EXPECT_EQ(tex.dest_swizzle(3), 0); + + tex.set_offset(0, 2); + tex.set_offset(1, -1); + tex.set_offset(2, 3); + + EXPECT_EQ(tex.get_offset(0), 4); + EXPECT_EQ(tex.get_offset(1), -2); + EXPECT_EQ(tex.get_offset(2), 6); + +} + +TEST_F(InstrTest, test_tex_gather4) +{ + TexInstr tex(TexInstr::gather4, + RegisterVec4(131), + {0,1,2,3}, + RegisterVec4(132), + 2, + 19); + + EXPECT_EQ(tex.opcode(), TexInstr::gather4); + + auto& dst = tex.dst(); + 
auto& src = tex.src(); + + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(*dst[i], Register(131, i, pin_group)); + EXPECT_EQ(*src[i], Register(132, i, pin_group)); + EXPECT_EQ(tex.dest_swizzle(i), i); + } + + EXPECT_EQ(tex.sampler_id(), 2); + EXPECT_EQ(tex.resource_id(), 19); + + for (int i = 0; i < 3; ++i) + EXPECT_EQ(tex.get_offset(i), 0); + + EXPECT_FALSE(tex.has_tex_flag(TexInstr::x_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized)); + EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized)); + + tex.set_gather_comp(2); + EXPECT_EQ(tex.inst_mode(), 2); + +} + +/* Checks that a TexInstr compares unequal to instructions differing in exactly one constructor argument. */ +TEST_F(InstrTest, test_tex_neq) +{ + TexInstr tex_ref(TexInstr::sample, + RegisterVec4(129), + {0,1,2,3}, + RegisterVec4(130), + 1, + 17); + EXPECT_EQ(tex_ref, tex_ref); + + + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample_c, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(130), {0,1,2,3}, RegisterVec4(130), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(131), 1, 17)); /* differs only in the source register vector; was a verbatim repeat of the previous check */ + + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {7,1,2,3}, RegisterVec4(130), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,7,2,3}, RegisterVec4(130), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,7,3}, RegisterVec4(130), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,7}, RegisterVec4(130), 1, 17)); + + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {7,1,2,3}), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,7,2,3}), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,1,7,3}), 1, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, 
RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,1,2,7}), 1, 17)); + + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 2, 17)); + EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 1, 18)); + + /* + auto tex_with_sampler_offset = tex_ref; + tex_with_sampler_offset.set_sampler_offset(new LiteralConstant( 2)); + EXPECT_NE(tex_ref, tex_with_sampler_offset); + + auto tex_cmp1 = tex_ref; + EXPECT_EQ(tex_ref, tex_cmp1); + + tex_cmp1.set_tex_flag(TexInstr::x_unnormalized); EXPECT_NE(tex_ref, tex_cmp1); + auto tex_cmp2 = tex_ref; tex_cmp2.set_tex_flag(TexInstr::y_unnormalized); EXPECT_NE(tex_ref, tex_cmp2); + auto tex_cmp3 = tex_ref; tex_cmp3.set_tex_flag(TexInstr::z_unnormalized); EXPECT_NE(tex_ref, tex_cmp3); + auto tex_cmp4 = tex_ref; tex_cmp4.set_tex_flag(TexInstr::w_unnormalized); EXPECT_NE(tex_ref, tex_cmp4); + + for (int i = 0; i < 3; ++i) { + auto tex_ofs = tex_ref; + tex_ofs.set_offset(i, 1); + EXPECT_NE(tex_ref, tex_ofs); + } + + for (int i = 0; i < 4; ++i) { + auto tex_swz = tex_ref; + RegisterVec4::Swizzle dst_swz = {0,1,2,3}; + dst_swz[i] = 7; + tex_swz.set_dest_swizzle(dst_swz); + EXPECT_NE(tex_ref, tex_swz); + } + + auto tex_cmp_mode = tex_ref; + tex_cmp_mode.set_inst_mode(1); + EXPECT_NE(tex_ref, tex_cmp_mode);*/ +} + + +TEST_F(InstrTest, test_export_basic) +{ + ExportInstr exp0(ExportInstr::param, 60, RegisterVec4(200)); + + EXPECT_EQ(exp0.export_type(), ExportInstr::param); + EXPECT_EQ(exp0.location(), 60); + EXPECT_EQ(exp0.value(), RegisterVec4(200)); + EXPECT_FALSE(exp0.is_last_export()); + + ExportInstr exp1(ExportInstr::param, 60, RegisterVec4(200)); + exp1.set_is_last_export(true); + EXPECT_TRUE(exp1.is_last_export()); + + EXPECT_EQ(exp0, exp0); + EXPECT_NE(exp0, exp1); + + ExportInstr exp2(ExportInstr::pos, 60, RegisterVec4(200)); + EXPECT_EQ(exp2.export_type(), ExportInstr::pos); + EXPECT_NE(exp0, exp2); + + ExportInstr exp3(ExportInstr::param, 61, 
RegisterVec4(200)); + EXPECT_EQ(exp3.location(), 61); + EXPECT_NE(exp0, exp3); + + ExportInstr exp4(ExportInstr::param, 60, RegisterVec4(201)); + EXPECT_EQ(exp4.value(), RegisterVec4(201)); + EXPECT_NE(exp0, exp4); + + EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {7,1,2,3}))); + EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,7,2,3}))); + EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,1,7,3}))); + EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,1,2,7}))); +} + + +TEST_F(InstrTest, test_fetch_basic) +{ + FetchInstr fetch(vc_fetch, + RegisterVec4(200), + {0,2,1,3}, + new Register( 201, 2, pin_none), + 0, + vertex_data, + fmt_8, + vtx_nf_norm, + vtx_es_none, + 1, + nullptr); + + + + EXPECT_EQ(fetch.opcode(), vc_fetch); + EXPECT_EQ(fetch.dst(), RegisterVec4(200)); + EXPECT_EQ(fetch.dest_swizzle(0), 0); + EXPECT_EQ(fetch.dest_swizzle(1), 2); + EXPECT_EQ(fetch.dest_swizzle(2), 1); + EXPECT_EQ(fetch.dest_swizzle(3), 3); + + EXPECT_EQ(fetch.src(), Register(201, 2, pin_none)); + EXPECT_EQ(fetch.src_offset(), 0); + + EXPECT_EQ(fetch.resource_id(), 1); + EXPECT_FALSE(fetch.resource_offset()); + + EXPECT_EQ(fetch.fetch_type(), vertex_data); + EXPECT_EQ(fetch.data_format(), fmt_8); + EXPECT_EQ(fetch.num_format(), vtx_nf_norm); + EXPECT_EQ(fetch.endian_swap(), vtx_es_none); + + EXPECT_EQ(fetch.mega_fetch_count(), 0); + EXPECT_EQ(fetch.array_base(), 0); + EXPECT_EQ(fetch.array_size(), 0); + EXPECT_EQ(fetch.elm_size(), 0); + + for (int i = 0; i < FetchInstr::unknown; ++i) { + EXPECT_FALSE(fetch.has_fetch_flag(static_cast(i))); + } + + EXPECT_NE(fetch, FetchInstr(vc_get_buf_resinfo, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, 1, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(201),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, 
vtx_es_none, + 1, nullptr)); + + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{1,2,0,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 1, nullptr)); + + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 200, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 1, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 8, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 1, nullptr)); + + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, instance_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 1, nullptr)); + + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8_8, vtx_nf_norm, vtx_es_none, + 1, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_int, vtx_es_none, + 1, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_8in16, + 1, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 2, nullptr)); + + EXPECT_NE(fetch, FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3}, + new Register( 201, 2, pin_none), + 0, vertex_data, + fmt_8, vtx_nf_norm, vtx_es_none, + 1, new Register( 1000, 0, pin_none))); + + auto fetch1 = fetch; + fetch1.set_mfc(31); + EXPECT_NE(fetch1, fetch); + EXPECT_EQ(fetch1.mega_fetch_count(), 31); + EXPECT_TRUE(fetch1.has_fetch_flag(static_cast(FetchInstr::is_mega_fetch))); + + auto fetch2 = fetch; + fetch2.set_array_base(32); + EXPECT_NE(fetch, fetch2); + EXPECT_EQ(fetch2.array_base(), 32); + + auto fetch3 = fetch; + 
fetch3.set_array_size(16); + EXPECT_NE(fetch, fetch3); + EXPECT_EQ(fetch3.array_size(), 16); + + auto fetch4 = fetch; + fetch4.set_element_size(3); + EXPECT_NE(fetch, fetch4); + EXPECT_EQ(fetch4.elm_size(), 3); +} + +TEST_F(InstrTest, test_fetch_basic2) +{ + FetchInstr fetch(vc_get_buf_resinfo, + RegisterVec4(201), + {0,1,3,4}, + new Register( 202, 3, pin_none), + 1, + no_index_offset, + fmt_32_32, + vtx_nf_int, + vtx_es_8in16, + 3, + new Register( 300, 1, pin_none)); + + + EXPECT_EQ(fetch.opcode(), vc_get_buf_resinfo); + EXPECT_EQ(fetch.dst(), RegisterVec4(201)); + EXPECT_EQ(fetch.dest_swizzle(0), 0); + EXPECT_EQ(fetch.dest_swizzle(1), 1); + EXPECT_EQ(fetch.dest_swizzle(2), 3); + EXPECT_EQ(fetch.dest_swizzle(3), 4); + + EXPECT_EQ(fetch.src(), Register(202, 3, pin_none)); + EXPECT_EQ(fetch.src_offset(), 1); + + EXPECT_EQ(fetch.resource_id(), 3); + EXPECT_EQ(*fetch.resource_offset(), Register(300, 1, pin_none)); + + EXPECT_EQ(fetch.fetch_type(), no_index_offset); + EXPECT_EQ(fetch.data_format(), fmt_32_32); + EXPECT_EQ(fetch.num_format(), vtx_nf_int); + EXPECT_EQ(fetch.endian_swap(), vtx_es_8in16); + + EXPECT_EQ(fetch.mega_fetch_count(), 0); + EXPECT_EQ(fetch.array_base(), 0); + EXPECT_EQ(fetch.array_size(), 0); + EXPECT_EQ(fetch.elm_size(), 0); + + for (int i = 0; i < FetchInstr::unknown; ++i) { + EXPECT_FALSE(fetch.has_fetch_flag(static_cast(i))); + } + + auto fetch1 = fetch; + fetch1.set_mfc(15); + EXPECT_NE(fetch1, fetch); + EXPECT_EQ(fetch1.mega_fetch_count(), 15); + EXPECT_TRUE(fetch1.has_fetch_flag(static_cast(FetchInstr::is_mega_fetch))); + + auto fetch2 = fetch; + fetch2.set_array_base(128); + EXPECT_NE(fetch, fetch2); + EXPECT_EQ(fetch2.array_base(), 128); + + auto fetch3 = fetch; + fetch3.set_array_size(8); + EXPECT_NE(fetch, fetch3); + EXPECT_EQ(fetch3.array_size(), 8); + + auto fetch4 = fetch; + fetch4.set_element_size(1); + EXPECT_NE(fetch, fetch4); + EXPECT_EQ(fetch4.elm_size(), 1); +} diff --git 
a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp new file mode 100644 index 0000000..df6a9ac --- /dev/null +++ b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp @@ -0,0 +1,728 @@ + +#include "../sfn_instrfactory.h" + +#include "../sfn_instr_alu.h" +#include "../sfn_instr_export.h" +#include "../sfn_instr_fetch.h" +#include "../sfn_instr_lds.h" +#include "../sfn_instr_mem.h" +#include "../sfn_instr_tex.h" + +#include "gtest/gtest.h" +#include + +namespace r600 { + +using std::istringstream; +using std::ostringstream; +using std::string; + +class TestInstrFromString : public ::testing::Test +{ +public: + TestInstrFromString(); + + PInst from_string(const std::string& s); + +protected: + void add_dest_from_string(const char *init); + void add_dest_vec4_from_string(const char *init); + + void check(const Instr& eval, const Instr& expect); + void check(const string& init, const Instr& expect); + + InstrFactory m_instr_factory; + +}; + +TEST_F(TestInstrFromString, test_alu_mov) +{ + add_dest_from_string("R1999.x"); + + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_last_instr}); + + check("ALU MOV R2000.y : R1999.x {WL}", expect); +} + +TEST_F(TestInstrFromString, test_alu_lds_read_ret) +{ + add_dest_from_string("R1999.x"); + + AluInstr expect(DS_OP_READ_RET, + {new Register( 1999, 0, pin_none)}, {}); + + check("ALU LDS READ_RET __.x : R1999.x {}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_mov_literal) +{ + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new LiteralConstant( 0x10), + {alu_write, alu_last_instr}); + + check("ALU MOV R2000.y : L[0x10] {WL}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_mov_neg) +{ + add_dest_from_string("R1999.x"); + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, 
alu_last_instr, alu_src0_neg}); + + check("ALU MOV R2000.y : -R1999.x {WL}", expect); +} + +TEST_F(TestInstrFromString, test_alu_mov_abs) +{ + add_dest_from_string("R1999.x"); + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_last_instr, alu_src0_abs}); + + check("ALU MOV R2000.y : |R1999.x| {WL}", expect); +} + +TEST_F(TestInstrFromString, test_alu_mov_neg_abs) +{ + add_dest_from_string("R1999.x"); + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_src0_neg, alu_src0_abs}); + check("ALU MOV R2000.y : -|R1999.x| {W}", expect); +} + +TEST_F(TestInstrFromString, test_alu_add) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + + AluInstr expect(op2_add, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_last_instr}); + check("ALU ADD __.y : R1999.w R1998.z {L}", expect); + +} + +TEST_F(TestInstrFromString, test_alu_add_clmap) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_add, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_last_instr, alu_dst_clamp}); + check("ALU ADD CLAMP __.y : R1999.w R1998.z {L}", expect); + +} + +TEST_F(TestInstrFromString, test_alu_add_neg2) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_add, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_last_instr, alu_src1_neg}); + check("ALU ADD __.y : R1999.w -R1998.z {L}", expect); +} + +TEST_F(TestInstrFromString, test_alu_sete_update_pref) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_sete, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + 
{alu_last_instr, alu_src1_neg, alu_update_pred}); + check("ALU SETE __.y : R1999.w -R1998.z {LP}", expect); +} + +TEST_F(TestInstrFromString, test_alu_sete_update_pref_empty_dest) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_sete, + new Register( 2000, 0, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_last_instr, alu_update_pred}); + check("ALU SETE __.x : R1999.w R1998.z {LP}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_setne_update_exec) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_setne, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_last_instr, alu_src1_neg, alu_update_exec}); + check("ALU SETNE __.y : R1999.w -R1998.z {LE}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_add_abs2) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_add, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_write, alu_last_instr, alu_src1_abs}); + check("ALU ADD R2000.y : R1999.w |R1998.z| {WL}", expect); +} + +TEST_F(TestInstrFromString, test_alu_add_abs2_neg2) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + AluInstr expect(op2_add, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + {alu_write, alu_last_instr, alu_src1_abs, alu_src1_neg}); + check("ALU ADD R2000.y : R1999.w -|R1998.z| {WL}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_muladd) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + add_dest_from_string("R2000.y"); + AluInstr expect(op3_muladd_ieee, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + new Register( 2000, 1, pin_none), + {alu_write, alu_last_instr}); 
+ check("ALU MULADD_IEEE R2000.y : R1999.w R1998.z R2000.y {WL}", expect); +} + +TEST_F(TestInstrFromString, test_alu_muladd_neg3) +{ + add_dest_from_string("R1998.z"); + add_dest_from_string("R1999.w"); + add_dest_from_string("R2000.y"); + AluInstr expect(op3_muladd_ieee, + new Register( 2000, 1, pin_none), + new Register( 1999, 3, pin_none), + new Register( 1998, 2, pin_none), + new Register( 2000, 1, pin_none), + {alu_last_instr, alu_src2_neg}); + check("ALU MULADD_IEEE __.y : R1999.w R1998.z -R2000.y {L}", expect); +} + + +TEST_F(TestInstrFromString, test_alu_mov_bs) +{ + add_dest_from_string("R1999.x"); + for (auto& [expect_bs, str] : AluInstr::bank_swizzle_map) { + auto init = std::string("ALU MOV R2000.y : R1999.x {WL} ") + str; + + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_last_instr}); + expect.set_bank_swizzle(expect_bs); + + check(init, expect); + } +} + +TEST_F(TestInstrFromString, test_alu_dot4_ieee) +{ + add_dest_from_string("R199.x"); + add_dest_from_string("R199.y"); + add_dest_from_string("R199.z"); + add_dest_from_string("R199.w"); + add_dest_from_string("R198.x"); + add_dest_from_string("R198.y"); + add_dest_from_string("R198.z"); + add_dest_from_string("R198.w"); + auto init = std::string("ALU DOT4_IEEE R2000.y : R199.x R198.w + R199.y R198.z + R199.z R198.y + R199.w R198.x {WL}"); + + AluInstr expect(op2_dot4_ieee, + new Register( 2000, 1, pin_none), + {new Register( 199, 0, pin_none), + new Register( 198, 3, pin_none), + new Register( 199, 1, pin_none), + new Register( 198, 2, pin_none), + new Register( 199, 2, pin_none), + new Register( 198, 1, pin_none), + new Register( 199, 3, pin_none), + new Register( 198, 0, pin_none)}, + {alu_write, alu_last_instr}, 4); + + check(init, expect); +} + +TEST_F(TestInstrFromString, test_alu_mov_cf) +{ + add_dest_from_string("R1999.x"); + for (auto& [expect_cf, str] : AluInstr::cf_map) { + auto init = std::string("ALU MOV R2000.y : 
R1999.x {WL} ") + str; + + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_last_instr}); + expect.set_cf_type(expect_cf); + + check(init, expect); + } +} + +TEST_F(TestInstrFromString, test_alu_interp_xy) +{ + add_dest_from_string("R0.y@fully"); + auto init = std::string("ALU INTERP_ZW R1024.z@chan : R0.y@fully Param0.z {W} VEC_210"); + + AluInstr expect(op2_interp_zw, + new Register( 1024, 2, pin_chan), + new Register( 0, 1, pin_fully), + new InlineConstant( ALU_SRC_PARAM_BASE, 2), + {alu_write}); + expect.set_bank_swizzle(alu_vec_210); + + check(init, expect); +} + + +TEST_F(TestInstrFromString, test_alu_interp_xy_no_write) +{ + add_dest_from_string("R0.x@fully"); + auto init = std::string("ALU INTERP_XY __.x@chan : R0.x@fully Param0.z {} VEC_210"); + + AluInstr expect(op2_interp_xy, + new Register( 1024, 0, pin_chan), + new Register( 0, 0, pin_fully), + new InlineConstant( ALU_SRC_PARAM_BASE, 2), + {}); + expect.set_bank_swizzle(alu_vec_210); + + check(init, expect); +} + + +TEST_F(TestInstrFromString, test_alu_mov_cf_bs) +{ + add_dest_from_string("R1999.x"); + auto init = std::string("ALU MOV R2000.y : R1999.x {WL} VEC_210 POP_AFTER"); + AluInstr expect(op1_mov, + new Register( 2000, 1, pin_none), + new Register( 1999, 0, pin_none), + {alu_write, alu_last_instr}); + expect.set_cf_type(cf_alu_pop_after); + expect.set_bank_swizzle(alu_vec_210); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_sample_basic) +{ + add_dest_vec4_from_string("R2000.xyzw"); + auto init = std::string("TEX SAMPLE R1000.xyzw : R2000.xyzw RID:10 SID:1 NNNN"); + TexInstr expect(TexInstr::sample, RegisterVec4(1000), {0,1,2,3}, RegisterVec4(2000), 1, 10); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_ld_basic) +{ + add_dest_vec4_from_string("R2002.xyzw"); + auto init = std::string("TEX LD R1001.xyzw : R2002.xyzw RID:27 SID:7 NNNN"); + TexInstr expect(TexInstr::ld, RegisterVec4(1001), 
{0,1,2,3}, RegisterVec4(2002), 7, 27); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_sample_with_offset) +{ + add_dest_vec4_from_string("R2002.xyzw"); + auto init = std::string("TEX SAMPLE R1001.xyzw : R2002.xyzw RID:27 SID:2 OX:1 OY:-2 OZ:5 NNNN"); + + TexInstr expect(TexInstr::sample, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 2, 27); + expect.set_offset(0, 1); + expect.set_offset(1, -2); + expect.set_offset(2, 5); + + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_gather4_x) +{ + add_dest_vec4_from_string("R2002.xyzw"); + auto init = std::string("TEX GATHER4 R1001.xyzw : R2002.xyzw RID:7 SID:27 MODE:0 NNNN"); + TexInstr expect(TexInstr::gather4, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_gather4_y) +{ + add_dest_vec4_from_string("R2002.xyzw"); + auto init = std::string("TEX GATHER4 R1001.xyzw : R2002.xyzw RID:7 SID:27 MODE:1 NNNN"); + TexInstr expect(TexInstr::gather4, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7); + expect.set_gather_comp(1); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_tex_sampler_with_offset) +{ + add_dest_vec4_from_string("R2002.xyzw"); + auto init = std::string("TEX SAMPLE R1001.xyzw : R2002.xyzw RID:7 SID:27 SO:R200.z NNNN"); + TexInstr expect(TexInstr::sample, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7); + expect.set_sampler_offset(new Register( 200, 2, pin_none)); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_export_param_60) +{ + add_dest_vec4_from_string("R1001.xyzw"); + + ExportInstr expect(ExportInstr::param, 60, RegisterVec4(1001)); + check("EXPORT PARAM 60 R1001.xyzw", expect); +} + +TEST_F(TestInstrFromString, test_export_pos_61) +{ + add_dest_from_string("R1002.y@group"); + + ExportInstr expect(ExportInstr::pos, 61, RegisterVec4(1002, false, {1, 4, 5, 7})); + check("EXPORT POS 61 R1002.y01_", expect); +} + +TEST_F(TestInstrFromString, 
test_export_last_pixel_0) +{ + add_dest_vec4_from_string("R1002.xyzw"); + + ExportInstr expect(ExportInstr::pixel, 0, RegisterVec4(1002, false, {2, 3, 0, 1})); + expect.set_is_last_export(true); + check("EXPORT_DONE PIXEL 0 R1002.zwxy", expect); +} + + +TEST_F(TestInstrFromString, test_fetch_basic) +{ + add_dest_from_string("R201.z"); + + FetchInstr expect(vc_fetch, + RegisterVec4(1002), + {0,4,5,1}, + new Register( 201, 2, pin_none), + 0, + vertex_data, + fmt_8, + vtx_nf_norm, + vtx_es_none, + 1, + nullptr); + expect.set_mfc(31); + expect.set_element_size(3); + check("VFETCH R1002.x01y : R201.z RID:1 VERTEX FMT(8,UNORM) MFC:31 ES:3", expect); +} + +TEST_F(TestInstrFromString, test_query_buffer_size) +{ + QueryBufferSizeInstr expect(RegisterVec4(1002),RegisterVec4::Swizzle({0,1,2,3}), 1); + check("GET_BUF_RESINFO R1002.xyzw : RID:1", expect); + + FetchInstr expect_fetch(vc_get_buf_resinfo, + RegisterVec4(1002),RegisterVec4::Swizzle({0,1,2,3}), + new Register( 0, 7, pin_fully), + 0, + no_index_offset, + fmt_32_32_32_32, + vtx_nf_norm, + vtx_es_none, + 1, + nullptr); + expect_fetch.set_fetch_flag(FetchInstr::format_comp_signed); + check("GET_BUF_RESINFO R1002.xyzw : RID:1", expect_fetch); +} + +TEST_F(TestInstrFromString, test_load_from_buffer) +{ + add_dest_from_string("R201.x"); + add_dest_from_string("R202.x"); + string init = "LOAD_BUF R200.xzwy : R201.x + 16b RID:10 + R202.x"; + LoadFromBuffer expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}), + new Register( 201, 0, pin_none), 16, 10, + new Register( 202, 0, pin_none), fmt_32_32_32_32_float); + check(init, expect); + + auto instr = from_string(init); + FetchInstr expect_fetch(vc_fetch, + RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}), + new Register( 201, 0, pin_none), + 16, + no_index_offset, + fmt_32_32_32_32_float, + vtx_nf_scaled, + vtx_es_none, + 10, + new Register( 202, 0, pin_none)); + expect_fetch.set_fetch_flag(FetchInstr::format_comp_signed); + expect_fetch.set_mfc(16); + check(*instr, 
expect_fetch); +} + +TEST_F(TestInstrFromString, test_load_from_scratch) +{ + + add_dest_from_string("R201.x"); + string init = "READ_SCRATCH R200.xzwy : R201.x SIZE:20 ES:3"; + + LoadFromScratch expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}), + new Register( 201, 0, pin_none), 20); + check(init, expect); + + FetchInstr expect_fetch(vc_read_scratch, + RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}), + new Register( 201, 0, pin_none), + 0, + no_index_offset, + fmt_32_32_32_32, + vtx_nf_int, + vtx_es_none, + 0, + nullptr); + expect_fetch.set_element_size(3); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::mfc); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::fmt); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::ftype); + expect_fetch.set_fetch_flag(FetchInstr::EFlags::uncached); + expect_fetch.set_fetch_flag(FetchInstr::EFlags::indexed); + expect_fetch.set_fetch_flag(FetchInstr::EFlags::wait_ack); + expect_fetch.set_array_size(19); + + check(init, expect_fetch); +} + +TEST_F(TestInstrFromString, test_write_scratch_to_offset) +{ + add_dest_vec4_from_string("R1.xyzw"); + string init = "WRITE_SCRATCH 20 R1.xyzw AL:4 ALO:16"; + WriteScratchInstr expect(RegisterVec4(1), 20, 4, 16, 0xf); + check(init, expect); + + add_dest_vec4_from_string("R2.xyzw"); + string init2 = "WRITE_SCRATCH 10 R2.xy_w AL:8 ALO:8"; + WriteScratchInstr expect2(RegisterVec4(2), 10, 8, 8, 0xb); + check(init2, expect2); +} + +TEST_F(TestInstrFromString, test_write_scratch_to_index) +{ + add_dest_vec4_from_string("R1.xyzw"); + add_dest_from_string("R3.x"); + string init = "WRITE_SCRATCH @R3.x[10] R1.xyzw AL:4 ALO:16"; + WriteScratchInstr expect(RegisterVec4(1), new Register(3, 0, pin_none), 4, 16, 0xf, 10); + check(init, expect); + + add_dest_vec4_from_string("R2.xyzw"); + add_dest_from_string("R4.x"); + string init2 = "WRITE_SCRATCH @R4.x[20] R2.xy__ AL:4 ALO:16"; + WriteScratchInstr expect2(RegisterVec4(2), new Register(4, 0, pin_none), 4, 16, 0x3, 20); + check(init2, 
expect2); + + +} + + + +TEST_F(TestInstrFromString, test_load_from_scratch_fixed_offset) +{ + string init = "READ_SCRATCH R200.xzwy : L[0xA] SIZE:40 ES:3"; + + LoadFromScratch expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}), + new LiteralConstant( 10), 40); + check(init, expect); + + FetchInstr expect_fetch(vc_read_scratch, + RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}), + new Register( 0, 7, pin_none), + 0, + no_index_offset, + fmt_32_32_32_32, + vtx_nf_int, + vtx_es_none, + 0, + nullptr); + expect_fetch.set_element_size(3); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::mfc); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::fmt); + expect_fetch.set_print_skip(FetchInstr::EPrintSkip::ftype); + expect_fetch.set_fetch_flag(FetchInstr::EFlags::uncached); + expect_fetch.set_fetch_flag(FetchInstr::EFlags::wait_ack); + expect_fetch.set_array_base(10); + expect_fetch.set_array_size(39); + + check(init, expect_fetch); +} + + +TEST_F(TestInstrFromString, test_lds_read_3_values) +{ + add_dest_from_string("R5.x@free"); + add_dest_from_string("R5.y@free"); + add_dest_from_string("R5.z@free"); + + auto init = "LDS_READ [ R10.x@free R11.x@free R12.x@free ] : [ R5.x@free R5.y@free R5.z@free ]"; + + std::vector> dests(3); + std::vector> srcs(3); + + for (int i = 0; i < 3; ++i) { + dests[i] = new Register(10 + i, 0, pin_free); + srcs[i] = new Register(5, i, pin_free); + } + + LDSReadInstr expect(dests, srcs); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_lds_read_2_values) +{ + add_dest_from_string("R5.x@free"); + add_dest_from_string("R5.y@free"); + + auto init = "LDS_READ [ R11.x@free R12.x@free ] : [ R5.x@free R5.y@free ]"; + + std::vector> dests(2); + std::vector> srcs(2); + + for (int i = 0; i < 2; ++i) { + dests[i] = new Register(11 + i, 0, pin_free); + srcs[i] = new Register(5, i, pin_free); + } + + LDSReadInstr expect(dests, srcs); + check(init, expect); +} + +TEST_F(TestInstrFromString, test_lds_write_1_value) +{ + auto init = 
"LDS WRITE __.x [ R1.x ] : R2.y"; + add_dest_from_string("R1.x"); + add_dest_from_string("R2.y"); + + LDSAtomicInstr expect(DS_OP_WRITE, nullptr, + new Register(1, 0, pin_none), + {new Register(2, 1, pin_none)}); + + check(init, expect); +} + +TEST_F(TestInstrFromString, test_lds_write_2_value) +{ + auto init = "LDS WRITE2 __.x [ R1.x ] : R2.y KC0[1].z"; + + add_dest_from_string("R1.x"); + add_dest_from_string("R2.y"); + + LDSAtomicInstr expect(DS_OP_WRITE2, nullptr, + new Register(1, 0, pin_none), + {new Register(2, 1, pin_none), + new UniformValue(513, 2, 0) }); + + check(init, expect); +} + +TEST_F(TestInstrFromString, test_lds_write_atomic_add_ret) +{ + auto init = "LDS ADD_RET R7.y [ R1.x ] : R2.y"; + + add_dest_from_string("R1.x"); + add_dest_from_string("R2.y"); + + LDSAtomicInstr expect(DS_OP_ADD_RET, + new Register(7, 1, pin_none), + new Register(1, 0, pin_none), + {new Register(2, 1, pin_none)}); + + check(init, expect); +} + +TEST_F(TestInstrFromString, test_lds_write_atomic_add) +{ + auto init = "LDS ADD __.x [ R1.x ] : R2.y"; + + add_dest_from_string("R1.x"); + add_dest_from_string("R2.y"); + + LDSAtomicInstr expect(DS_OP_ADD, + nullptr, + new Register(1, 0, pin_none), + {new Register(2, 1, pin_none)}); + + check(init, expect); +} + + +TEST_F(TestInstrFromString, test_writeTF) +{ + auto init = "WRITE_TF R1.xyzw"; + + add_dest_vec4_from_string("R1.xyzw"); + + WriteTFInstr expect(RegisterVec4(1, true, {0,1,2,3}, pin_group)); + + check(init, expect); +} + +TestInstrFromString::TestInstrFromString() +{ + +} + +PInst TestInstrFromString::from_string(const std::string& s) +{ + return m_instr_factory.from_string(s, 0); +} + +void TestInstrFromString::check(const Instr& eval, const Instr& expect) +{ + EXPECT_EQ(eval, expect); +} + +void TestInstrFromString::check(const string& init, const Instr& expect) +{ + auto instr = from_string(init); + ASSERT_TRUE(instr); + EXPECT_EQ(*instr, expect); + + ostringstream os; + instr->print(os); + EXPECT_EQ(os.str(), init); 
+}
+
+void TestInstrFromString::add_dest_from_string(const char *init)
+{
+   m_instr_factory.value_factory().dest_from_string(init);
+}
+
+void TestInstrFromString::add_dest_vec4_from_string(const char *init)
+{
+   RegisterVec4::Swizzle dummy;
+   m_instr_factory.value_factory().dest_vec4_from_string(init, dummy);
+}
+
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp
new file mode 100644
index 0000000..f12f600
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp
@@ -0,0 +1,217 @@
+#include "../sfn_shader.h"
+#include "../sfn_liverangeevaluator.h"
+#include "sfn_test_shaders.h"
+
+#include "gtest/gtest.h"
+#include <sstream>
+
+#include "../sfn_liverangeevaluator.h"
+
+namespace r600 {
+
+using std::ostringstream;
+
+class LiveRangeTests : public TestShader {
+
+protected:
+
+   void check(const char *shader, LiveRangeMap& expect); // evaluate live ranges of `shader` and compare with `expect`
+
+};
+
+using SimpleTest = testing::Test;
+
+TEST_F(SimpleTest, SimpleLiveRangeMapTest) // two maps must compare equal only while they hold the same registers and ranges
+{
+   LiveRangeMap a;
+   LiveRangeMap b;
+
+   EXPECT_EQ(a, b);
+
+   Register r1x(1, 0, pin_none);
+   a.append_register(&r1x);
+   r1x.set_index(0);
+   a.set_life_range(r1x, 0, 1);
+
+   EXPECT_NE(a, b);
+
+   b.append_register(&r1x);
+   b.set_life_range(r1x, 0, 1);
+   EXPECT_EQ(a, b);
+
+   Register r2x(2, 0, pin_none);
+   a.append_register(&r2x);
+   r2x.set_index(0);
+   a.set_life_range(r2x, 0, 2);
+
+   EXPECT_NE(a, b);
+
+   b.append_register(&r2x);
+   b.set_life_range(r2x, 0, 2);
+   EXPECT_EQ(a, b);
+
+   a.set_life_range(r2x, 1, 2);
+   EXPECT_NE(a, b);
+
+   b.set_life_range(r2x, 1, 2);
+   EXPECT_EQ(a, b);
+
+   a.set_life_range(r2x, 0, 1);
+   EXPECT_NE(a, b);
+}
+
+TEST_F(LiveRangeTests, SimpleAssignments)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r0 = vf.dest_vec4_from_string("S0.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+
+   expect.set_life_range(*r1x, 2, 3);
+   for (int i = 0; i < 4; ++i)
+      expect.set_life_range(*r0[i], 1, 3);
+
+   check(red_triangle_fs_expect_from_nir, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleAdd)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+   Register *r0x = vf.dest_from_string("S0.x@free");
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_none);
+   Register *r3x = vf.dest_from_string("S3.x@free");
+   RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*r0x, 1, 4);
+   expect.set_life_range(*r1x, 2, 3);
+   expect.set_life_range(*r3x, 4, 5);
+
+   expect.set_life_range(*r2[0], 3, 4);
+   for (int i = 1; i < 4; ++i)
+      expect.set_life_range(*r2[i], 3, 5);
+
+   for (int i = 0; i < 4; ++i)
+      expect.set_life_range(*r4[i], 5, 6);
+
+   check(add_add_1_expect_from_nir, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleAInterpolation)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+   Register *r0x = vf.dest_from_string("R0.x@fully");
+   r0x->pin_live_range(true, false);
+   Register *r0y = vf.dest_from_string("R0.y@fully");
+   r0y->pin_live_range(true, false);
+
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_chan);
+
+   Register *r3x = vf.dest_from_string("S3.x");
+   Register *r3y = vf.dest_from_string("S3.y");
+   Register *r3z = vf.dest_from_string("S3.z");
+
+   Register *r4x = vf.dest_from_string("S4.x");
+   Register *r4y = vf.dest_from_string("S4.y");
+
+   RegisterVec4 r5 = vf.dest_vec4_from_string("S5.xy_w", dummy, pin_group);
+   RegisterVec4 r6 = vf.dest_vec4_from_string("S6.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*r0x, 0, 3);
+   expect.set_life_range(*r0y, 0, 3);
+
+   expect.set_life_range(*r1x, 1, 2);
+
+   expect.set_life_range(*r2[0], 3, 4);
+   expect.set_life_range(*r2[1], 3, 4);
+   expect.set_life_range(*r2[2], 2, 3);
+   expect.set_life_range(*r2[3], 2, 4);
+
+   expect.set_life_range(*r3x, 4, 5);
+   expect.set_life_range(*r3y, 4, 5);
+   expect.set_life_range(*r3z, 4, 6);
+
+   expect.set_life_range(*r4x, 5, 6);
+   expect.set_life_range(*r4y, 5, 6);
+
+   expect.set_life_range(*r5[0], 6, 7);
+   expect.set_life_range(*r5[1], 6, 7);
+   expect.set_life_range(*r5[3], 6, 7);
+
+
+   expect.set_life_range(*r6[0], 7, 8);
+   expect.set_life_range(*r6[1], 7, 8);
+   expect.set_life_range(*r6[2], 7, 8);
+   expect.set_life_range(*r6[3], 7, 8);
+
+   check(basic_interpolation_translated_1, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleArrayAccess)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+
+   auto array = vf.array_from_string("A0[2].xy");
+
+   auto s1 = vf.dest_from_string("S1.x");
+   auto s2x = vf.dest_from_string("S2.x");
+   auto s2y = vf.dest_from_string("S2.y");
+
+   auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_group);
+
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*array->element(0, nullptr, 0), 0, 4);
+   expect.set_life_range(*array->element(0, nullptr, 1), 0, 4);
+   expect.set_life_range(*array->element(1, nullptr, 0), 0, 4);
+
+   expect.set_life_range(*array->element(1, nullptr, 1), 0, 4);
+
+   expect.set_life_range(*s1, 2, 3);
+
+   expect.set_life_range(*s2x, 4, 5);
+   expect.set_life_range(*s2y, 4, 5);
+
+   expect.set_life_range(*s3[0], 5, 6);
+   expect.set_life_range(*s3[1], 5, 6);
+
+   check(shader_with_dest_array2_scheduled, expect);
+}
+
+void LiveRangeTests::check(const char *shader, LiveRangeMap& expect)
+{
+   auto sh = from_string(shader);
+   ASSERT_TRUE(sh); // stop here if the shader text did not parse
+
+   LiveRangeEvaluator eval;
+
+   LiveRangeMap eval_map = eval.run(*sh);
+
+   ostringstream eval_str; // the maps are compared through their printed form
+   eval_str << eval_map;
+
+   ostringstream expect_str;
+   expect_str << expect;
+
+   EXPECT_EQ(eval_str.str(), expect_str.str());
+
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp
new file mode 100644
index 0000000..b14bec1 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp @@ -0,0 +1,300 @@ + +#include "sfn_test_shaders.h" +#include "../sfn_shader.h" +#include "../sfn_optimizer.h" +#include "../sfn_ra.h" +#include "../sfn_scheduler.h" + +using namespace r600; +using std::ostringstream; + +class TestShaderFromNir : public TestShader { + +protected: + + void check(Shader *s, const char *expect_str); + void ra_check(Shader *s, const char *expect_str); +}; + + +TEST_F(TestShaderFromNir, SimpleDCE) +{ + auto sh = from_string(red_triangle_fs_expect_from_nir); + dead_code_elimination(*sh); + + check(sh, red_triangle_fs_expect_from_nir_dce); +} + + +TEST_F(TestShaderFromNir, CopyPropagationForwardBackward) +{ + auto sh = from_string(add_add_1_expect_from_nir); + copy_propagation_fwd(*sh); + check(sh, add_add_1_expect_from_nir_copy_prop_fwd); +} + +TEST_F(TestShaderFromNir, CopyPropagationForwardDCE) +{ + auto sh = from_string(add_add_1_expect_from_nir); + copy_propagation_fwd(*sh); + dead_code_elimination(*sh); + check(sh, add_add_1_expect_from_nir_copy_prop_fwd_dce); +} + +TEST_F(TestShaderFromNir, CopyPropagationBackwardDCE) +{ + auto sh = from_string(add_add_1_expect_from_nir_copy_prop_fwd_dce); + copy_propagation_backward(*sh); + dead_code_elimination(*sh); + check(sh, add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd); +} + + +TEST_F(TestShaderFromNir, FullOPtimize) +{ + auto sh = from_string(basic_interpolation_orig); + + bool progress; + + do { + progress = false; + progress |= copy_propagation_fwd(*sh); + progress |= dead_code_elimination(*sh); + progress |= copy_propagation_backward(*sh); + progress |= dead_code_elimination(*sh); + } while (progress); + + check(sh, basic_interpolation_expect_from_nir_opt); +} + +TEST_F(TestShaderFromNir, CombinePinFlags) +{ + auto sh = from_string(shader_group_chan_pin_to_combine); + + bool progress; + + do { + progress = false; + progress |= copy_propagation_fwd(*sh); + progress |= 
dead_code_elimination(*sh); + progress |= copy_propagation_backward(*sh); + progress |= dead_code_elimination(*sh); + } while (progress); + + check(sh, shader_group_chan_pin_combined); +} + + +TEST_F(TestShaderFromNir, FullOPtimizeLoop) +{ + auto sh = from_string(vs_nexted_loop_from_nir_expect); + + optimize(*sh); + + check(sh, vs_nexted_loop_from_nir_expect_opt); +} +TEST_F(TestShaderFromNir, OptimizeWithDestArrayValue) +{ + auto sh = from_string(shader_with_dest_array); + + optimize(*sh); + + check(sh, shader_with_dest_array_opt_expect); +} + +TEST_F(TestShaderFromNir, ScheduleOPtimizedWithDestArrayValue) +{ + auto sh = from_string(shader_with_dest_array_opt_expect); + check(schedule(sh), shader_with_dest_array_opt_scheduled); +} + +TEST_F(TestShaderFromNir, ScheduleWithArrayWriteAndRead) +{ + auto sh = from_string(shader_with_dest_array2); + check(schedule(sh), shader_with_dest_array2_scheduled); +} + +TEST_F(TestShaderFromNir, RA_with_dest_array) +{ + auto sh = from_string(shader_with_dest_array2_scheduled); + + auto lrm = r600::LiveRangeEvaluator().run(*sh); + EXPECT_TRUE(r600::register_allocation(lrm)); + + + ra_check(sh, shader_with_dest_array2_scheduled_ra); +} + +TEST_F(TestShaderFromNir, RA_with_chan_group) +{ + auto sh = from_string(shader_group_chan_pin_combined_sheduled); + + auto lrm = r600::LiveRangeEvaluator().run(*sh); + EXPECT_TRUE(r600::register_allocation(lrm)); + ra_check(sh, shader_group_chan_pin_combined_sheduled_ra); +} + + +TEST_F(TestShaderFromNir, TES_opt) +{ + auto sh = from_string(tes_pre_op); + + optimize(*sh); + + check(sh, tes_optimized); +} + +TEST_F(TestShaderFromNir, TES_scheduled) +{ + auto sh = from_string(tes_optimized_pre_sched); + + check(schedule(sh), tes_optimized_sched); +} + + +/* +TEST_F(TestShaderFromNir, ShaderClone) +{ + auto sh = from_string(red_triangle_fs_expect_from_nir); + + auto sh_cloned = sh->clone(); + + MemoryPool::instance().push(); + dead_code_elimination(*sh); + + check(sh, 
red_triangle_fs_expect_from_nir_dce); + + check(sh_cloned, red_triangle_fs_expect_from_nir); + + MemoryPool::instance().pop(); + check(sh, red_triangle_fs_expect_from_nir_dce); +} +*/ + +TEST_F(TestShaderFromNir, ShaderSchedule) +{ + auto sh = from_string(basic_interpolation_orig); + + check(schedule(sh), basic_interpolation_expect_from_nir_sched); + +} + +TEST_F(TestShaderFromNir, ShaderScheduleCayman) +{ + auto sh = from_string(basic_interpolation_orig_cayman); + + check(schedule(sh), basic_interpolation_expect_from_nir_sched_cayman); +} + + +TEST_F(TestShaderFromNir, ShaderScheduleOptimizedCayman) +{ + auto sh = from_string(basic_interpolation_orig_cayman); + + optimize(*sh); + + check(schedule(sh), basic_interpolation_expect_opt_sched_cayman); +} + + +TEST_F(TestShaderFromNir, CopyPropLegalConst) +{ + auto sh = from_string(dot4_pre); + + copy_propagation_fwd(*sh); + dead_code_elimination(*sh); + + check(sh, dot4_copy_prop_dce); +} + + +TEST_F(TestShaderFromNir, FullOPtimize_glxgears_vs2) +{ + + auto sh = from_string(glxgears_vs2_from_nir_expect); + optimize(*sh); + check(sh, glxgears_vs2_from_nir_expect_optimized); +} + +TEST_F(TestShaderFromNir, test_schedule_group) +{ + + auto sh = from_string(test_schedule_group); + optimize(*sh); + check(schedule(sh), test_schedule_group_expect); +} + +TEST_F(TestShaderFromNir, test_dont_kill_dual_use) +{ + auto sh = from_string(shader_copy_prop_dont_kill_double_use); + optimize(*sh); + check(schedule(sh), shader_copy_prop_dont_kill_double_use_expect); +} + + + +TEST_F(TestShaderFromNir, test_schedule_with_bany) +{ + + auto sh = from_string(shader_with_bany_expect_eg); + optimize(*sh); + check(schedule(sh), shader_with_bany_expect_opt_sched_eg); +} + + +TEST_F(TestShaderFromNir, GroupAndChanCombine) +{ + auto sh = from_string(shader_group_chan_pin_to_combine_2); + optimize(*sh); + check(sh, shader_group_chan_pin_to_combine_2_opt); +} + +TEST_F(TestShaderFromNir, RemoveUseAfterSplitgroup) +{ + auto sh = 
from_string(fs_with_loop_multislot_reuse);
+   check(schedule(sh), fs_with_loop_multislot_reuse_scheduled);
+}
+
+TEST_F(TestShaderFromNir, OptimizeVSforTCS)
+{
+   auto sh = from_string(vtx_for_tcs_inp);
+   optimize(*sh);
+   check(sh, vtx_for_tcs_opt);
+}
+
+TEST_F(TestShaderFromNir, ScheduleVSforTCS)
+{
+   auto sh = from_string(vtx_for_tcs_pre_sched);
+   check(schedule(sh), vtx_for_tcs_sched);
+}
+
+
+void TestShaderFromNir::check(Shader *s, const char *expect_orig) // compare printed form of `s` with the re-printed parse of `expect_orig`
+{
+   ostringstream test_str;
+   s->print(test_str);
+
+   auto expect = from_string(expect_orig);
+
+   ostringstream expect_str;
+   expect->print(expect_str);
+
+   EXPECT_EQ(test_str.str(), expect_str.str());
+}
+
+void TestShaderFromNir::ra_check(Shader *s, const char *expect_orig) // same as check(), but clear_pins() is called on both shaders before printing
+{
+   s->value_factory().clear_pins();
+   ostringstream test_str;
+   s->print(test_str);
+
+   auto expect = from_string(expect_orig);
+   expect->value_factory().clear_pins();
+
+   ostringstream expect_str;
+   expect->print(expect_str);
+
+   EXPECT_EQ(test_str.str(), expect_str.str());
+}
+
+
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp
new file mode 100644
index 0000000..8ecbd3c
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp
@@ -0,0 +1,123 @@
+
+#include "../sfn_instrfactory.h"
+
+#include "../sfn_instr_alu.h"
+#include "../sfn_instr_tex.h"
+#include "../sfn_instr_export.h"
+
+#include "gtest/gtest.h"
+#include <sstream>
+
+using namespace r600;
+
+using std::istringstream;
+using std::string;
+using std::vector;
+
+class TestShaderFromString : public ::testing::Test
+{
+public:
+   void SetUp() override {
+      m_instr_factory = new InstrFactory();
+      init_pool();
+   }
+
+   void TearDown() override {
+      release_pool();
+   }
+
+   TestShaderFromString();
+
+   std::vector<PInst> from_string(const std::string& s); // one parsed instruction per non-blank, non-'#' line of `s`
+
+protected:
+   void check(const vector<PInst>& eval, const std::vector<PInst, Allocator<PInst>>& expect); // NOTE(review): template arguments restored; allocator type assumed - confirm
+private:
+   InstrFactory *m_instr_factory;
+};
+
+
+TEST_F(TestShaderFromString, test_simple_fs)
+{
+   auto init_str =
+      R"(
+
+# load constant color
+ALU MOV R2000.x@group : L[0x38000000] {W}
+ALU MOV R2000.y@group : L[0x0] {W}
+ALU MOV R2000.z@group : L[0x0] {W}
+ALU MOV R2000.w@group : L[0x38F00000] {WL}
+
+# write output
+EXPORT_DONE PIXEL 0 R2000.xyzw
+)";
+
+
+   auto shader = from_string(init_str);
+
+   std::vector<PInst, Allocator<PInst>> expect;
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 0, pin_group),
+                                 new LiteralConstant(0x38000000),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 1, pin_group),
+                                 new LiteralConstant( 0x0),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 2, pin_group),
+                                 new LiteralConstant( 0x0),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 3, pin_group),
+                                 new LiteralConstant( 0x38F00000),
+                                 {alu_write, alu_last_instr}));
+
+   auto exp = new ExportInstr(
+            ExportInstr::pixel, 0, RegisterVec4(2000, false));
+   exp->set_is_last_export(true);
+   expect.push_back(exp);
+
+   check(shader, expect);
+
+}
+
+
+
+TestShaderFromString::TestShaderFromString()
+{
+
+}
+
+std::vector<PInst> TestShaderFromString::from_string(const std::string& s)
+{
+   istringstream is(s);
+   string line;
+
+   std::vector<PInst> shader;
+
+   while (std::getline(is, line)) {
+      if (line.find_first_not_of(" \t") == std::string::npos) // skip empty / whitespace-only lines
+         continue;
+      if (line[0] == '#') // skip comment lines; only a '#' in column 0 counts
+         continue;
+
+      shader.push_back(m_instr_factory->from_string(line, 0));
+   }
+
+   return shader;
+}
+
+void TestShaderFromString::check(const vector<PInst>& eval,
+                                 const std::vector<PInst, Allocator<PInst>>& expect)
+{
+   ASSERT_EQ(eval.size(), expect.size()); // abort before indexing if the sizes differ
+
+   for (unsigned i = 0; i < eval.size(); ++i) {
+      EXPECT_EQ(*eval[i], *expect[i]);
+   }
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp
new file mode 100644
index 0000000..1dde8ce
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp
@@ -0,0 +1,3116 @@
+#include "sfn_test_shaders.h" +#include "../sfn_shader_fs.h" +#include "../sfn_shader_gs.h" +#include "../sfn_shader_tess.h" +#include "../sfn_shader_vs.h" +#include "../sfn_memorypool.h" + +namespace r600 { + +using std::istringstream; +using std::string; + +const char *red_triangle_fs_nir = +R"(shader: MESA_SHADER_FRAGMENT +name: TTN +inputs: 0 +outputs: 1 +uniforms: 0 +shared: 0 +decl_function main (0 params) + +impl main { + decl_var INTERP_MODE_FLAT vec4 out@out_0-temp + block block_0: + /* preds: */ + vec4 32 ssa_0 = load_const (0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */) + vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */) + intrinsic store_output (ssa_0, ssa_1) (0, 15, 0, 160, 132) /* base=0 */ /* wrmask=xyz */ /* component=0 */ /* src_type=float32 */ /* location=4 slots=1 */ + /* succs: block_1 */ + block block_1: +})"; + +const char *red_triangle_fs_expect_from_nir = R"( +FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x@group : I[1.0] {W} +ALU MOV S0.y@group : I[0] {W} +ALU MOV S0.z@group : I[0] {W} +ALU MOV S0.w@group : I[1.0] {WL} +ALU MOV S1.x@free : I[0] {WL} +EXPORT_DONE PIXEL 0 S0.xyzw +)"; + +const char *red_triangle_fs_expect_from_nir_dce = R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x@group : I[1.0] {W} +ALU MOV S0.y@group : I[0] {W} +ALU MOV S0.z@group : I[0] {W} +ALU MOV S0.w@group : I[1.0] {WL} +EXPORT_DONE PIXEL 0 S0.xyzw +)"; + + +const char *add_add_1_nir = +R"(shader: MESA_SHADER_FRAGMENT +name: GLSL3 +inputs: 0 +outputs: 1 +uniforms: 1 +shared: 0 +decl_var uniform INTERP_MODE_NONE vec4 color (0, 0, 0) +decl_function main (0 params) + +impl main { + decl_var INTERP_MODE_NONE vec4 out@gl_FragColor-temp + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const 
(0xbf000000 /* -0.500000 */) + vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */) + vec4 32 ssa_2 = intrinsic load_uniform (ssa_1) (0, 1, 160) /* base=0 */ /* range=1 */ /* dest_type=float32 */ /* color */ + vec1 32 ssa_3 = fadd ssa_0, ssa_2.x + vec4 32 ssa_4 = vec4 ssa_3, ssa_2.y, ssa_2.z, ssa_2.w + intrinsic store_output (ssa_4, ssa_1) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=2 slots=1 */ + /* succs: block_1 */ + block block_1: +})"; + +const char *add_add_1_expect_from_nir = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x@free : L[0xbf000000] {WL} +ALU MOV S1.x@free : I[0] {WL} +ALU MOV S2.x : KC0[0].x {W} +ALU MOV S2.y : KC0[0].y {W} +ALU MOV S2.z : KC0[0].z {W} +ALU MOV S2.w : KC0[0].w {WL} +ALU ADD S3.x@free : S0.x@free S2.x {WL} +ALU MOV S4.x@group : S3.x@free {W} +ALU MOV S4.y@group : S2.y {W} +ALU MOV S4.z@group : S2.z {W} +ALU MOV S4.w@group : S2.w {WL} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + + +const char *add_add_1_expect_from_nir_copy_prop_fwd = +R"( +FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x@free : L[0xbf000000] {WL} +ALU MOV S1.x@free : I[0] {WL} +ALU MOV S2.x : KC0[0].x {W} +ALU MOV S2.y : KC0[0].y {W} +ALU MOV S2.z : KC0[0].z {W} +ALU MOV S2.w : KC0[0].w {WL} +ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL} +ALU MOV S4.x@group : S3.x@free {W} +ALU MOV S4.y@group : KC0[0].y {W} +ALU MOV S4.z@group : KC0[0].z {W} +ALU MOV S4.w@group : KC0[0].w {WL} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + +const char *add_add_1_expect_from_nir_copy_prop_fwd_dce = +R"( +FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU ADD S3.x@free : 
L[0xbf000000] KC0[0].x {WL} +ALU MOV S4.x@group : S3.x@free {W} +ALU MOV S4.y@group : KC0[0].y {W} +ALU MOV S4.z@group : KC0[0].z {W} +ALU MOV S4.w@group : KC0[0].w {WL} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + + +const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd = +R"( +FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP WRITE_ALL_COLORS:1 +PROP COLOR_EXPORT_MASK:15 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU ADD S4.x@group : L[0xbf000000] KC0[0].x {W} +ALU MOV S4.y@group : KC0[0].y {W} +ALU MOV S4.z@group : KC0[0].z {W} +ALU MOV S4.w@group : KC0[0].w {WL} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + + +const char *basic_interpolation_nir = +R"(shader: MESA_SHADER_FRAGMENT +name: TTN +inputs: 1 +outputs: 1 +uniforms: 0 +shared: 0 +decl_var uniform INTERP_MODE_NONE sampler2D sampler (0, 0, 0) +decl_function main (0 params) + +impl main { + decl_var INTERP_MODE_NOPERSPECTIVE vec4 in@in_0-temp + decl_var INTERP_MODE_FLAT vec4 out@out_0-temp + block block_0: + /* preds: */ + vec2 32 ssa_0 = intrinsic load_barycentric_pixel () (3) /* interp_mode=3 */ + vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */) + vec4 32 ssa_2 = intrinsic load_interpolated_input (ssa_0, ssa_1) (0, 0, 160, 160) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=32 slots=1 */ + vec3 32 ssa_3 = f2i32 ssa_2.xyw + vec1 32 ssa_4 = mov ssa_3.z + vec2 32 ssa_5 = vec2 ssa_3.x, ssa_3.y + vec4 32 ssa_6 = (float32)txf ssa_5 (coord), ssa_4 (lod), 0 (texture), 0 (sampler) + intrinsic store_output (ssa_6, ssa_1) (0, 15, 0, 160, 132) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=4 slots=1 */ + /* succs: block_1 */ + block block_1: +})"; + +const char *basic_interpolation_expect_from_nir = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU 
MOV S1.x@free : I[0] {WL} +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S2.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S2.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S2.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S2.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END + +ALU TRUNC S3.x@free : S2.x@chan {WL} +ALU TRUNC S4.y@free : S2.y@chan {WL} +ALU TRUNC S5.z@free : S2.w@chan {WL} + +ALU FLT_TO_INT S6.x : S3.x@free {W} +ALU FLT_TO_INT S6.y : S4.y@free {W} +ALU FLT_TO_INT S6.z : S5.z@free {WL} + +ALU MOV S7.x@free : S6.z {WL} +ALU MOV S8.x : S6.x {W} +ALU MOV S8.y : S6.y {WL} +ALU MOV S9.x@group : S8.x {W} +ALU MOV S9.y@group : S8.y {W} +ALU MOV S9.w@group : S7.x@free {WL} +TEX LD S10.xyzw : S9.xy_w RID:18 SID:0 NNNN +EXPORT_DONE PIXEL 0 S10.xyzw)"; + + +const char *basic_interpolation_translated_1 = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU MOV S1.x@free : I[0] {WL} +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S2.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S2.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S2.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S2.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END + +ALU FLT_TO_INT S3.x : S2.x@free {W} +ALU FLT_TO_INT S3.y : S2.y@free {W} +ALU FLT_TO_INT S3.z : S2.w@free 
{WL} +ALU MOV S4.x : S3.x {W} +ALU MOV S4.y : S3.y {WL} +ALU MOV S5.x@group : S4.x {W} +ALU MOV S5.y@group : S4.y {W} +ALU MOV S5.w@group : S3.z {WL} +TEX LD S6.xyzw : S5.xy_w RID:18 SID:0 NNNN +EXPORT_DONE PIXEL 0 S6.xyzw)"; + + + +const char *basic_interpolation_2 = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S2.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S2.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S2.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S2.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END +EXPORT_DONE PIXEL 0 S2.xyzw +)"; + + +const char *basic_interpolation_orig = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU MOV S1024.x : I[0] {WL} +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END + +ALU FLT_TO_INT S1026.x : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y : S1025.y@chan {W} +ALU 
FLT_TO_INT S1026.z : S1025.w@chan {WL} +ALU MOV S1027.x : S1026.x {W} +ALU MOV S1027.y : S1026.y {WL} +ALU MOV S1028.x@group : S1027.x {W} +ALU MOV S1028.y@group : S1027.y {W} +ALU MOV S1028.w@group : S1026.z {WL} +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +EXPORT_DONE PIXEL 0 S1029.xyzw +)"; + +const char *basic_interpolation_expect_from_nir_sched = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +BLOCK_START +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w VEC_210 {W} +ALU MOV S1024.x : I[0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU FLT_TO_INT S1026.x : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y : S1025.y@chan {W} +ALU FLT_TO_INT S1026.z : S1025.w@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1027.x : S1026.x {W} +ALU MOV S1027.y : S1026.y {W} +ALU MOV S1028.w@group : S1026.z {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1028.x@group : S1027.x {W} +ALU MOV S1028.y@group : S1027.y {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S1029.xyzw +BLOCK_END +)"; + + +const char *basic_interpolation_orig_cayman = +R"(FS +CHIPCLASS CAYMAN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU MOV 
S1024.x : I[0] {WL} +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} +ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END + +ALU FLT_TO_INT S1026.x : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y : S1025.y@chan {W} +ALU FLT_TO_INT S1026.z : S1025.w@chan {WL} +ALU MOV S1027.x : S1026.x {W} +ALU MOV S1027.y : S1026.y {WL} +ALU MOV S1028.x@group : S1027.x {W} +ALU MOV S1028.y@group : S1027.y {W} +ALU MOV S1028.w@group : S1026.z {WL} +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +EXPORT_DONE PIXEL 0 S1029.xyzw +)"; + +const char *basic_interpolation_expect_from_nir_sched_cayman = +R"(FS +CHIPCLASS CAYMAN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +BLOCK_START +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 +ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x {W} VEC_210 +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU FLT_TO_INT S1026.x : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y : S1025.y@chan {W} +ALU FLT_TO_INT S1026.z : S1025.w@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV 
S1027.x : S1026.x {W} +ALU MOV S1027.y : S1026.y {W} +ALU MOV S1028.w@group : S1026.z {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1028.x@group : S1027.x {W} +ALU MOV S1028.y@group : S1027.y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU MOV S1024.x : I[0] {WL} +ALU_GROUP_END +BLOCK_START +BLOCK_END +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +BLOCK_START +BLOCK_END +EXPORT_DONE PIXEL 0 S1029.xyzw +BLOCK_END +)"; + +const char *basic_interpolation_expect_opt_sched_cayman = +R"(FS +CHIPCLASS CAYMAN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +BLOCK_START +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 +ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x {W} VEC_210 +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W} +ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W} +ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S1029.xyzw +BLOCK_END +)"; + +const char *basic_interpolation_expect_from_nir_opt = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SYSVALUES R0.xy__ +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} 
+ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z VEC_210 {W} +ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x VEC_210 {W} +ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W} +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} +ALU_GROUP_END +ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W} +ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W} +ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL} +TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +EXPORT_DONE PIXEL 0 S1029.xyzw +)"; + +const char *dot4_pre = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S1.x : KC0[0].x {W} +ALU MOV S1.y : KC0[0].y {W} +ALU MOV S1.z : KC0[0].z {W} +ALU MOV S1.w : KC0[0].w {WL} +ALU MOV S2.x : KC0[1].x {W} +ALU MOV S2.y : KC0[1].y {W} +ALU MOV S2.z : KC0[1].z {W} +ALU MOV S2.w : KC0[1].w {WL} +ALU DOT4_IEEE S3.x@free : S1.x S2.x + S1.y S2.y + S1.z S2.z + S1.w S2.w {WL} +ALU MOV S4.x : S3.x@free {W} +ALU MOV S4.y : S3.x@free {W} +ALU MOV S4.z : S3.x@free {W} +ALU MOV S4.w : S3.x@free {W} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + +const char *dot4_copy_prop_dce = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +#PROP RAT_BASE:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S2.x : KC0[1].x {W} +ALU MOV S2.y : KC0[1].y {W} +ALU MOV S2.z : KC0[1].z {W} +ALU MOV S2.w : KC0[1].w {WL} +ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL} +ALU MOV S4.x : S3.x@free {W} +ALU MOV S4.y : S3.x@free {W} +ALU MOV S4.z : S3.x@free {W} +ALU MOV S4.w : S3.x@free {W} +EXPORT_DONE PIXEL 0 S4.xyzw +)"; + +const char *glxgears_vs2_nir = +R"(shader: MESA_SHADER_VERTEX +name: ARB0 +inputs: 2 +outputs: 2 +uniforms: 11 +shared: 0 +decl_var uniform 
INTERP_MODE_NONE vec4[11] name (0, 0, 0) +decl_function main (0 params) + +impl main { + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */) + vec4 32 ssa_1 = intrinsic load_input (ssa_0) (0, 0, 160, 128) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=0 slots=1 */ + vec1 32 ssa_2 = load_const (0x00000006 /* 0.000000 */) + vec4 32 ssa_3 = intrinsic load_uniform (ssa_2) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec4 32 ssa_4 = fmul ssa_1.xxxx, ssa_3 + vec1 32 ssa_5 = load_const (0x00000007 /* 0.000000 */) + vec4 32 ssa_6 = intrinsic load_uniform (ssa_5) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec4 32 ssa_7 = ffma ssa_1.yyyy, ssa_6, ssa_4 + vec1 32 ssa_8 = load_const (0x00000008 /* 0.000000 */) + vec4 32 ssa_9 = intrinsic load_uniform (ssa_8) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec4 32 ssa_10 = ffma ssa_1.zzzz, ssa_9, ssa_7 + vec1 32 ssa_11 = load_const (0x00000009 /* 0.000000 */) + vec4 32 ssa_12 = intrinsic load_uniform (ssa_11) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec4 32 ssa_13 = ffma ssa_1.wwww, ssa_12, ssa_10 + vec4 32 ssa_14 = intrinsic load_input (ssa_0) (1, 0, 160, 129) /* base=1 */ /* component=0 */ /* dest_type=float32 */ /* location=1 slots=1 */ + vec1 32 ssa_15 = fdot3 ssa_14.xyz, ssa_14.xyz + vec1 32 ssa_16 = frsq abs(ssa_15) + vec4 32 ssa_17 = fmul ssa_14, ssa_16.xxxx + vec1 32 ssa_18 = load_const (0x00000002 /* 0.000000 */) + vec4 32 ssa_19 = intrinsic load_uniform (ssa_18) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec1 32 ssa_20 = load_const (0x0000000a /* 0.000000 */) + vec4 32 ssa_21 = intrinsic load_uniform (ssa_20) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec1 32 ssa_22 = fdot3 ssa_17.xyz, ssa_21.xyz + vec4 32 ssa_23 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 
0.000000 */, 0x3f800000 /* 1.000000 */) + vec1 32 ssa_24 = fmax ssa_23.y, ssa_22 + vec4 32 ssa_25 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */) + vec1 32 ssa_26 = slt ssa_25.z, ssa_22 + vec1 32 ssa_27 = load_const (0x00000003 /* 0.000000 */) + vec4 32 ssa_28 = intrinsic load_uniform (ssa_27) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec3 32 ssa_29 = fadd ssa_28.xyz, ssa_19.xyz + vec1 32 ssa_30 = load_const (0x00000004 /* 0.000000 */) + vec4 32 ssa_31 = intrinsic load_uniform (ssa_30) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec3 32 ssa_32 = ffma ssa_24.xxx, ssa_31.xyz, ssa_29 + vec1 32 ssa_33 = load_const (0x00000005 /* 0.000000 */) + vec4 32 ssa_34 = intrinsic load_uniform (ssa_33) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /* */ + vec3 32 ssa_35 = ffma.sat ssa_26.xxx, ssa_34.xyz, ssa_32 + intrinsic store_output (ssa_13, ssa_0) (0, 15, 0, 160, 128) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=0 slots=1 */ + vec3 32 ssa_36 = mov ssa_35 + vec1 32 ssa_37 = fsat ssa_19.w + vec4 32 ssa_38 = vec4 ssa_36.x, ssa_36.y, ssa_36.z, ssa_37 + intrinsic store_output (ssa_38, ssa_0) (1, 15, 0, 160, 129) /* base=1 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=1 slots=1 */ + /* succs: block_1 */ + block block_1: +})"; + +const char *glxgears_vs2_from_nir_expect = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +INPUT LOC:1 NAME:1 +OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0 +OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137 +SYSVALUES R1.xyzw R2.xyzw +SHADER +ALU MOV S3.x@free : I[0] {WL} +ALU MOV S4.x@free : L[0x6] {WL} +ALU MOV S5.x : KC0[6].x {W} +ALU MOV S5.y : KC0[6].y {W} +ALU MOV S5.z : KC0[6].z {W} +ALU MOV S5.w : KC0[6].w {WL} +ALU MUL_IEEE S6.x : R1.x@fully S5.x {W} +ALU MUL_IEEE S6.y : R1.x@fully S5.y {W} +ALU MUL_IEEE S6.z : R1.x@fully S5.z {W} +ALU 
MUL_IEEE S6.w : R1.x@fully S5.w {WL} +ALU MOV S7.x@free : L[0x7] {WL} +ALU MOV S8.x : KC0[7].x {W} +ALU MOV S8.y : KC0[7].y {W} +ALU MOV S8.z : KC0[7].z {W} +ALU MOV S8.w : KC0[7].w {WL} +ALU MULADD_IEEE S9.x : R1.y@fully S8.x S6.x {W} +ALU MULADD_IEEE S9.y : R1.y@fully S8.y S6.y {W} +ALU MULADD_IEEE S9.z : R1.y@fully S8.z S6.z {W} +ALU MULADD_IEEE S9.w : R1.y@fully S8.w S6.w {WL} +ALU MOV S10.x@free : L[0x8] {WL} +ALU MOV S11.x : KC0[8].x {W} +ALU MOV S11.y : KC0[8].y {W} +ALU MOV S11.z : KC0[8].z {W} +ALU MOV S11.w : KC0[8].w {WL} +ALU MULADD_IEEE S12.x : R1.z@fully S11.x S9.x {W} +ALU MULADD_IEEE S12.y : R1.z@fully S11.y S9.y {W} +ALU MULADD_IEEE S12.z : R1.z@fully S11.z S9.z {W} +ALU MULADD_IEEE S12.w : R1.z@fully S11.w S9.w {WL} +ALU MOV S13.x@free : L[0x9] {WL} +ALU MOV S14.x : KC0[9].x {W} +ALU MOV S14.y : KC0[9].y {W} +ALU MOV S14.z : KC0[9].z {W} +ALU MOV S14.w : KC0[9].w {WL} +ALU MULADD_IEEE S15.x@group : R1.w@fully S14.x S12.x {W} +ALU MULADD_IEEE S15.y@group : R1.w@fully S14.y S12.y {W} +ALU MULADD_IEEE S15.z@group : R1.w@fully S14.z S12.z {W} +ALU MULADD_IEEE S15.w@group : R1.w@fully S14.w S12.w {WL} +ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL} +ALU RECIPSQRT_IEEE S17.x@free : |S16.x@free| {WL} +ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W} +ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W} +ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W} +ALU MUL_IEEE S18.w : R2.w@fully S17.x@free {WL} +ALU MOV S19.x@free : L[0x2] {WL} +ALU MOV S20.x : KC0[2].x {W} +ALU MOV S20.y : KC0[2].y {W} +ALU MOV S20.z : KC0[2].z {W} +ALU MOV S20.w : KC0[2].w {WL} +ALU MOV S21.x@free : L[0xa] {WL} +ALU MOV S22.x : KC0[10].x {W} +ALU MOV S22.y : KC0[10].y {W} +ALU MOV S22.z : KC0[10].z {W} +ALU MOV S22.w : KC0[10].w {WL} +ALU DOT4_IEEE S23.x@free : S18.x S22.x + S18.y S22.y + S18.z S22.z + I[0].x I[0].x {WL} +ALU MOV S24.x : I[0] {W} +ALU MOV S24.y : I[0] {W} +ALU MOV S24.z : I[0] {W} +ALU MOV S24.w : I[1.0] 
{WL} +ALU MAX_DX10 S25.x@free : S24.y S23.x@free {WL} +ALU MOV S26.x : I[0] {W} +ALU MOV S26.y : I[0] {W} +ALU MOV S26.z : I[0] {W} +ALU MOV S26.w : I[0] {WL} +ALU SETGT S27.x@free : S23.x@free S26.z {WL} +ALU MOV S28.x@free : L[0x3] {WL} +ALU MOV S29.x : KC0[3].x {W} +ALU MOV S29.y : KC0[3].y {W} +ALU MOV S29.z : KC0[3].z {W} +ALU MOV S29.w : KC0[3].w {WL} +ALU ADD S30.x : S29.x S20.x {W} +ALU ADD S30.y : S29.y S20.y {W} +ALU ADD S30.z : S29.z S20.z {WL} +ALU MOV S31.x@free : L[0x4] {WL} +ALU MOV S32.x : KC0[4].x {W} +ALU MOV S32.y : KC0[4].y {W} +ALU MOV S32.z : KC0[4].z {W} +ALU MOV S32.w : KC0[4].w {WL} +ALU MULADD_IEEE S33.x : S25.x@free S32.x S30.x {W} +ALU MULADD_IEEE S33.y : S25.x@free S32.y S30.y {W} +ALU MULADD_IEEE S33.z : S25.x@free S32.z S30.z {WL} +ALU MOV S34.x@free : L[0x5] {WL} +ALU MOV S35.x : KC0[5].x {W} +ALU MOV S35.y : KC0[5].y {W} +ALU MOV S35.z : KC0[5].z {W} +ALU MOV S35.w : KC0[5].w {WL} +ALU MULADD_IEEE CLAMP S36.x : S27.x@free S35.x S33.x {W} +ALU MULADD_IEEE CLAMP S36.y : S27.x@free S35.y S33.y {W} +ALU MULADD_IEEE CLAMP S36.z : S27.x@free S35.z S33.z {WL} +EXPORT_DONE POS 0 S15.xyzw +ALU MOV S38.x : S36.x {W} +ALU MOV S38.y : S36.y {W} +ALU MOV S38.z : S36.z {WL} +ALU MOV CLAMP S39.x@free : S20.w {WL} +ALU MOV S40.x@group : S38.x {W} +ALU MOV S40.y@group : S38.y {W} +ALU MOV S40.z@group : S38.z {W} +ALU MOV S40.w@group : S39.x@free {WL} +EXPORT_DONE PARAM 0 S40.xyzw)"; + + +const char *glxgears_vs2_from_nir_expect_cayman = +R"(VS +CHIPCLASS CAYMAN +INPUT LOC:0 NAME:0 +INPUT LOC:1 NAME:1 +OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0 +OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137 +SYSVALUES R1.xyzw R2.xyzw +SHADER +ALU MOV S3.x@free : I[0] {WL} +ALU MOV S4.x@free : L[0x6] {WL} +ALU MOV S5.x : KC0[6].x {W} +ALU MOV S5.y : KC0[6].y {W} +ALU MOV S5.z : KC0[6].z {W} +ALU MOV S5.w : KC0[6].w {WL} +ALU MUL_IEEE S6.x : R1.x@fully S5.x {W} +ALU MUL_IEEE S6.y : R1.x@fully S5.y {W} +ALU MUL_IEEE S6.z : R1.x@fully S5.z {W} +ALU MUL_IEEE S6.w : 
R1.x@fully S5.w {WL} +ALU MOV S7.x@free : L[0x7] {WL} +ALU MOV S8.x : KC0[7].x {W} +ALU MOV S8.y : KC0[7].y {W} +ALU MOV S8.z : KC0[7].z {W} +ALU MOV S8.w : KC0[7].w {WL} +ALU MULADD_IEEE S9.x : R1.y@fully S8.x S6.x {W} +ALU MULADD_IEEE S9.y : R1.y@fully S8.y S6.y {W} +ALU MULADD_IEEE S9.z : R1.y@fully S8.z S6.z {W} +ALU MULADD_IEEE S9.w : R1.y@fully S8.w S6.w {WL} +ALU MOV S10.x@free : L[0x8] {WL} +ALU MOV S11.x : KC0[8].x {W} +ALU MOV S11.y : KC0[8].y {W} +ALU MOV S11.z : KC0[8].z {W} +ALU MOV S11.w : KC0[8].w {WL} +ALU MULADD_IEEE S12.x : R1.z@fully S11.x S9.x {W} +ALU MULADD_IEEE S12.y : R1.z@fully S11.y S9.y {W} +ALU MULADD_IEEE S12.z : R1.z@fully S11.z S9.z {W} +ALU MULADD_IEEE S12.w : R1.z@fully S11.w S9.w {WL} +ALU MOV S13.x@free : L[0x9] {WL} +ALU MOV S14.x : KC0[9].x {W} +ALU MOV S14.y : KC0[9].y {W} +ALU MOV S14.z : KC0[9].z {W} +ALU MOV S14.w : KC0[9].w {WL} +ALU MULADD_IEEE S15.x@group : R1.w@fully S14.x S12.x {W} +ALU MULADD_IEEE S15.y@group : R1.w@fully S14.y S12.y {W} +ALU MULADD_IEEE S15.z@group : R1.w@fully S14.z S12.z {W} +ALU MULADD_IEEE S15.w@group : R1.w@fully S14.w S12.w {WL} +ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL} +ALU RECIPSQRT_IEEE S17.x@chan : |S16.x@free| + |S16.x@free| + S16.x@free {WL} +ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W} +ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W} +ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W} +ALU MUL_IEEE S18.w : R2.w@fully S17.x@free {WL} +ALU MOV S19.x@free : L[0x2] {WL} +ALU MOV S20.x : KC0[2].x {W} +ALU MOV S20.y : KC0[2].y {W} +ALU MOV S20.z : KC0[2].z {W} +ALU MOV S20.w : KC0[2].w {WL} +ALU MOV S21.x@free : L[0xa] {WL} +ALU MOV S22.x : KC0[10].x {W} +ALU MOV S22.y : KC0[10].y {W} +ALU MOV S22.z : KC0[10].z {W} +ALU MOV S22.w : KC0[10].w {WL} +ALU DOT4_IEEE S23.x@free : S18.x S22.x + S18.y S22.y + S18.z S22.z + I[0].x I[0].x {WL} +ALU MOV S24.x : I[0] {W} +ALU MOV S24.y : I[0] {W} +ALU MOV S24.z : I[0] {W} +ALU MOV 
S24.w : I[1.0] {WL} +ALU MAX_DX10 S25.x@free : S24.y S23.x@free {WL} +ALU MOV S26.x : I[0] {W} +ALU MOV S26.y : I[0] {W} +ALU MOV S26.z : I[0] {W} +ALU MOV S26.w : I[0] {WL} +ALU SETGT S27.x@free : S23.x@free S26.z {WL} +ALU MOV S28.x@free : L[0x3] {WL} +ALU MOV S29.x : KC0[3].x {W} +ALU MOV S29.y : KC0[3].y {W} +ALU MOV S29.z : KC0[3].z {W} +ALU MOV S29.w : KC0[3].w {WL} +ALU ADD S30.x : S29.x S20.x {W} +ALU ADD S30.y : S29.y S20.y {W} +ALU ADD S30.z : S29.z S20.z {WL} +ALU MOV S31.x@free : L[0x4] {WL} +ALU MOV S32.x : KC0[4].x {W} +ALU MOV S32.y : KC0[4].y {W} +ALU MOV S32.z : KC0[4].z {W} +ALU MOV S32.w : KC0[4].w {WL} +ALU MULADD_IEEE S33.x : S25.x@free S32.x S30.x {W} +ALU MULADD_IEEE S33.y : S25.x@free S32.y S30.y {W} +ALU MULADD_IEEE S33.z : S25.x@free S32.z S30.z {WL} +ALU MOV S34.x@free : L[0x5] {WL} +ALU MOV S35.x : KC0[5].x {W} +ALU MOV S35.y : KC0[5].y {W} +ALU MOV S35.z : KC0[5].z {W} +ALU MOV S35.w : KC0[5].w {WL} +ALU MULADD_IEEE CLAMP S36.x : S27.x@free S35.x S33.x {W} +ALU MULADD_IEEE CLAMP S36.y : S27.x@free S35.y S33.y {W} +ALU MULADD_IEEE CLAMP S36.z : S27.x@free S35.z S33.z {WL} +EXPORT_DONE POS 0 S15.xyzw +ALU MOV S38.x : S36.x {W} +ALU MOV S38.y : S36.y {W} +ALU MOV S38.z : S36.z {WL} +ALU MOV CLAMP S39.x@free : S20.w {WL} +ALU MOV S40.x@group : S38.x {W} +ALU MOV S40.y@group : S38.y {W} +ALU MOV S40.z@group : S38.z {W} +ALU MOV S40.w@group : S39.x@free {WL} +EXPORT_DONE PARAM 0 S40.xyzw)"; + + +const char *glxgears_vs2_from_nir_expect_optimized = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +INPUT LOC:1 NAME:1 +OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0 +OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137 +SYSVALUES R1.xyzw R2.xyzw +SHADER +ALU MUL_IEEE S6.x : R1.x@fully KC0[6].x {W} +ALU MUL_IEEE S6.y : R1.x@fully KC0[6].y {W} +ALU MUL_IEEE S6.z : R1.x@fully KC0[6].z {W} +ALU MUL_IEEE S6.w : R1.x@fully KC0[6].w {WL} +ALU MULADD_IEEE S9.x : R1.y@fully KC0[7].x S6.x {W} +ALU MULADD_IEEE S9.y : R1.y@fully KC0[7].y S6.y {W} +ALU MULADD_IEEE S9.z 
: R1.y@fully KC0[7].z S6.z {W} +ALU MULADD_IEEE S9.w : R1.y@fully KC0[7].w S6.w {WL} +ALU MULADD_IEEE S12.x : R1.z@fully KC0[8].x S9.x {W} +ALU MULADD_IEEE S12.y : R1.z@fully KC0[8].y S9.y {W} +ALU MULADD_IEEE S12.z : R1.z@fully KC0[8].z S9.z {W} +ALU MULADD_IEEE S12.w : R1.z@fully KC0[8].w S9.w {WL} +ALU MULADD_IEEE S15.x@group : R1.w@fully KC0[9].x S12.x {W} +ALU MULADD_IEEE S15.y@group : R1.w@fully KC0[9].y S12.y {W} +ALU MULADD_IEEE S15.z@group : R1.w@fully KC0[9].z S12.z {W} +ALU MULADD_IEEE S15.w@group : R1.w@fully KC0[9].w S12.w {WL} +ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL} +ALU RECIPSQRT_IEEE S17.x@free : |S16.x@free| {WL} +ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W} +ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W} +ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W} +ALU DOT4_IEEE S23.x@free : S18.x KC0[10].x + S18.y KC0[10].y + S18.z KC0[10].z + I[0].x I[0].x {WL} +ALU MAX_DX10 S25.x@free : I[0] S23.x@free {WL} +ALU SETGT S27.x@free : S23.x@free I[0] {WL} +ALU ADD S30.x : KC0[3].x KC0[2].x {W} +ALU ADD S30.y : KC0[3].y KC0[2].y {W} +ALU ADD S30.z : KC0[3].z KC0[2].z {WL} +ALU MULADD_IEEE S33.x : S25.x@free KC0[4].x S30.x {W} +ALU MULADD_IEEE S33.y : S25.x@free KC0[4].y S30.y {W} +ALU MULADD_IEEE S33.z : S25.x@free KC0[4].z S30.z {WL} +ALU MULADD_IEEE CLAMP S40.x@group : S27.x@free KC0[5].x S33.x {W} +ALU MULADD_IEEE CLAMP S40.y@group : S27.x@free KC0[5].y S33.y {W} +ALU MULADD_IEEE CLAMP S40.z@group : S27.x@free KC0[5].z S33.z {W} +EXPORT_DONE POS 0 S15.xyzw +ALU MOV CLAMP S40.w@group : KC0[2].w {WL} +EXPORT_DONE PARAM 0 S40.xyzw)"; + + + +const char *vs_nexted_loop_nir = +R"(shader: MESA_SHADER_VERTEX +name: GLSL3 +inputs: 1 +outputs: 2 +uniforms: 3 +shared: 0 +decl_var uniform INTERP_MODE_NONE int a (0, 0, 0) +decl_var uniform INTERP_MODE_NONE int b (1, 1, 0) +decl_var uniform INTERP_MODE_NONE int c (2, 2, 0) +decl_function main (0 params) + +impl main { + decl_var 
INTERP_MODE_NONE vec4 out@gl_Position-temp + decl_var INTERP_MODE_NONE vec4 out@gl_FrontColor-temp + decl_reg vec1 32 r2 + decl_reg vec1 32 r3 + decl_reg vec1 32 r4 + decl_reg vec1 32 r5 + decl_reg vec1 32 r6 + decl_reg vec1 32 r7 + decl_reg vec1 32 r8 + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */) + vec4 32 ssa_1 = intrinsic load_input (ssa_0) (0, 0, 160, 128) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=0 slots=1 */ + vec1 32 ssa_2 = load_const (0xffffffff /* -nan */) + vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */) + vec1 32 ssa_4 = load_const (0x00000001 /* 0.000000 */) + vec4 32 ssa_5 = load_const (0x3f800000 /* 1.000000 */, 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */) + vec1 32 ssa_6 = load_const (0x00000002 /* 0.000000 */) + vec1 32 ssa_7 = intrinsic load_uniform (ssa_0) (0, 1, 34) /* base=0 */ /* range=1 */ /* dest_type=int32 */ /* a */ + vec1 32 ssa_8 = ieq32 ssa_7, ssa_4 + /* succs: block_1 block_10 */ + if ssa_8 { + block block_1: + /* preds: block_0 */ + vec1 32 ssa_9 = intrinsic load_uniform (ssa_0) (2, 1, 34) /* base=2 */ /* range=1 */ /* dest_type=int32 */ /* c */ + vec1 32 ssa_10 = ine32 ssa_9, ssa_4 + /* succs: block_2 block_8 */ + if ssa_10 { + block block_2: + /* preds: block_1 */ + r3 = mov ssa_4 + r2 = mov ssa_0 + /* succs: block_3 */ + loop { + block block_3: + /* preds: block_2 block_6 */ + r4 = i2f32 r2 + vec1 32 ssa_11 = intrinsic load_uniform (ssa_0) (1, 1, 34) /* base=1 */ /* range=1 */ /* dest_type=int32 */ /* b */ + vec1 32 ssa_12 = ine32 ssa_11, ssa_6 + /* succs: block_4 block_5 */ + if ssa_12 { + block block_4: + /* preds: block_3 */ + break + /* succs: block_7 */ + } else { + block block_5: + /* preds: block_3 */ + /* succs: block_6 */ + } + block block_6: + /* preds: block_5 */ + r5 = iadd r3, ssa_4 + r2 = mov r3 + r3 = mov r5 + /* succs: block_3 */ + } + block block_7: + /* preds: block_4 */ + vec1 32 ssa_13 = load_const 
(0x3f800000 /* 1.000000 */) + r8 = mov ssa_13 + r7 = mov r8 + r6 = mov ssa_2 + /* succs: block_9 */ + } else { + block block_8: + /* preds: block_1 */ + vec1 32 ssa_14 = load_const (0x3f800000 /* 1.000000 */) + r8 = mov ssa_14 + r7 = mov ssa_0 + r4 = mov r8 + r6 = mov ssa_3 + /* succs: block_9 */ + } + block block_9: + /* preds: block_7 block_8 */ + /* succs: block_11 */ + } else { + block block_10: + /* preds: block_0 */ + vec1 32 ssa_15 = load_const (0x3f800000 /* 1.000000 */) + r8 = mov ssa_15 + r7 = mov ssa_0 + r4 = mov r8 + r6 = mov ssa_2 + /* succs: block_11 */ + } + block block_11: + /* preds: block_9 block_10 */ + vec1 32 ssa_16 = b32csel r6, r4, ssa_5.x + vec1 32 ssa_17 = b32csel r6, r7, ssa_5.y + vec1 32 ssa_18 = b32csel r6, r8, ssa_5.w + intrinsic store_output (ssa_1, ssa_0) (0, 15, 0, 160, 128) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=0 slots=1 */ + vec1 32 ssa_19 = fsat ssa_16 + vec1 32 ssa_20 = fsat ssa_17 + vec1 32 ssa_21 = fsat ssa_18 + vec4 32 ssa_22 = vec4 ssa_19, ssa_20, ssa_0, ssa_21 + intrinsic store_output (ssa_22, ssa_0) (1, 15, 0, 160, 129) /* base=1 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=1 slots=1 */ + /* succs: block_12 */ + block block_12: +})"; + +const char *vs_nexted_loop_from_nir_expect = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0 +OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137 +SYSVALUES R1.xyzw +REGISTERS R2.x R3.x R4.x R5.x R6.x R7.x R8.x +SHADER +ALU MOV S9.x@free : I[0] {WL} +ALU MOV S10.x@free : I[-1] {WL} +ALU MOV S11.x@free : I[0] {WL} +ALU MOV S12.x@free : I[1] {WL} +ALU MOV S13.x : I[1.0] {W} +ALU MOV S13.y : I[1.0] {W} +ALU MOV S13.z : I[0] {W} +ALU MOV S13.w : I[1.0] {WL} +ALU MOV S14.x@free : L[0x2] {WL} +ALU MOV S15.x@free : KC0[0].x {WL} +ALU SETE_INT S16.x@free : S15.x@free S12.x@free {WL} +IF (( ALU PRED_SETNE_INT __.x@free : S16.x@free I[0] {LEP} PUSH_BEFORE )) + ALU MOV S18.x@free : KC0[2].x 
{WL} + ALU SETNE_INT S19.x@free : S18.x@free S12.x {WL} + IF (( ALU PRED_SETNE_INT __.y@free : S19.x@free I[0] {LEP} PUSH_BEFORE )) + ALU MOV R3.x : S12.x@free {WL} + ALU MOV R2.x : S9.x@free {WL} + LOOP_BEGIN + ALU INT_TO_FLT R4.x : R2.x {WL} + ALU MOV S21.x@free : KC0[1].x {WL} + ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {WL} + IF (( ALU PRED_SETNE_INT __.z@free : S22.x@free I[0] {LEP} PUSH_BEFORE )) + BREAK + ENDIF + ALU ADD_INT R5.x@free : R3.x S12.x@free {WL} + ALU MOV R2.x : R3.x {WL} + ALU MOV R3.x : R5.x {WL} + LOOP_END + ALU MOV S24.x@free : I[1.0] {WL} + ALU MOV R8.x : S24.x@free {WL} + ALU MOV R7.x : R8.x {WL} + ALU MOV R6.x : S10.x@free {WL} + ELSE + ALU MOV S25.x@free : I[1.0] {WL} + ALU MOV R8.x : S25.x@free {WL} + ALU MOV R7.x : S9.x {WL} + ALU MOV R4.x : R8.x {WL} + ALU MOV R6.x : S11.x@free {WL} + ENDIF +ELSE + ALU MOV S26.x@free : I[1.0] {WL} + ALU MOV R8.x : S26.x@free {WL} + ALU MOV R7.x : S9.x {WL} + ALU MOV R4.x : R8.x {WL} + ALU MOV R6.x : S10.x@free {WL} +ENDIF +ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {WL} +ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {WL} +ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {WL} +EXPORT_DONE POS 0 R1.xyzw +ALU MOV CLAMP S31.x@free : S27.x@free {WL} +ALU MOV CLAMP S32.x@free : S28.x@free {WL} +ALU MOV CLAMP S33.x@free : S29.x@free {WL} +ALU MOV S34.x@group : S31.x@free {W} +ALU MOV S34.y@group : S32.x@free {W} +ALU MOV S34.z@group : S9.x@free {W} +ALU MOV S34.w@group : S33.x@free {WL} +EXPORT_DONE PARAM 0 S34.xyzw +)"; + +const char *vs_nexted_loop_from_nir_expect_opt = +R"( +VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0 +OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137 +SYSVALUES R1.xyzw +REGISTERS R2.x@free R3.x@free R4.x@free R5.x@free R6.x@free R7.x@free R8.x@free +SHADER +IF (( ALU PREDE_INT __.x@free : KC0[0].x I[1] {LEP} PUSH_BEFORE )) + IF (( ALU PRED_SETNE_INT __.y@free : KC0[2].x I[1] {LEP} PUSH_BEFORE )) + ALU MOV R3.x : I[1] {WL} + ALU MOV R2.x : I[0] {WL} + 
LOOP_BEGIN + ALU INT_TO_FLT R4.x : R2.x {WL} + IF (( ALU PRED_SETNE_INT __.z@free : KC0[1].x L[0x2] {LEP} PUSH_BEFORE )) + BREAK + ENDIF + ALU ADD_INT R5.x : R3.x I[1] {WL} + ALU MOV R2.x : R3.x {WL} + ALU MOV R3.x : R5.x {WL} + LOOP_END + ALU MOV R8.x : I[1.0] {WL} + ALU MOV R7.x : R8.x {WL} + ALU MOV R6.x : I[-1] {WL} + ELSE + ALU MOV R8.x : I[1.0] {WL} + ALU MOV R7.x : I[0] {WL} + ALU MOV R4.x : R8.x {WL} + ALU MOV R6.x : I[0] {WL} + ENDIF +ELSE + ALU MOV R8.x : I[1.0] {WL} + ALU MOV R7.x : I[0] {WL} + ALU MOV R4.x : R8.x {WL} + ALU MOV R6.x : I[-1] {WL} +ENDIF +ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {WL} +ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {WL} +ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {WL} +EXPORT_DONE POS 0 R1.xyzw +ALU MOV CLAMP S34.x@group : S27.x@free {W} +ALU MOV CLAMP S34.y@group : S28.x@free {W} +ALU MOV CLAMP S34.w@group : S29.x@free {WL} +EXPORT_DONE PARAM 0 S34.xy0w +)"; + +const char *shader_with_local_array_nir = +R"( +shader: MESA_SHADER_FRAGMENT +name: GLSL3 +inputs: 2 +outputs: 1 +uniforms: 2 +shared: 0 +decl_var uniform INTERP_MODE_NONE int index (1, 0, 0) +decl_var uniform INTERP_MODE_NONE float expect (2, 1, 0) +decl_function main (0 params) + +impl main { + decl_var INTERP_MODE_NONE float[4] m1 + decl_var INTERP_MODE_NONE float[4] m2 + decl_var INTERP_MODE_NONE vec4 in@packed:m1[0],m1[1],m1[2],m1[3]-temp + decl_var INTERP_MODE_NONE vec4 in@packed:m2[0],m2[1],m2[2],m2[3]-temp + decl_var INTERP_MODE_NONE vec4 out@gl_FragColor-temp + decl_reg vec1 32 r0[4] + decl_reg vec1 32 r1[4] + decl_reg vec1 32 r2 + block block_0: + /* preds: */ + vec2 32 ssa_0 = intrinsic load_barycentric_pixel () (0) /* interp_mode=0 */ + vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */) + vec4 32 ssa_2 = intrinsic load_interpolated_input (ssa_0, ssa_1) (0, 0, 160, 160) /* base=0 */ /* component=0 */ /* location=32 slots=1 */ + vec4 32 ssa_3 = intrinsic load_interpolated_input (ssa_0, ssa_1) (1, 0, 160, 161) /* base=1 */ /* component=0 */ /* 
location=33 slots=1 */ + vec1 32 ssa_4 = load_const (0x00000004 /* 0.000000 */) + vec1 32 ssa_5 = load_const (0xfffffffc /* -nan */) + vec4 32 ssa_6 = load_const (0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */) + vec4 32 ssa_7 = load_const (0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */) + vec1 32 ssa_8 = mov ssa_2.x + r0[0] = mov ssa_8 + vec1 32 ssa_9 = mov ssa_2.y + r0[1] = mov ssa_9 + vec1 32 ssa_10 = mov ssa_2.z + r0[2] = mov ssa_10 + vec1 32 ssa_11 = mov ssa_2.w + r0[3] = mov ssa_11 + vec1 32 ssa_12 = mov ssa_3.x + r1[0] = mov ssa_12 + vec1 32 ssa_13 = mov ssa_3.y + r1[1] = mov ssa_13 + vec1 32 ssa_14 = mov ssa_3.z + r1[2] = mov ssa_14 + vec1 32 ssa_15 = mov ssa_3.w + r1[3] = mov ssa_15 + vec1 32 ssa_16 = intrinsic load_uniform (ssa_1) (0, 1, 34) /* base=0 */ /* range=1 */ /* dest_type=int32 */ /* index */ + vec1 32 ssa_17 = ige32 ssa_16, ssa_4 + /* succs: block_1 block_2 */ + if ssa_17 { + block block_1: + /* preds: block_0 */ + vec1 32 ssa_18 = iadd ssa_16, ssa_5 + vec1 32 ssa_19 = load_const (0x00000000 /* 0.000000 */) + vec1 32 ssa_20 = iadd ssa_19, ssa_18 + r2 = mov r1[0 + ssa_20] + /* succs: block_3 */ + } else { + block block_2: + /* preds: block_0 */ + vec1 32 ssa_21 = load_const (0x00000000 /* 0.000000 */) + vec1 32 ssa_22 = iadd ssa_21, ssa_16 + r2 = mov r0[0 + ssa_22] + /* succs: block_3 */ + } + block block_3: + /* preds: block_1 block_2 */ + vec1 32 ssa_23 = intrinsic load_uniform (ssa_1) (1, 1, 160) /* base=1 */ /* range=1 */ /* dest_type=float32 */ /* expect */ + vec1 32 ssa_24 = feq32 r2, ssa_23 + vec1 32 ssa_25 = fneu32 r2, ssa_23 + vec1 32 ssa_26 = b2f32 ssa_25 + vec1 32 ssa_27 = b2f32 ssa_24 + vec2 32 ssa_28 = b32csel ssa_24.xx, ssa_6.zw, ssa_7.zw + vec4 32 ssa_29 = vec4 ssa_26, ssa_27, ssa_28.x, ssa_28.y + intrinsic store_output (ssa_29, ssa_1) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* 
src_type=float32 */ /* location=2 slots=1 */ + /* succs: block_4 */ + block block_4: +} +)"; + +const char *shader_with_local_array_expect = +R"(FS +CHIPCLASS EVERGREEN +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +#PROP RAT_BASE:1 +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:2 +INPUT LOC:1 NAME:5 SID:10 SPI_SID:11 INTERP:2 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x@fully R0.y@fully R1.x +ARRAYS A1[4].x A1[4].y +SHADER +ALU MOV S6.x@free : I[0] {WL} +ALU_GROUP_BEGIN + ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 + ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 + ALU INTERP_ZW S7.z@chan : R0.y@fully Param0.z {W} VEC_210 + ALU INTERP_ZW S7.w@chan : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU INTERP_XY S7.x@chan : R0.y@fully Param0.x {W} VEC_210 + ALU INTERP_XY S7.y@chan : R0.x@fully Param0.y {W} VEC_210 + ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 + ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU INTERP_ZW __.x@chan : R0.y@fully Param1.x {} VEC_210 + ALU INTERP_ZW __.y@chan : R0.x@fully Param1.y {} VEC_210 + ALU INTERP_ZW S8.z@chan : R0.y@fully Param1.z {W} VEC_210 + ALU INTERP_ZW S8.w@chan : R0.x@fully Param1.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU INTERP_XY S8.x@chan : R0.y@fully Param1.x {W} VEC_210 + ALU INTERP_XY S8.y@chan : R0.x@fully Param1.y {W} VEC_210 + ALU INTERP_XY __.z@chan : R0.y@fully Param1.z {} VEC_210 + ALU INTERP_XY __.w@chan : R0.x@fully Param1.w {L} VEC_210 +ALU_GROUP_END +ALU MOV S9.x@free : L[0x4] {WL} +ALU MOV S10.x@free : L[0xfffffffc] {WL} +ALU MOV S11.x : I[0] {W} +ALU MOV S11.y : I[1.0] {W} +ALU MOV S11.z : I[0] {W} +ALU MOV S11.w : I[1.0] {WL} +ALU MOV S12.x : I[1.0] {W} +ALU MOV S12.y : I[0] {W} +ALU MOV S12.z : I[0] {W} +ALU MOV S12.w : I[1.0] {WL} +ALU MOV S13.x@free : S7.x@chan {WL} +ALU MOV A1[0].x : S13.x@free {WL} +ALU MOV S14.x@free 
: S7.y@chan {WL} +ALU MOV A1[1].x : S14.x@free {WL} +ALU MOV S15.x@free : S7.z@chan {WL} +ALU MOV A1[2].x : S15.x@free {WL} +ALU MOV S16.x@free : S7.w@chan {WL} +ALU MOV A1[3].x : S16.x@free {WL} +ALU MOV S17.x@free : S8.x@chan {WL} +ALU MOV A1[0].y : S17.x@free {WL} +ALU MOV S18.x@free : S8.y@chan {WL} +ALU MOV A1[1].y : S18.x@free {WL} +ALU MOV S19.x@free : S8.z@chan {WL} +ALU MOV A1[2].y : S19.x@free {WL} +ALU MOV S20.x@free : S8.w@chan {WL} +ALU MOV A1[3].y : S20.x@free {WL} +ALU MOV S21.x@free : KC0[0].x {WL} +ALU SETGE_INT S22.x@free : S21.x@free S9.x@free {WL} +IF (( ALU PRED_SETNE_INT __.x@free : S22.x@free I[0] {LEP} PUSH_BEFORE )) + ALU ADD_INT S24.x@free : S21.x@free S10.x@free {WL} + ALU MOV S25.x@free : I[0] {WL} + ALU ADD_INT S26.x@free : S25.x@free S24.x@free {WL} + ALU MOV R5.x@free : A1[S26.x@free].y {WL} +ELSE + ALU MOV S27.x@free : I[0] {WL} + ALU ADD_INT S28.x@free : S27.x@free S21.x@free {WL} + ALU MOV R5.x@free : A1[S28.x@free].x {WL} +ENDIF +ALU MOV S29.x@free : KC0[1].x {WL} +ALU SETE_DX10 S30.x@free : R5.x@free S29.x@free {WL} +ALU SETNE_DX10 S31.x@free : R5.x@free S29.x@free {WL} +ALU AND_INT S32.x@free : S31.x@free I[1.0] {WL} +ALU AND_INT S33.x@free : S30.x@free I[1.0] {WL} +ALU CNDE_INT S34.x : S30.x@free S12.z S11.z {W} +ALU CNDE_INT S34.y : S30.x@free S12.w S11.w {WL} +ALU MOV S35.x@group : S32.x@free {W} +ALU MOV S35.y@group : S33.x@free {W} +ALU MOV S35.z@group : S34.x {W} +ALU MOV S35.w@group : S34.y {WL} +EXPORT_DONE PIXEL 0 S35.xyzw)"; + +const char *test_schedule_group = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x : I[0] {WL} +ALU MOV S1.x : I[1.0] {WL} +ALU MOV S2.x : KC0[0].x {W} +ALU MOV S2.y : KC0[0].y {WL} +ALU MOV S3.x : KC0[2].x {W} +ALU MOV S3.y : KC0[2].y {WL} +ALU ADD S4.x : |S2.x| -S3.x {W} +ALU ADD S4.y : |S2.y| -S3.y {WL} +ALU DOT4_IEEE S5.x : S4.x S4.x + S4.y S4.y + I[0] I[0] + I[0] 
I[0] {WL} +ALU SQRT_IEEE S6.x : S5.x {WL} +ALU MOV S7.x : KC0[1].x {WL} +ALU SETGE_DX10 S8.x : S7.x S6.x {WL} +ALU NOT_INT S9.x : S8.x {WL} +ALU AND_INT S10.x : S9.x I[1.0] {WL} +ALU AND_INT S11.x : S8.x I[1.0] {WL} +ALU MOV S12.x@group : S10.x {W} +ALU MOV S12.y@group : S11.x {W} +ALU MOV S12.z@group : S0.x {W} +ALU MOV S12.w@group : S1.x {WL} +EXPORT_DONE PIXEL 0 S12.xyzw +)"; + +const char *test_schedule_group_expect = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU ADD S4.x@chan : |KC0[0].x| -KC0[2].x {W} + ALU ADD S4.y@chan : |KC0[0].y| -KC0[2].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU DOT4_IEEE S5.x@chan : S4.x@chan S4.x@chan {W} + ALU DOT4_IEEE __.y@chan : S4.y@chan S4.y@chan {} + ALU DOT4_IEEE __.z@chan : I[0] I[0] {} + ALU DOT4_IEEE __.w@chan : I[0] I[0] {L} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SQRT_IEEE S6.x : S5.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETGE_DX10 S8.x : KC0[1].x S6.x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU NOT_INT S9.x : S8.x {W} + ALU AND_INT S12.y@group : S8.x I[1.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU AND_INT S12.x@group : S9.x I[1.0] {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S12.xy01 +BLOCK_END +)"; + + +const char *shader_with_bany_nir = +R"(shader: MESA_SHADER_FRAGMENT +source_sha1: {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} +name: GLSL3 +inputs: 0 +outputs: 1 +uniforms: 8 +shared: 0 +ray queries: 0 +decl_var uniform INTERP_MODE_NONE mat4 arg0 (0, 0, 0) +decl_var uniform INTERP_MODE_NONE mat4 arg1 (1, 4, 0) +decl_function main (0 params) + +impl main { + decl_var INTERP_MODE_NONE vec4 out@gl_FragColor-temp + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */) + vec1 32 ssa_1 = load_const (0x00000001 /* 0.000000 */) + vec1 32 ssa_2 = load_const (0x00000002 /* 0.000000 */) + vec1 32 
ssa_3 = load_const (0x00000003 /* 0.000000 */) + vec4 32 ssa_4 = intrinsic load_uniform (ssa_0) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */ /* arg1 */ + vec4 32 ssa_5 = intrinsic load_uniform (ssa_0) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */ /* arg0 */ + vec1 32 ssa_6 = b32any_fnequal4 ssa_4, ssa_5 + vec4 32 ssa_7 = intrinsic load_uniform (ssa_1) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */ /* arg1 */ + vec4 32 ssa_8 = intrinsic load_uniform (ssa_1) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */ /* arg0 */ + vec1 32 ssa_9 = b32any_fnequal4 ssa_7, ssa_8 + vec4 32 ssa_10 = intrinsic load_uniform (ssa_2) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */ /* arg1 */ + vec4 32 ssa_11 = intrinsic load_uniform (ssa_2) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */ /* arg0 */ + vec1 32 ssa_12 = b32any_fnequal4 ssa_10, ssa_11 + vec4 32 ssa_13 = intrinsic load_uniform (ssa_3) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */ /* arg1 */ + vec4 32 ssa_14 = intrinsic load_uniform (ssa_3) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */ /* arg0 */ + vec1 32 ssa_15 = b32any_fnequal4 ssa_13, ssa_14 + vec4 32 ssa_16 = vec4 ssa_6, ssa_9, ssa_12, ssa_15 + vec4 32 ssa_17 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */) + vec1 32 ssa_18 = b32any_inequal4 ssa_16, ssa_17 + vec1 32 ssa_19 = inot ssa_18 + vec1 32 ssa_20 = b2f32 ssa_19 + vec4 32 ssa_21 = vec4 ssa_20, ssa_0, ssa_0, ssa_0 + intrinsic store_output (ssa_21, ssa_0) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=2 slots=1 */ + /* succs: block_1 */ + block block_1: +})"; + + +const char *shader_with_bany_expect_eg = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 
+SHADER +ALU MOV S0.x@free : I[0] {WL} +ALU MOV S1.x@free : I[1] {WL} +ALU MOV S2.x@free : L[0x2] {WL} +ALU MOV S3.x@free : L[0x3] {WL} +ALU MOV S4.x : KC0[4].x {W} +ALU MOV S4.y : KC0[4].y {W} +ALU MOV S4.z : KC0[4].z {W} +ALU MOV S4.w : KC0[4].w {WL} +ALU MOV S5.x : KC0[0].x {W} +ALU MOV S5.y : KC0[0].y {W} +ALU MOV S5.z : KC0[0].z {W} +ALU MOV S5.w : KC0[0].w {WL} +ALU SETNE S6.x@group : S4.x S5.x {W} +ALU SETNE S6.y@group : S4.y S5.y {W} +ALU SETNE S6.z@group : S4.z S5.z {W} +ALU SETNE S6.w@group : S4.w S5.w {WL} +ALU MAX4 S7.x@free : S6.x@group + S6.y@group + S6.z@group + S6.w@group {WL} +ALU SETE_DX10 S8.x@free : S7.x@free I[1.0] {WL} +ALU MOV S9.x : KC0[5].x {W} +ALU MOV S9.y : KC0[5].y {W} +ALU MOV S9.z : KC0[5].z {W} +ALU MOV S9.w : KC0[5].w {WL} +ALU MOV S10.x : KC0[1].x {W} +ALU MOV S10.y : KC0[1].y {W} +ALU MOV S10.z : KC0[1].z {W} +ALU MOV S10.w : KC0[1].w {WL} +ALU SETNE S11.x@group : S9.x S10.x {W} +ALU SETNE S11.y@group : S9.y S10.y {W} +ALU SETNE S11.z@group : S9.z S10.z {W} +ALU SETNE S11.w@group : S9.w S10.w {WL} +ALU MAX4 S12.y@free : S11.x@group + S11.y@group + S11.z@group + S11.w@group {WL} +ALU SETE_DX10 S13.x@free : S12.y@free I[1.0] {WL} +ALU MOV S14.x : KC0[6].x {W} +ALU MOV S14.y : KC0[6].y {W} +ALU MOV S14.z : KC0[6].z {W} +ALU MOV S14.w : KC0[6].w {WL} +ALU MOV S15.x : KC0[2].x {W} +ALU MOV S15.y : KC0[2].y {W} +ALU MOV S15.z : KC0[2].z {W} +ALU MOV S15.w : KC0[2].w {WL} +ALU SETNE S16.x@group : S14.x S15.x {W} +ALU SETNE S16.y@group : S14.y S15.y {W} +ALU SETNE S16.z@group : S14.z S15.z {W} +ALU SETNE S16.w@group : S14.w S15.w {WL} +ALU MAX4 S17.z@free : S16.x@group + S16.y@group + S16.z@group + S16.w@group {WL} +ALU SETE_DX10 S18.x@free : S17.z@free I[1.0] {WL} +ALU MOV S19.x : KC0[7].x {W} +ALU MOV S19.y : KC0[7].y {W} +ALU MOV S19.z : KC0[7].z {W} +ALU MOV S19.w : KC0[7].w {WL} +ALU MOV S20.x : KC0[3].x {W} +ALU MOV S20.y : KC0[3].y {W} +ALU MOV S20.z : KC0[3].z {W} +ALU MOV S20.w : KC0[3].w {WL} +ALU SETNE S21.x@group : S19.x S20.x 
{W} +ALU SETNE S21.y@group : S19.y S20.y {W} +ALU SETNE S21.z@group : S19.z S20.z {W} +ALU SETNE S21.w@group : S19.w S20.w {WL} +ALU MAX4 S22.w@free : S21.x@group + S21.y@group + S21.z@group + S21.w@group {WL} +ALU SETE_DX10 S23.x@free : S22.w@free I[1.0] {WL} +ALU MOV S24.x : S8.x@free {W} +ALU MOV S24.y : S13.x@free {W} +ALU MOV S24.z : S18.x@free {W} +ALU MOV S24.w : S23.x@free {WL} +ALU MOV S25.x : I[0] {W} +ALU MOV S25.y : I[0] {W} +ALU MOV S25.z : I[0] {W} +ALU MOV S25.w : I[0] {WL} +ALU SETNE_INT S27.x@free : S24.x S25.x {W} +ALU SETNE_INT S28.y@free : S24.y S25.y {W} +ALU SETNE_INT S29.z@free : S24.z S25.z {W} +ALU SETNE_INT S30.w@free : S24.w S25.w {WL} +ALU OR_INT S31.x@free : S27.x@free S28.y@free {W} +ALU OR_INT S32.y@free : S29.z@free S30.w@free {WL} +ALU OR_INT S26.x@free : S31.x@free S32.y@free {WL} +ALU NOT_INT S33.x@free : S26.x@free {WL} +ALU AND_INT S34.x@free : S33.x@free I[1.0] {WL} +ALU MOV S35.x@group : S34.x@free {W} +ALU MOV S35.y@group : S0.x@free {W} +ALU MOV S35.z@group : S0.x@free {W} +ALU MOV S35.w@group : S0.x@free {WL} +EXPORT_DONE PIXEL 0 S35.xyzw +)"; + +const char *shader_with_bany_expect_opt_sched_eg = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU SETNE S6.x@chgr : KC0[4].x KC0[0].x {W} + ALU SETNE S6.y@chgr : KC0[4].y KC0[0].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE S6.z@chgr : KC0[4].z KC0[0].z {W} + ALU SETNE S6.w@chgr : KC0[4].w KC0[0].w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MAX4 S7.x@chan : S6.x@chgr {W} + ALU MAX4 __.y@chan : S6.y@chgr {} + ALU MAX4 __.z@chan : S6.z@chgr {} + ALU MAX4 __.w@chan : S6.w@chgr {} + ALU SETNE S11.x@chgr : KC0[5].x KC0[1].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETE_DX10 S8.x@free : S7.x@chan I[1.0] {W} + ALU SETNE S11.y@chgr : KC0[5].y KC0[1].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE_INT S27.x@chan : S8.x@free I[0] 
{W} + ALU SETNE S11.z@chgr : KC0[5].z KC0[1].z {W} + ALU SETNE S11.w@chgr : KC0[5].w KC0[1].w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MAX4 __.x@chan : S11.x@chgr {} + ALU MAX4 S12.y@chan : S11.y@chgr {W} + ALU MAX4 __.z@chan : S11.z@chgr {} + ALU MAX4 __.w@chan : S11.w@chgr {} + ALU SETNE S16.x@chgr : KC0[6].x KC0[2].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETE_DX10 S13.x@free : S12.y@chan I[1.0] {W} + ALU SETNE S16.y@chgr : KC0[6].y KC0[2].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE_INT S28.y@chan : S13.x@free I[0] {W} + ALU SETNE S16.z@chgr : KC0[6].z KC0[2].z {W} + ALU SETNE S16.w@chgr : KC0[6].w KC0[2].w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MAX4 __.x@chan : S16.x@chgr {} + ALU MAX4 __.y@chan : S16.y@chgr {} + ALU MAX4 S17.z@chan : S16.z@chgr {W} + ALU MAX4 __.w@chan : S16.w@chgr {} + ALU OR_INT S31.x@chan : S27.x@chan S28.y@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE S21.x@chgr : KC0[7].x KC0[3].x {W} + ALU SETNE S21.y@chgr : KC0[7].y KC0[3].y {W} + ALU SETE_DX10 S18.z@chan : S17.z@chan I[1.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE_INT S29.x@chan : S18.z@chan I[0] {W} + ALU SETNE S21.z@chgr : KC0[7].z KC0[3].z {W} + ALU SETNE S21.w@chgr : KC0[7].w KC0[3].w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MAX4 __.x@chan : S21.x@chgr {} + ALU MAX4 __.y@chan : S21.y@chgr {} + ALU MAX4 __.z@chan : S21.z@chgr {} + ALU MAX4 S22.w@chan : S21.w@chgr {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETE_DX10 S23.x@free : S22.w@chan I[1.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU SETNE_INT S30.w@chan : S23.x@free I[0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU OR_INT S32.y@chan : S29.x@chan S30.w@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU OR_INT S26.x@chan : S31.x@chan S32.y@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU NOT_INT S33.x@free : S26.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU AND_INT S35.x@group : S33.x@free I[1.0] {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S35.x000 +BLOCK_END +)"; + +const 
char *shader_copy_prop_dont_kill_double_use = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x : I[0] {WL} +ALU MOV S1.x : I[1] {WL} +ALU MOV S2.x : I[1.0] {WL} +ALU MOV S3.x : KC0[2].x {W} +ALU MOV S3.y : KC0[2].y {WL} +ALU MOV S4.x : KC0[0].x {W} +ALU MOV S4.y : KC0[0].y {WL} +ALU SETNE_DX10 S5.x : S3.y S4.y {W} +ALU SETNE_DX10 S5.y : S3.x S4.x {WL} +ALU OR_INT S6.x : S5.x S5.y {WL} +ALU MOV S7.x : KC0[3].x {W} +ALU MOV S7.y : KC0[3].y {WL} +ALU MOV S8.x : KC0[1].x {W} +ALU MOV S8.y : KC0[1].y {WL} +ALU SETNE_DX10 S9.x : S7.y S8.y {W} +ALU SETNE_DX10 S9.y : S7.x S8.x {WL} +ALU OR_INT S10.x : S9.x S9.y {WL} +ALU OR_INT S11.x : S10.x S6.x {WL} +ALU NOT_INT S12.x : S11.x {WL} +ALU AND_INT S13.x : S12.x I[1.0] {WL} +ALU AND_INT S14.x : S11.x I[1.0] {WL} +ALU MOV S15.x@group : S13.x {W} +ALU MOV S15.y@group : S13.x {W} +ALU MOV S15.z@group : S14.x {W} +ALU MOV S15.w@group : S2.x {WL} +EXPORT_DONE PIXEL 0 S15.xyzw +)"; + + +const char *shader_copy_prop_dont_kill_double_use_expect = +R"( +FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU SETNE_DX10 S5.x : KC0[2].y KC0[0].y {W} + ALU SETNE_DX10 S5.y : KC0[2].x KC0[0].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU OR_INT S6.x : S5.x S5.y {W} + ALU SETNE_DX10 S9.y : KC0[3].x KC0[1].x {W} + ALU SETNE_DX10 S9.x : KC0[3].y KC0[1].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU OR_INT S10.x : S9.x S9.y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU OR_INT S11.x : S10.x S6.x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU NOT_INT S12.x : S11.x {W} + ALU AND_INT S15.z@group : S11.x I[1.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU AND_INT S13.x : S12.x I[1.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S15.x@group : S13.x {W} + ALU MOV S15.y@group : S13.x {WL} 
+ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S15.xyz1 +BLOCK_END +)"; + + +const char *shader_with_dest_array = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +OUTPUT LOC:0 NAME:0 MASK:15 +OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10 +OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11 +OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12 +OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13 +REGISTERS R1.xyzw +ARRAYS A2[4].xy A2[4].zw +SHADER +ALU MOV S6.x : I[0] {WL} +ALU MOV S7.x : I[1] {WL} +ALU MOV S8.x : L[0x2] {WL} +ALU MOV S9.x : L[0x3] {WL} +ALU MOV S10.x : L[0x4] {WL} +ALU MOV S11.x : L[0xfffffffc] {WL} +ALU MOV S12.x : KC0[1].x {W} +ALU MOV S12.y : KC0[1].y {W} +ALU MOV S12.z : KC0[1].z {W} +ALU MOV S12.w : KC0[1].w {WL} +ALU MOV S13.x : KC0[2].x {W} +ALU MOV S13.y : KC0[2].y {W} +ALU MOV S13.z : KC0[2].z {W} +ALU MOV S13.w : KC0[2].w {WL} +ALU MUL_IEEE S14.x : S13.x R1.y@fully {W} +ALU MUL_IEEE S14.y : S13.y R1.y@fully {W} +ALU MUL_IEEE S14.z : S13.z R1.y@fully {W} +ALU MUL_IEEE S14.w : S13.w R1.y@fully {WL} +ALU MULADD_IEEE S15.x : S12.x R1.x@fully S14.x {W} +ALU MULADD_IEEE S15.y : S12.y R1.x@fully S14.y {W} +ALU MULADD_IEEE S15.z : S12.z R1.x@fully S14.z {W} +ALU MULADD_IEEE S15.w : S12.w R1.x@fully S14.w {WL} +ALU MOV S16.x : KC0[3].x {W} +ALU MOV S16.y : KC0[3].y {W} +ALU MOV S16.z : KC0[3].z {W} +ALU MOV S16.w : KC0[3].w {WL} +ALU MULADD_IEEE S17.x : S16.x R1.z@fully S15.x {W} +ALU MULADD_IEEE S17.y : S16.y R1.z@fully S15.y {W} +ALU MULADD_IEEE S17.z : S16.z R1.z@fully S15.z {W} +ALU MULADD_IEEE S17.w : S16.w R1.z@fully S15.w {WL} +ALU MOV S18.x : KC0[4].x {W} +ALU MOV S18.y : KC0[4].y {W} +ALU MOV S18.z : KC0[4].z {W} +ALU MOV S18.w : KC0[4].w {WL} +ALU MULADD_IEEE S19.x@group : S18.x R1.w@fully S17.x {W} +ALU MULADD_IEEE S19.y@group : S18.y R1.w@fully S17.y {W} +ALU MULADD_IEEE S19.z@group : S18.z R1.w@fully S17.z {W} +ALU MULADD_IEEE S19.w@group : S18.w R1.w@fully S17.w {WL} +ALU MOV S20.x : I[1.0] {W} +ALU MOV S20.y : L[0x3f8ccccd] {WL} +ALU MOV 
A2[0].x : S20.x {W} +ALU MOV A2[0].y : S20.y {WL} +ALU MOV S21.x : L[0x40000000] {W} +ALU MOV S21.y : L[0x40066666] {WL} +ALU MOV A2[1].x : S21.x {W} +ALU MOV A2[1].y : S21.y {WL} +ALU MOV S22.x : L[0x40400000] {W} +ALU MOV S22.y : L[0x40466666] {WL} +ALU MOV A2[2].x : S22.x {W} +ALU MOV A2[2].y : S22.y {WL} +ALU MOV S23.x : L[0x40800000] {W} +ALU MOV S23.y : L[0x40833333] {WL} +ALU MOV A2[3].x : S23.x {W} +ALU MOV A2[3].y : S23.y {WL} +ALU MOV S24.x : L[0x40a00000] {W} +ALU MOV S24.y : L[0x40a33333] {WL} +ALU MOV A2[0].z : S24.x {W} +ALU MOV A2[0].w : S24.y {WL} +ALU MOV S25.x : L[0x40c00000] {W} +ALU MOV S25.y : L[0x40c33333] {WL} +ALU MOV A2[1].z : S25.x {W} +ALU MOV A2[1].w : S25.y {WL} +ALU MOV S26.x : L[0x40e00000] {W} +ALU MOV S26.y : L[0x40e33333] {WL} +ALU MOV A2[2].z : S26.x {W} +ALU MOV A2[2].w : S26.y {WL} +ALU MOV S27.x : L[0x41000000] {W} +ALU MOV S27.y : L[0x4101999a] {WL} +ALU MOV A2[3].z : S27.x {W} +ALU MOV A2[3].w : S27.y {WL} +ALU MOV S28.x : KC0[0].x {WL} +ALU SETGE_INT S29.x : S28.x S10.x {WL} +IF (( ALU PRED_SETNE_INT __.x@free : S29.x I[0] {LEP} PUSH_BEFORE )) + ALU ADD_INT S31.x : S28.x S11.x {WL} + ALU MOV S32.x : I[0] {W} + ALU MOV S32.y : L[0x3dcccccd] {WL} + ALU MOV S33.x : I[0] {WL} + ALU ADD_INT S34.x : S33.x S31.x {WL} + ALU MOV A2[S34.x].z : S32.x {W} + ALU MOV A2[S34.x].w : S32.y {WL} +ELSE + ALU MOV S35.x : I[0] {W} + ALU MOV S35.y : L[0x3dcccccd] {WL} + ALU MOV S36.x : I[0] {WL} + ALU ADD_INT S37.x : S36.x S28.x {WL} + ALU MOV A2[S37.x].x : S35.x {W} + ALU MOV A2[S37.x].y : S35.y {WL} +ENDIF +ALU MOV S38.x : A2[0].x {W} +ALU MOV S38.y : A2[0].y {WL} +ALU MOV S39.x : A2[1].x {W} +ALU MOV S39.y : A2[1].y {WL} +ALU MOV S40.x : A2[2].x {W} +ALU MOV S40.y : A2[2].y {WL} +ALU MOV S41.x : A2[3].x {W} +ALU MOV S41.y : A2[3].y {WL} +ALU MOV S42.x : A2[0].z {W} +ALU MOV S42.y : A2[0].w {WL} +ALU MOV S43.x : A2[1].z {W} +ALU MOV S43.y : A2[1].w {WL} +ALU MOV S44.x : A2[2].z {W} +ALU MOV S44.y : A2[2].w {WL} +ALU MOV S45.x : A2[3].z {W} +ALU 
MOV S45.y : A2[3].w {WL} +EXPORT_DONE POS 0 S19.xyzw +ALU MOV S46.x@group : S38.x {W} +ALU MOV S46.y@group : S38.y {W} +ALU MOV S46.z@group : S39.x {W} +ALU MOV S46.w@group : S39.y {WL} +EXPORT PARAM 0 S46.xyzw +ALU MOV S47.x@group : S40.x {W} +ALU MOV S47.y@group : S40.y {W} +ALU MOV S47.z@group : S41.x {W} +ALU MOV S47.w@group : S41.y {WL} +EXPORT PARAM 1 S47.xyzw +ALU MOV S48.x@group : S42.x {W} +ALU MOV S48.y@group : S42.y {W} +ALU MOV S48.z@group : S43.x {W} +ALU MOV S48.w@group : S43.y {WL} +EXPORT PARAM 2 S48.xyzw +ALU MOV S49.x@group : S44.x {W} +ALU MOV S49.y@group : S44.y {W} +ALU MOV S49.z@group : S45.x {W} +ALU MOV S49.w@group : S45.y {WL} +EXPORT_DONE PARAM 3 S49.xyzw +)"; + +const char *shader_with_dest_array_opt_expect = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +OUTPUT LOC:0 NAME:0 MASK:15 +OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10 +OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11 +OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12 +OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13 +REGISTERS R1.xyzw +ARRAYS A2[4].xy A2[4].zw +SHADER +ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {W} +ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {W} +ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W} +ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL} +ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {W} +ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {W} +ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W} +ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL} +ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W} +ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W} +ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W} +ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL} +ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W} +ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W} +ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W} +ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL} +ALU MOV A2[0].x : I[1.0] {W} +ALU MOV A2[0].y : L[0x3f8ccccd] 
{WL} +ALU MOV A2[1].x : L[0x40000000] {W} +ALU MOV A2[1].y : L[0x40066666] {WL} +ALU MOV A2[2].x : L[0x40400000] {W} +ALU MOV A2[2].y : L[0x40466666] {WL} +ALU MOV A2[3].x : L[0x40800000] {W} +ALU MOV A2[3].y : L[0x40833333] {WL} +ALU MOV A2[0].z : L[0x40a00000] {W} +ALU MOV A2[0].w : L[0x40a33333] {WL} +ALU MOV A2[1].z : L[0x40c00000] {W} +ALU MOV A2[1].w : L[0x40c33333] {WL} +ALU MOV A2[2].z : L[0x40e00000] {W} +ALU MOV A2[2].w : L[0x40e33333] {WL} +ALU MOV A2[3].z : L[0x41000000] {W} +ALU MOV A2[3].w : L[0x4101999a] {WL} +IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) + ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL} + ALU MOV A2[S34.x].z : I[0] {W} + ALU MOV A2[S34.x].w : L[0x3dcccccd] {WL} +ELSE + ALU MOV S37.x : KC0[0].x {WL} + ALU MOV A2[S37.x].x : I[0] {W} + ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL} +ENDIF +EXPORT_DONE POS 0 S19.xyzw +ALU MOV S46.x@group : A2[0].x {W} +ALU MOV S46.y@group : A2[0].y {W} +ALU MOV S46.z@group : A2[1].x {W} +ALU MOV S46.w@group : A2[1].y {WL} +EXPORT PARAM 0 S46.xyzw +ALU MOV S47.x@group : A2[2].x {W} +ALU MOV S47.y@group : A2[2].y {W} +ALU MOV S47.z@group : A2[3].x {W} +ALU MOV S47.w@group : A2[3].y {WL} +EXPORT PARAM 1 S47.xyzw +ALU MOV S48.x@group : A2[0].z {W} +ALU MOV S48.y@group : A2[0].w {W} +ALU MOV S48.z@group : A2[1].z {W} +ALU MOV S48.w@group : A2[1].w {WL} +EXPORT PARAM 2 S48.xyzw +ALU MOV S49.x@group : A2[2].z {W} +ALU MOV S49.y@group : A2[2].w {W} +ALU MOV S49.z@group : A2[3].z {W} +ALU MOV S49.w@group : A2[3].w {WL} +EXPORT_DONE PARAM 3 S49.xyzw +)"; + +const char *shader_with_dest_array_opt_scheduled = +R"(VS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:0 +OUTPUT LOC:0 NAME:0 MASK:15 +OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10 +OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11 +OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12 +OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13 +REGISTERS R1.xyzw +ARRAYS A2[4].xy A2[4].zw +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU MOV A2[0].x : I[1.0] {W} + ALU MOV 
A2[0].y : L[0x3f8ccccd] {W} + ALU MOV A2[0].z : L[0x40a00000] {W} + ALU MOV A2[0].w : L[0x40a33333] {W} + ALU MOV A2[1].x : L[0x40000000] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV A2[2].x : L[0x40400000] {W} + ALU MOV A2[1].y : L[0x40066666] {W} + ALU MOV A2[1].z : L[0x40c00000] {W} + ALU MOV A2[1].w : L[0x40c33333] {W} + ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV A2[3].x : L[0x40800000] {W} + ALU MOV A2[2].y : L[0x40466666] {W} + ALU MOV A2[2].z : L[0x40e00000] {W} + ALU MOV A2[2].w : L[0x40e33333] {W} + ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W} + ALU MOV A2[3].y : L[0x40833333] {W} + ALU MOV A2[3].z : L[0x41000000] {W} + ALU MOV A2[3].w : L[0x4101999a] {W} + ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W} + ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W} + ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W} + ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W} + ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W} + ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W} + ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W} + ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL} +ALU_GROUP_END +IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) + ALU_GROUP_BEGIN + ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL} + ALU_GROUP_END + ALU_GROUP_BEGIN + ALU MOV A2[S34.x].z : I[0] {W} + ALU MOV A2[S34.x].w : L[0x3dcccccd] {WL} + ALU_GROUP_END +ELSE 
+ ALU_GROUP_BEGIN + ALU MOV S37.x : KC0[0].x {WL} + ALU_GROUP_END + ALU_GROUP_BEGIN + ALU MOV A2[S37.x].x : I[0] {W} + ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL} + ALU_GROUP_END +ENDIF +ALU_GROUP_BEGIN + ALU MOV S46.x@group : A2[0].x {W} + ALU MOV S46.y@group : A2[0].y {W} + ALU MOV S46.z@group : A2[1].x {W} + ALU MOV S46.w@group : A2[1].y {W} + ALU MOV S47.x@group : A2[2].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S48.x@group : A2[0].z {W} + ALU MOV S47.y@group : A2[2].y {W} + ALU MOV S47.z@group : A2[3].x {W} + ALU MOV S47.w@group : A2[3].y {W} + ALU MOV S48.y@group : A2[0].w {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S49.x@group : A2[2].z {W} + ALU MOV S49.y@group : A2[2].w {W} + ALU MOV S48.z@group : A2[1].z {W} + ALU MOV S48.w@group : A2[1].w {W} + ALU MOV S49.z@group : A2[3].z {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S49.w@group : A2[3].w {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE POS 0 S19.xyzw +EXPORT PARAM 0 S46.xyzw +EXPORT PARAM 1 S47.xyzw +EXPORT PARAM 2 S48.xyzw +EXPORT_DONE PARAM 3 S49.xyzw +BLOCK_END +)"; + + +const char *shader_with_dest_array2 = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +ARRAYS A0[2].xy +SHADER +BLOCK_START +ALU MOV A0[0].x : KC0[0].x {W} +ALU MOV A0[0].y : KC0[0].y {WL} +ALU MOV A0[1].x : KC0[1].x {W} +ALU MOV A0[1].y : KC0[1].y {WL} +ALU MOV S1.x : KC0[2].x {WL} +ALU MOV A0[S1.x].x : I[1.0] {W} +ALU MOV A0[S1.x].y : L[2.0] {WL} +ALU MOV S2.x : A0[0].x {W} +ALU MOV S2.y : A0[0].y {WL} +ALU MUL_IEEE S3.x@group : S2.x KC0[2].y {W} +ALU MUL_IEEE S3.y@group : S2.y KC0[2].y {WL} +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S3.xy01 +BLOCK_END +)"; + +const char *shader_with_dest_array2_scheduled = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +ARRAYS A0[2].xy +SHADER +BLOCK_START 
+ALU_GROUP_BEGIN + ALU MOV A0[0].x : KC0[0].x {W} + ALU MOV A0[0].y : KC0[0].y {W} + ALU MOV A0[1].x : KC0[1].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S1.x : KC0[2].x {W} + ALU MOV A0[1].y : KC0[1].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV A0[S1.x].x : I[1.0] {W} + ALU MOV A0[S1.x].y : L[2.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S2.x : A0[0].x {W} + ALU MOV S2.y : A0[0].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MUL_IEEE S3.x@group : S2.x KC0[2].y {W} + ALU MUL_IEEE S3.y@group : S2.y KC0[2].y {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S3.xy01 +BLOCK_END +)"; + +const char *shader_with_dest_array2_scheduled_ra = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +ARRAYS A0[2].xy +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU MOV A0[0].x : KC0[0].x {W} + ALU MOV A0[0].y : KC0[0].y {W} + ALU MOV A0[1].x : KC0[1].x {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV R2.x : KC0[2].x {W} + ALU MOV A0[1].y : KC0[1].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV A0[R2.x].x : I[1.0] {W} + ALU MOV A0[R2.x].y : L[2.0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV R2.x : A0[0].x {W} + ALU MOV R2.y : A0[0].y {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MUL_IEEE R0.x : R2.x KC0[2].y {W} + ALU MUL_IEEE R0.y : R2.y KC0[2].y {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 R0.xy01 +BLOCK_END +)"; + +const char *shader_group_chan_pin_to_combine = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.xy__ +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 +ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210 
+ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210 +ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU MOV S2.x@group : S1.x@chan {W} VEC_210 +ALU MOV S2.y@group : S1.y@chan {W} VEC_210 +ALU MOV S2.z@group : S1.z@chan {W} VEC_210 +ALU MOV S2.w@group : S1.w@chan {WL} VEC_210 +EXPORT_DONE PIXEL 0 S2.xyzw +)"; + + +const char *shader_group_chan_pin_combined = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x R0.y +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 +ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210 +ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +EXPORT_DONE PIXEL 0 S2.xyzw +)"; + +const char *shader_group_chan_pin_combined_sheduled = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x@fully R0.y@fully +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 +ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 +ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210 +ALU 
INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 +ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +EXPORT_DONE PIXEL 0 S2.xyzw +)"; + +const char *shader_group_chan_pin_combined_sheduled_ra = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x@fully R0.y@fully R1.xyzw +SHADER +ALU_GROUP_BEGIN +ALU INTERP_ZW __.x : R0.y Param0.x {} VEC_210 +ALU INTERP_ZW __.y : R0.x Param0.y {} VEC_210 +ALU INTERP_ZW R1.z : R0.y Param0.z {W} VEC_210 +ALU INTERP_ZW R1.w : R0.x Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN +ALU INTERP_XY R1.x : R0.y Param0.x {W} VEC_210 +ALU INTERP_XY R1.y : R0.x Param0.y {W} VEC_210 +ALU INTERP_XY __.z : R0.y Param0.z {} VEC_210 +ALU INTERP_XY __.w : R0.x Param0.w {L} VEC_210 +ALU_GROUP_END +EXPORT_DONE PIXEL 0 R1.xyzw +)"; + + +const char *shader_group_chan_pin_to_combine_2 = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S0.x@free : I[0] {WL} +ALU MOV S1.x : KC0[0].x {W} +ALU MOV S1.y : KC0[0].y {W} +ALU MOV S1.z : KC0[0].z {W} +ALU MOV S1.w : KC0[0].w {WL} +ALU DOT4_IEEE S2.x@free : S1.y S1.y + S1.y S1.y + I[0] I[0] + I[0] I[0] {WL} +ALU DOT4_IEEE S3.x@free : S1.x S1.z + S1.x S1.w + I[0] I[0] + I[0] I[0] {WL} +ALU DOT4_IEEE S4.x@free : S1.y S1.w + S1.w S1.y + I[0] I[0] + I[0] I[0] {WL} +ALU MOV S5.x@group : S2.x@free {W} +ALU MOV S5.y@group : S3.x@free {W} +ALU MOV S5.z@group : S3.x@free {W} +ALU MOV S5.w@group : S4.x@free {WL} +EXPORT_DONE PIXEL 0 S5.xyzw +)"; + +const char *shader_group_chan_pin_to_combine_2_opt = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 
MASK:15 +SHADER +ALU DOT4_IEEE S5.x@group : KC0[0].y KC0[0].y + KC0[0].y KC0[0].y + I[0] I[0] + I[0] I[0] {W} +ALU DOT4_IEEE S3.x@free : KC0[0].x KC0[0].z + KC0[0].x KC0[0].w + I[0] I[0] + I[0] I[0] {WL} +ALU DOT4_IEEE S5.w@group : KC0[0].y KC0[0].w + KC0[0].w KC0[0].y + I[0] I[0] + I[0] I[0] {WL} +ALU MOV S5.y@group : S3.x@free {W} +ALU MOV S5.z@group : S3.x@free {W} +EXPORT_DONE PIXEL 0 S5.xyzw +)"; + + +const char *fs_with_grand_and_abs = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV S1.x@free : I[0] {WL} +ALU_GROUP_BEGIN + x: ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210 + y: ALU INTERP_XY S2.y@chan : R0.x@fully Param0.y {W} VEC_210 + z: ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 + w: ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU MOV S3.x@free : L[0xbf800000] {WL} +ALU MOV S4.x@free : I[1.0] {WL} +ALU MOV S5.x@free : L[0x41a00000] {WL} +ALU MOV S6.x@free : L[0x41200000] {WL} +ALU SETGT_DX10 S7.x : S2.x@chgr S1.x@free {W} +ALU SETGT_DX10 S7.y : S2.y@chan S1.x@free {WL} +ALU AND_INT S8.x : S7.x I[1.0] {W} +ALU AND_INT S8.y : S7.y I[1.0] {WL} +ALU SETGT_DX10 S9.x : S1.x@free S2.x@chgr {W} +ALU SETGT_DX10 S9.y : S1.x@free S2.y@chan {WL} +ALU AND_INT S10.x : S9.x I[1.0] {W} +ALU AND_INT S10.y : S9.y I[1.0] {WL} +ALU ADD S11.x : S8.x -S10.x {W} +ALU ADD S11.y : S8.y -S10.y {WL} +ALU SETE_DX10 S12.x : S11.x S3.x@free {W} +ALU SETE_DX10 S12.y : S11.y S3.x@free {WL} +ALU MOV S13.x@group : |S2.x@chgr| {WL} +TEX GET_GRADIENTS_H S14.x___ : S2.x___ RID:18 SID:0 NNNN +ALU MUL_IEEE S15.x@free : S14.x@group S5.x@free {WL} +ALU MOV S16.x@free : -S15.x@free {WL} +ALU CNDE_INT S17.x@free : S12.x S15.x@free S16.x@free {WL} +ALU MOV S18.x : KC0[0].x {W} +ALU MOV S18.y : KC0[0].y {W} +ALU MOV S18.z : KC0[0].z {W} +ALU MOV S18.w : KC0[0].w {WL} +ALU 
MUL_IEEE S19.x@group : |S2.y@chan| S18.x {WL} +ALU MOV S20.x@group : S19.x@group {WL} +TEX GET_GRADIENTS_V S21.x___ : S19.x___ RID:18 SID:0 NNNN +ALU MUL_IEEE S22.x@free : S21.x@group S6.x@free {WL} +ALU MOV S23.x@free : -S22.x@free {WL} +ALU CNDE_INT S24.x@free : S12.y S22.x@free S23.x@free {WL} +ALU MOV S25.x@group : S17.x@free {W} +ALU MOV S25.y@group : S24.x@free {W} +ALU MOV S25.z@group : S1.x@free {W} +ALU MOV S25.w@group : S4.x@free {WL} +EXPORT_DONE PIXEL 0 S25.xyzw +)"; + + +const char *fs_with_loop_multislot_reuse = +R"(FS +CHIPCLASS CAYMAN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU MOV R1.x@free : I[0] {WL} +ALU MOV S2.x@free : L[0x38f00000] {WL} +LOOP_BEGIN +ALU RECIPSQRT_IEEE S3.x@free : |R1.x@free| + |R1.x@free| + |R1.x@free| {WL} +ALU SETGT_DX10 S4.x@free : S3.x@free S2.x@free {W} + IF (( ALU PRED_SETNE_INT __.x@free : S4.x@free I[0] {LEP} PUSH_BEFORE )) + BREAK + ENDIF + ALU ADD S5.x@free : S3.x@chan L[0x38f00000] {WL} + ALU MUL R1.x@free : S5.x@free L[0x38f00000] {WL} +LOOP_END +EXPORT_DONE PIXEL 0 R1.xxxx +)"; + +const char *fs_with_loop_multislot_reuse_scheduled = +R"(FS +CHIPCLASS CAYMAN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +PROP WRITE_ALL_COLORS:1 +OUTPUT LOC:0 NAME:1 MASK:15 +SHADER +ALU_GROUP_BEGIN + ALU MOV R1.x@free : I[0] {W} + ALU MOV S2.y@chan : L[0x38f00000] {WL} +ALU_GROUP_END +LOOP_BEGIN + ALU_GROUP_BEGIN + ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W} + ALU RECIPSQRT_IEEE __.y@chan : |R1.x@free| {} + ALU RECIPSQRT_IEEE __.z@chan : |R1.x@free| {L} + ALU_GROUP_END + ALU_GROUP_BEGIN + ALU SETGT_DX10 S4.x@chan : S3.x@chgr S2.y@free {WL} + ALU_GROUP_END + IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE )) + BREAK + ENDIF + ALU_GROUP_BEGIN + ALU ADD S5.x@free : S3.x@chan L[0x38f00000] {WL} + ALU_GROUP_END + ALU_GROUP_BEGIN + ALU MUL R1.x@free : S5.x@free L[0x38f00000] {WL} + 
ALU_GROUP_END +LOOP_END +EXPORT_DONE PIXEL 0 R1.xxxx +)"; + + +const char *gs_abs_float_nir = +R"(shader: MESA_SHADER_GEOMETRY +source_sha1: {0xdfd2ba73, 0x5eff5b0c, 0x577ee695, 0xb65ae49e, 0xecc34679} +name: GLSL4 +inputs: 1 +outputs: 2 +uniforms: 3 +shared: 0 +ray queries: 0 +invocations: 1 +vertices in: 3 +vertices out: 3 +input primitive: TRIANGLES +output primitive: TRIANGLE_STRIP +active_stream_mask: 0x1 +uses_end_primitive: 0 +decl_var uniform INTERP_MODE_NONE float arg0 (0, 0, 0) +decl_var uniform INTERP_MODE_NONE float tolerance (1, 1, 0) +decl_var uniform INTERP_MODE_NONE float expected (2, 2, 0) +decl_function main (0 params) + +impl main { + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) + vec4 32 ssa_1 = intrinsic load_per_vertex_input (ssa_0, ssa_0) (0, 0, 160, 160) + vec1 32 ssa_2 = load_const (0x00000001 = 0.000000) + vec4 32 ssa_3 = intrinsic load_per_vertex_input (ssa_2, ssa_0) (0, 0, 160, 160) + vec1 32 ssa_4 = load_const (0x00000002 = 0.000000) + vec4 32 ssa_5 = intrinsic load_per_vertex_input (ssa_4, ssa_0) (0, 0, 160, 160) + vec1 32 ssa_6 = load_const (0x3f800000 = 1.000000) + vec1 32 ssa_7 = intrinsic load_uniform (ssa_0) (0, 1, 160) /* arg0 */ + vec1 32 ssa_8 = intrinsic load_uniform (ssa_0) (2, 1, 160) /* expected */ + vec1 32 ssa_9 = fsub abs(ssa_7), ssa_8 + vec1 32 ssa_10 = intrinsic load_uniform (ssa_0) (1, 1, 160) /* tolerance */ + vec1 32 ssa_11 = fge32 ssa_10, abs(ssa_9) + vec1 32 ssa_12 = inot ssa_11 + vec1 32 ssa_13 = b2f32 ssa_12 + vec1 32 ssa_14 = b2f32 ssa_11 + intrinsic store_output (ssa_1, ssa_0) (0, 15, 0, 160, 128) + vec4 32 ssa_15 = vec4 ssa_13, ssa_14, ssa_0, ssa_6 + intrinsic store_output (ssa_15, ssa_0) (1, 15, 0, 160, 160) + intrinsic emit_vertex () (0) + intrinsic store_output (ssa_3, ssa_0) (0, 15, 0, 160, 128) + intrinsic store_output (ssa_15, ssa_0) (1,15, 0, 160, 160) + intrinsic emit_vertex () (0) + intrinsic store_output (ssa_5, ssa_0) (0, 15, 0, 160, 128) + intrinsic 
store_output (ssa_15, ssa_0) (1,15, 0, 160, 160) + intrinsic emit_vertex () (0) + /* succs: block_1 */ + block block_1: +})"; + +const char *gs_abs_float_expect = +R"(GS +CHIPCLASS EVERGREEN +INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:0 MASK:15 +OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10 +REGISTERS R0.x@fully R0.y@fully R0.w@fully +SHADER +ALU MOV S2.x@chan : I[0] {WL} +ALU MOV S3.x@chan : I[0] {WL} +ALU MOV S4.x@chan : I[0] {WL} +ALU MOV S5.x@chan : I[0] {WL} +ALU MOV S6.x@free : I[0] {WL} +LOAD_BUF S7.xyzw : R0.x@fully RID:17 +ALU MOV S8.x@free : I[1] {WL} +LOAD_BUF S9.xyzw : R0.y@fully RID:17 +ALU MOV S10.x@free : L[0x2] {WL} +LOAD_BUF S11.xyzw : R0.w@fully RID:17 +ALU MOV S12.x@free : I[1.0] {WL} +ALU MOV S13.x@free : KC0[0].x {WL} +ALU MOV S14.x@free : KC0[2].x {WL} +ALU ADD S15.x@free : |S13.x@free| -S14.x@free {WL} +ALU MOV S16.x@free : KC0[1].x {WL} +ALU SETGE_DX10 S17.x@free : S16.x@free |S15.x@free| {WL} +ALU NOT_INT S18.x@free : S17.x@free {WL} +ALU AND_INT S19.x@free : S18.x@free I[1.0] {WL} +ALU AND_INT S20.x@free : S17.x@free I[1.0] {WL} +ALU MOV S21.x@group : S19.x@free {W} +ALU MOV S21.y@group : S20.x@free {W} +ALU MOV S21.z@group : S6.x@free {W} +ALU MOV S21.w@group : S12.x@free {WL} +MEM_RING 0 WRITE_IDX 0 S7.xyzw @S2.x@chan ES:4 +MEM_RING 0 WRITE_IDX 4 S21.xyzw @S2.x@chan ES:4 +EMIT_VERTEX @0 +ALU ADD_INT S22.x@chan : S2.x@chan L[0x2] {WL} +MEM_RING 0 WRITE_IDX 0 S9.xyzw @S22.x@chan ES:4 +MEM_RING 0 WRITE_IDX 4 S21.xyzw @S22.x@chan ES:4 +EMIT_VERTEX @0 +ALU ADD_INT S23.x@chan : S22.x@chan L[0x2] {WL} +MEM_RING 0 WRITE_IDX 0 S11.xyzw @S23.x@chan ES:4 +MEM_RING 0 WRITE_IDX 4 S21.xyzw @S23.x@chan ES:4 +EMIT_VERTEX @0 +ALU ADD_INT S24.x@chan : S23.x@chan L[0x2] {WL} +)"; + + +const char *vtx_for_tcs_nir = +R"(shader: MESA_SHADER_VERTEX +source_sha1: {0xbd6100f2, 0xc71e7b0e, 0x74662024, 0x261073d8, 0xeae01762} +name: GLSL5 +inputs: 0 +outputs: 1 +uniforms: 10 +shared: 0 +ray queries: 0 +decl_var uniform INTERP_MODE_NONE int[6] 
constarray_1_0 (0, 0, 0) = { { 0x00000000 }, { 0x00000001 }, { 0x00000002 }, { 0x00000000 }, { 0x00000002 }, { 0x00000003 } } +decl_var uniform INTERP_MODE_NONE vec2[4] constarray_0_0 (1, 6, 0) = { { -1.000000, 1.000000 }, { -1.000000, -1.000000 }, { 1.000000, -1.000000 }, { 1.000000, 1.000000 } } +decl_function main (0 params) + +impl main { + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) + vec1 32 ssa_1 = load_const (0x3f800000 = 1.000000) + vec1 32 ssa_2 = intrinsic load_vertex_id () () + vec1 32 ssa_3 = intrinsic load_uniform (ssa_2) (0, 6, 34) + vec2 32 ssa_4 = intrinsic load_uniform (ssa_3) (6, 4, 160) + vec4 32 ssa_5 = vec4 ssa_4.x, ssa_4.y, ssa_0, ssa_1 + vec4 32 ssa_6 = intrinsic load_tcs_in_param_base_r600 () () + vec1 32 ssa_7 = intrinsic load_tcs_rel_patch_id_r600 () () + vec1 32 ssa_8 = umul24 ssa_6.y, ssa_7 + intrinsic store_local_shared_r600 (ssa_5, ssa_8) (3) + vec1 32 ssa_9 = load_const (0x00000008 = 0.000000) + vec1 32 ssa_10 = iadd ssa_9, ssa_8 + intrinsic store_local_shared_r600 (ssa_5, ssa_10) (12) + /* succs: block_1 */ + block block_1: +})"; + + +const char *vtx_for_tcs_from_nir_expect = +R"(VS +CHIPCLASS EVERGREEN +REGISTERS R0.x@fully R0.y@fully +SHADER +ALU MOV S1.x@free : I[0] {WL} +ALU MOV S2.x@free : I[1.0] {WL} +ALU MOV S3.x@free : R0.x@fully {WL} +LOAD_BUF S4.xyzw : S3.x@free RID:0 +LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0 +ALU MOV S6.x : S5.x@group {W} +ALU MOV S6.y : S5.y@group {W} +ALU MOV S6.z : S1.x@free {W} +ALU MOV S6.w : S2.x@free {WL} +ALU MOV S7.x@free : I[0] {WL} +LOAD_BUF S8.xyzw : S7.x@free RID:16 SRF +ALU MOV S9.x@free : R0.y@fully {WL} +ALU MUL_UINT24 S10.x@free : S8.y@group S9.x@free {WL} +LDS WRITE_REL __.x [ S10.x@free ] : S6.x S6.y +ALU MOV S11.x@free : L[0x8] {WL} +ALU ADD_INT S12.x@free : S11.x@free S10.x@free {WL} +LDS WRITE_REL __.x [ S12.x@free ] : S6.z S6.w)"; + + +const char *vtx_for_tcs_inp = +R"(VS +CHIPCLASS EVERGREEN +REGISTERS R0.x@fully R0.y@fully +SHADER +ALU MOV 
S1.x@free : I[0] {WL} +ALU MOV S2.x@free : I[1.0] {WL} +ALU MOV S3.x@free : R0.x@fully {WL} +LOAD_BUF S4.xyzw : S3.x@free RID:0 +LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0 +ALU MOV S6.x : S5.x@group {W} +ALU MOV S6.y : S5.y@group {W} +ALU MOV S6.z : S1.x@free {W} +ALU MOV S6.w : S2.x@free {WL} +ALU MOV S7.x@free : I[0] {WL} +LOAD_BUF S8.xyzw : S7.x@free RID:16 SRF +ALU MOV S9.x@free : R0.y@fully {WL} +ALU MUL_UINT24 S10.x@free : S8.y@group S9.x@free {WL} +LDS WRITE_REL __.x [ S10.x@free ] : S6.x S6.y +ALU MOV S11.x@free : L[0x8] {WL} +ALU ADD_INT S12.x@free : S11.x@free S10.x@free {WL} +LDS WRITE_REL __.x [ S12.x@free ] : S6.z S6.w)"; + +const char *vtx_for_tcs_opt = +R"(VS +CHIPCLASS EVERGREEN +REGISTERS R0.x@fully R0.y@fully +SHADER +LOAD_BUF S4.x___ : R0.x@fully RID:0 +LOAD_BUF S5.xy__ : S4.x@group + 96b RID:0 +ALU MOV S7.x@free : I[0] {WL} +LOAD_BUF S8._y__ : S7.x@free RID:16 SRF +ALU MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL} +LDS WRITE_REL __.x [ S10.x@free ] : S5.x@group S5.y@group +ALU ADD_INT S12.x@free : L[0x8] S10.x@free {WL} +LDS WRITE_REL __.x [ S12.x@free ] : I[0] I[1.0])"; + +const char *vtx_for_tcs_pre_sched = +R"(VS +CHIPCLASS EVERGREEN +REGISTERS R0.x@fully R0.y@fully +SHADER +ALU MOV S3.x@free : R0.x@fully {WL} +LOAD_BUF S4.xyzw : S3.x@free RID:0 +LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0 +ALU MOV S7.y@free : I[0] {WL} +LOAD_BUF S8.xyzw : S7.y@free RID:16 SRF +ALU MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL} +LDS WRITE_REL __.x [ S10.x@free ] : S5.x@group S5.y@group +ALU ADD_INT S12.x@free : L[0x8] S10.x@free {WL} +LDS WRITE_REL __.x [ S12.x@free ] : I[0] I[1.0])"; + +const char *vtx_for_tcs_sched = +R"(VS +CHIPCLASS EVERGREEN +REGISTERS R0.x@fully R0.y@fully +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU MOV S3.x@free : R0.x@fully {W} + ALU MOV S7.y@free : I[0] {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +LOAD_BUF S4.xyzw : S3.x@free RID:0 +LOAD_BUF S8.xyzw : S7.y@free RID:16 SRF +BLOCK_END +BLOCK_START +ALU_GROUP_BEGIN + ALU 
MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU ADD_INT S12.x@chan : L[0x8] S10.x@free {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU LDS WRITE_REL __.x : S12.x@chan I[0] I[1.0] {L} +ALU_GROUP_END +BLOCK_END +BLOCK_START +LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0 +BLOCK_END +BLOCK_START +ALU_GROUP_BEGIN + ALU LDS WRITE_REL __.x : S10.x@free S5.x@group S5.y@group {L} +ALU_GROUP_END +BLOCK_END)"; + +const char *tcs_nir = +R"(shader: MESA_SHADER_TESS_CTRL +source_sha1: {0xc83b0de6, 0x36934b97, 0xccddb436, 0xb0952cb0, 0x07a450a1} +name: GLSL5 +inputs: 1 +outputs: 3 +uniforms: 0 +shared: 0 +ray queries: 0 +decl_function main (0 params) + +impl main { + block block_0: + /* preds: */ + vec1 32 ssa_0 = undefined + vec2 32 ssa_1 = load_const (0x3f800000, 0x3f800000) + vec1 32 ssa_2 = load_const (0x00000000) + vec4 32 ssa_3 = intrinsic load_tcs_out_param_base_r600 () () + vec1 32 ssa_4 = intrinsic load_tcs_rel_patch_id_r600 () () + vec2 32 ssa_5 = umad24 ssa_3.xx, ssa_4.xx, ssa_3.wz + vec1 32 ssa_6 = mov ssa_5.x + vec1 32 ssa_7 = load_const (0x00000010) + vec4 32 ssa_8 = load_const (0x00000010, 0x00000000, 0x00000004, 0x00000008) + vec4 32 ssa_9 = iadd ssa_5.xxxx, ssa_8 + vec1 32 ssa_10 = mov ssa_9.x + vec3 32 ssa_11 = mov ssa_9.yzw + intrinsic store_local_shared_r600 (ssa_1, ssa_10) (3) + vec4 32 ssa_12 = load_const (0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000) + vec4 32 ssa_13 = vec4 ssa_12.x, ssa_12.y, ssa_12.z, ssa_0 + intrinsic store_local_shared_r600 (ssa_13, ssa_6) (3) + vec1 32 ssa_14 = load_const (0x00000008) + vec1 32 ssa_15 = iadd ssa_14, ssa_5.x + intrinsic store_local_shared_r600 (ssa_13, ssa_15) (12) + vec1 32 ssa_16 = intrinsic load_invocation_id () () + vec4 32 ssa_17 = intrinsic load_tcs_in_param_base_r600 () () + vec1 32 ssa_18 = umul24 ssa_17.x, ssa_4 + vec1 32 ssa_19 = umad24 ssa_17.y, ssa_16, ssa_18 + vec4 32 ssa_20 = load_const (0x00000000, 0x00000004, 0x00000008, 0x0000000c) + vec4 32 ssa_21 = iadd ssa_20, 
ssa_19.xxxx + vec4 32 ssa_22 = intrinsic load_local_shared_r600 (ssa_21) () + vec1 32 ssa_23 = umad24 ssa_3.y, ssa_16, ssa_5.y + intrinsic store_local_shared_r600 (ssa_22, ssa_23) (3) + vec1 32 ssa_24 = iadd ssa_14, ssa_23 + intrinsic store_local_shared_r600 (ssa_22, ssa_24) (12) + vec1 32 ssa_25 = ieq32 ssa_16, ssa_2 + /* succs: block_1 block_2 */ + if ssa_25 { + block block_1: + /* preds: block_0 */ + vec3 32 ssa_26 = intrinsic load_local_shared_r600 (ssa_11) () + vec1 32 ssa_27 = intrinsic load_tcs_tess_factor_base_r600 () () + vec1 32 ssa_28 = umad24 ssa_4, ssa_7, ssa_27 + vec3 32 ssa_29 = load_const (0x00000004, 0x00000008, 0x0000000c) + vec3 32 ssa_30 = iadd ssa_28.xxx, ssa_29 + vec4 32 ssa_31 = vec4 ssa_28, ssa_26.x, ssa_30.x, ssa_26.y + vec2 32 ssa_32 = vec2 ssa_30.y, ssa_26.z + vec1 32 ssa_33 = intrinsic load_local_shared_r600 (ssa_10) () + vec2 32 ssa_34 = vec2 ssa_30.z, ssa_33 + intrinsic store_tf_r600 (ssa_31) () + intrinsic store_tf_r600 (ssa_32) () + intrinsic store_tf_r600 (ssa_34) () + /* succs: block_3 */ + } else { + block block_2: + /* preds: block_0 */ + /* succs: block_3 */ + } + block block_3: + /* preds: block_1 block_2 */ + /* succs: block_4 */ + block block_4: +})"; + +const char *tcs_from_nir_expect = +R"(TCS +CHIPCLASS EVERGREEN +PROP TCS_PRIM_MODE:4 +REGISTERS R0.x@fully R0.y@fully R0.z@fully R0.w@fully +SHADER +ALU MOV S1.x@free : I[0] {WL} +ALU MOV S2.x : I[1.0] {W} +ALU MOV S2.y : I[1.0] {WL} +ALU MOV S3.x@free : I[0] {WL} +ALU MOV S4.x@free : I[0] {WL} +LOAD_BUF S5.xyzw : S4.x@free + 16b RID:16 SRF +ALU MOV S6.x@free : R0.y@fully {WL} +ALU MULADD_UINT24 S7.x : S5.x@group S6.x@free S5.w@group {W} +ALU MULADD_UINT24 S7.y : S5.x@group S6.x@free S5.z@group {WL} +ALU MOV S8.x@free : S7.x {WL} +ALU MOV S9.x@free : L[0x10] {WL} +ALU MOV S10.x : L[0x10] {W} +ALU MOV S10.y : I[0] {W} +ALU MOV S10.z : L[0x4] {W} +ALU MOV S10.w : L[0x8] {WL} +ALU ADD_INT S11.x : S7.x S10.x {W} +ALU ADD_INT S11.y : S7.x S10.y {W} +ALU ADD_INT S11.z : S7.x S10.z 
{W} +ALU ADD_INT S11.w : S7.x S10.w {WL} +ALU MOV S12.x@free : S11.x {WL} +ALU MOV S13.x : S11.y {W} +ALU MOV S13.y : S11.z {W} +ALU MOV S13.z : S11.w {WL} +LDS WRITE_REL __.x [ S12.x@free ] : S2.x S2.y +ALU MOV S14.x : I[1.0] {W} +ALU MOV S14.y : I[1.0] {W} +ALU MOV S14.z : I[1.0] {W} +ALU MOV S14.w : I[1.0] {WL} +ALU MOV S15.x : S14.x {W} +ALU MOV S15.y : S14.y {W} +ALU MOV S15.z : S14.z {W} +ALU MOV S15.w : S1.x@free {WL} +LDS WRITE_REL __.x [ S8.x@free ] : S15.x S15.y +ALU MOV S16.x@free : L[0x8] {WL} +ALU ADD_INT S17.x@free : S16.x@free S7.x {WL} +LDS WRITE_REL __.x [ S17.x@free ] : S15.z S15.w +ALU MOV S18.x@free : R0.z@fully {WL} +ALU MOV S19.y@free : I[0] {WL} +LOAD_BUF S20.xyzw : S19.y@free RID:16 SRF +ALU MUL_UINT24 S21.x@free : S20.x@group S6.x@free {WL} +ALU MULADD_UINT24 S22.x@free : S20.y@group S18.x@free S21.x@free {WL} +ALU MOV S23.x : I[0] {W} +ALU MOV S23.y : L[0x4] {W} +ALU MOV S23.z : L[0x8] {W} +ALU MOV S23.w : L[0xc] {WL} +ALU ADD_INT S24.x : S23.x S22.x@free {W} +ALU ADD_INT S24.y : S23.y S22.x@free {W} +ALU ADD_INT S24.z : S23.z S22.x@free {W} +ALU ADD_INT S24.w : S23.w S22.x@free {WL} +LDS_READ [ S25.x@free S25.y@free S25.z@free S25.w@free ] : [ S24.x S24.y S24.z S24.w ] +ALU MULADD_UINT24 S26.x@free : S5.y@group S18.x@free S7.y {WL} +LDS WRITE_REL __.x [ S26.x@free ] : S25.x@free S25.y@free +ALU ADD_INT S27.x@free : S16.x@free S26.x@free {WL} +LDS WRITE_REL __.x [ S27.x@free ] : S25.z@free S25.w@free +ALU SETE_INT S28.x@free : S18.x@free S3.x@free {WL} +IF (( ALU PRED_SETNE_INT __.z@free : S28.x@free I[0] {LEP} PUSH_BEFORE )) + LDS_READ [ S30.x@free S30.y@free S30.z@free ] : [ S13.x S13.y S13.z ] + ALU MOV S31.x@free : R0.w@fully {WL} + ALU MULADD_UINT24 S32.x@free : S6.x@free S9.x@free S31.x@free {WL} + ALU MOV S33.x : L[0x4] {W} + ALU MOV S33.y : L[0x8] {W} + ALU MOV S33.z : L[0xc] {WL} + ALU ADD_INT S34.x : S32.x@free S33.x {W} + ALU ADD_INT S34.y : S32.x@free S33.y {W} + ALU ADD_INT S34.z : S32.x@free S33.z {WL} + ALU MOV S35.x : 
S32.x@free {W} + ALU MOV S35.y : S30.x@free {W} + ALU MOV S35.z : S34.x {W} + ALU MOV S35.w : S30.y@free {WL} + ALU MOV S36.x : S34.y {W} + ALU MOV S36.y : S30.z@free {WL} + LDS_READ [ S37.x@free ] : [ S12.x@free ] + ALU MOV S38.x : S34.z {W} + ALU MOV S38.y : S37.x@free {WL} + ALU MOV S39.x@group : S35.x {W} + ALU MOV S39.y@group : S35.y {W} + ALU MOV S40.z@group : S35.z {W} + ALU MOV S40.w@group : S35.w {WL} + WRITE_TF S40.zw__ + WRITE_TF S39.xy__ + ALU MOV S41.x@group : S36.x {W} + ALU MOV S41.y@group : S36.y {WL} + WRITE_TF S41.xy__ + ALU MOV S42.x@group : S38.x {W} + ALU MOV S42.y@group : S38.y {WL} + WRITE_TF S42.xy__ +ENDIF)"; + +const char *tes_nir = +R"(shader: MESA_SHADER_TESS_EVAL +source_sha1: {0x2db04154, 0x4884cf59, 0x50e43ee6, 0x4bb239d7, 0x0b502229} +name: GLSL5 +inputs: 1 +outputs: 1 +uniforms: 0 +shared: 0 +ray queries: 0 +decl_function main (0 params) + +impl main { + block block_0: + /* preds: */ + vec1 32 ssa_0 = load_const (0x40000000) + vec2 32 ssa_1 = intrinsic load_tess_coord_r600 () () + vec1 32 ssa_2 = fadd ssa_1.x, ssa_1.y + vec1 32 ssa_3 = load_const (0x3f800000) + vec1 32 ssa_4 = fsub ssa_3, ssa_2 + vec1 32 ssa_5 = ffma ssa_0, ssa_4, ssa_1.y + vec1 32 ssa_6 = f2i32 ssa_5 + vec1 32 ssa_7 = load_const (0x00000000) + vec4 32 ssa_8 = intrinsic load_tcs_out_param_base_r600 () () + vec1 32 ssa_9 = intrinsic load_tcs_rel_patch_id_r600 () () + vec1 32 ssa_10 = umad24 ssa_8.x, ssa_9, ssa_8.z + vec1 32 ssa_11 = umad24 ssa_8.y, ssa_6, ssa_10 + vec4 32 ssa_12 = load_const (0x00000000, 0x00000004, 0x00000008, 0x0000000c) + vec4 32 ssa_13 = iadd ssa_12, ssa_11.xxxx + vec4 32 ssa_14 = intrinsic load_local_shared_r600 (ssa_13) () + intrinsic store_output (ssa_14, ssa_7) (0, 15, 0, 160, 128) + /* succs: block_1 */ + block block_1: +})"; + +const char *tes_from_nir_expect = +R"(TES +CHIPCLASS EVERGREEN +OUTPUT LOC:0 NAME:0 MASK:15 +REGISTERS R0.x@fully R0.y@fully R0.z@fully +SHADER +ALU MOV S1.x@free : L[0x40000000] {WL} +ALU MOV S2.x@free : R0.x@fully 
{WL} +ALU MOV S2.y@free : R0.y@fully {WL} +ALU ADD S3.x@free : S2.x@free S2.y@free {WL} +ALU MOV S4.x@free : I[1.0] {WL} +ALU ADD S5.x@free : S4.x@free -S3.x@free {WL} +ALU MULADD_IEEE S6.x@free : S1.x@free S5.x@free S2.y@free {WL} +ALU TRUNC S7.x@free : S6.x@free {WL} +ALU FLT_TO_INT S8.x@free : S7.x@free {WL} +ALU MOV S9.x@free : I[0] {WL} +ALU MOV S10.y@free : I[0] {WL} +LOAD_BUF S11.xyzw : S10.y@free + 16b RID:16 SRF +ALU MOV S12.x@free : R0.z@fully {WL} +ALU MULADD_UINT24 S13.x@free : S11.x@group S12.x@free S11.z@group {WL} +ALU MULADD_UINT24 S14.x@free : S11.y@group S8.x@free S13.x@free {WL} +ALU MOV S15.x : I[0] {W} +ALU MOV S15.y : L[0x4] {W} +ALU MOV S15.z : L[0x8] {W} +ALU MOV S15.w : L[0xc] {WL} +ALU ADD_INT S16.x : S15.x S14.x@free {W} +ALU ADD_INT S16.y : S15.y S14.x@free {W} +ALU ADD_INT S16.z : S15.z S14.x@free {W} +ALU ADD_INT S16.w : S15.w S14.x@free {WL} +LDS_READ [ S17.x@group S17.y@group S17.z@group S17.w@group ] : [ S16.x S16.y S16.z S16.w ] +EXPORT_DONE POS 0 S17.xyzw +EXPORT_DONE PARAM 0 R0.____)"; + + +const char *tes_pre_op = +R"(TES +CHIPCLASS EVERGREEN +OUTPUT LOC:0 NAME:0 MASK:15 +REGISTERS R0.x@fully R0.y@fully R0.z@fully +SHADER +ALU MOV S1024.x@free : L[0x40000000] {WL} +ALU MOV S1025.x@free : R0.x@fully {WL} +ALU MOV S1025.y@free : R0.y@fully {WL} +ALU ADD S1026.x@free : S1025.x@free S1025.y@free {WL} +ALU MOV S1027.x@free : I[1.0] {WL} +ALU ADD S1028.x@free : S1027.x@free -S1026.x@free {WL} +ALU MULADD_IEEE S1029.x@free : S1024.x@free S1028.x@free S1025.y@free {WL} +ALU TRUNC S1030.x@free : S1029.x@free {WL} +ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL} +ALU MOV S1032.x@free : I[0] {WL} +ALU MOV S1033.y@free : I[0] {WL} +LOAD_BUF S1034.xyzw : S1033.y@free RID:16 SRF +ALU MOV S1035.x@free : R0.z@fully {WL} +ALU MULADD_UINT24 S1036.x@free : S1034.x@group S1035.x@free S1034.z@group {WL} +ALU MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL} +ALU MOV S1038.x : I[0] {W} +ALU MOV S1038.y : L[0x4] {W} +ALU MOV 
S1038.z : L[0x8] {W} +ALU MOV S1038.w : L[0xc] {WL} +ALU ADD_INT S1039.x : S1038.x S1037.x@free {W} +ALU ADD_INT S1039.y : S1038.y S1037.x@free {W} +ALU ADD_INT S1039.z : S1038.z S1037.x@free {W} +ALU ADD_INT S1039.w : S1038.w S1037.x@free {WL} +LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ] +EXPORT_DONE POS 0 S1040.xyzw +EXPORT_DONE PARAM 0 R0.____)"; + +const char *tes_optimized = +R"(TES +CHIPCLASS EVERGREEN +OUTPUT LOC:0 NAME:0 MASK:15 +REGISTERS R0.x@fully R0.y@fully R0.z@fully +SHADER +ALU ADD S1026.x@free : R0.x@fully R0.y@fully {WL} +ALU ADD S1028.x@free : I[1.0] -S1026.x@free {WL} +ALU MULADD_IEEE S1029.x@free : L[0x40000000] S1028.x@free R0.y@fully {WL} +ALU TRUNC S1030.x@free : S1029.x@free {WL} +ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL} +ALU MOV S1033.y@free : I[0] {WL} +LOAD_BUF S1034.xyz_ : S1033.y@free RID:16 SRF +ALU MULADD_UINT24 S1036.x@free : S1034.x@group R0.z@fully S1034.z@group {WL} +ALU MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL} +ALU MOV S1039.x : S1037.x@free {W} +ALU ADD_INT S1039.y : L[0x4] S1037.x@free {W} +ALU ADD_INT S1039.z : L[0x8] S1037.x@free {W} +ALU ADD_INT S1039.w : L[0xc] S1037.x@free {WL} +LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ] +EXPORT_DONE POS 0 S1040.xyzw +EXPORT_DONE PARAM 0 R0.____)"; + +const char *tes_optimized_pre_sched = +R"(TES +CHIPCLASS EVERGREEN +OUTPUT LOC:0 NAME:0 MASK:15 +REGISTERS R0.x@fully R0.y@fully R0.z@fully +SHADER +ALU ADD S1026.x@free : R0.x@fully R0.y@fully {WL} +ALU ADD S1028.x@free : I[1.0] -S1026.x@free {WL} +ALU MULADD_IEEE S1029.x@free : L[0x40000000] S1028.x@free R0.y@fully {WL} +ALU TRUNC S1030.x@free : S1029.x@free {WL} +ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL} +ALU MOV S1033.y@free : I[0] {WL} +LOAD_BUF S1034.xyzw : S1033.y@free RID:16 SRF +ALU MULADD_UINT24 S1036.x@free : S1034.x@group R0.z@fully S1034.z@group {WL} +ALU 
MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL} +ALU ADD_INT S1039.x : I[0] S1037.x@free {W} +ALU ADD_INT S1039.y : L[0x4] S1037.x@free {W} +ALU ADD_INT S1039.z : L[0x8] S1037.x@free {W} +ALU ADD_INT S1039.w : L[0xc] S1037.x@free {WL} +LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ] +EXPORT_DONE POS 0 S1040.xyzw +EXPORT_DONE PARAM 0 R0.____)"; + +const char *tes_optimized_sched = +R"(TES +CHIPCLASS EVERGREEN +OUTPUT LOC:0 NAME:0 MASK:15 +REGISTERS R0.x@fully R0.y@fully R0.z@fully +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU ADD S1026.x@chan : R0.x@fully R0.y@fully {W} + ALU MOV S1033.y@chan : I[0] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU ADD S1028.x@chan : I[1.0] -S1026.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_IEEE S1029.x@chan : L[0x40000000] S1028.x@chan R0.y@fully {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU TRUNC S1030.x@chan : S1029.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU FLT_TO_INT S1031.x@chan : S1030.x@chan {WL} +ALU_GROUP_END +BLOCK_START +BLOCK_END +LOAD_BUF S1034.xyzw : S1033.y@chan RID:16 SRF +BLOCK_START +BLOCK_END +ALU_GROUP_BEGIN + ALU MULADD_UINT24 S1036.x@chan : S1034.x@group R0.z@fully S1034.z@group {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MULADD_UINT24 S1037.x@chan : S1034.y@group S1031.x@chan S1036.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU ADD_INT S1039.x : I[0] S1037.x@chan {W} + ALU ADD_INT S1039.y : L[0x4] S1037.x@chan {W} + ALU ADD_INT S1039.z : L[0x8] S1037.x@chan {W} + ALU ADD_INT S1039.w : L[0xc] S1037.x@chan {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU LDS READ_RET __.x@chan : S1039.x {L} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU LDS READ_RET __.x@chan : S1039.y {L} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU LDS READ_RET __.x@chan : S1039.z {L} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU LDS READ_RET __.x@chan : S1039.w {L} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S1040.x@group : I[LDS_OQ_A_POP] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + 
ALU MOV S1040.y@group : I[LDS_OQ_A_POP] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S1040.z@group : I[LDS_OQ_A_POP] {WL} +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU MOV S1040.w@group : I[LDS_OQ_A_POP] {WL} +ALU_GROUP_END +BLOCK_START +BLOCK_END +EXPORT_DONE POS 0 S1040.xyzw +EXPORT_DONE PARAM 0 R0.____ +BLOCK_END)"; + +void TestShader::SetUp() +{ + init_pool(); + SetUpMore(); +} + +void TestShader::TearDown() +{ + TearDownMore(); + release_pool(); +} + +void TestShader::SetUpMore() +{ +} + +void TestShader::TearDownMore() +{ +} +/* Build a Shader from its text form. The first non-empty line selects the stage (FS, VS, GS, TCS, TES); lines up to "SHADER" go to add_info_from_string(), the remaining lines to emit_instruction_from_string(). Blank lines and lines starting with '#' are skipped. Returns nullptr for empty input, an unknown stage, or an info line that is not understood. */ +Shader *TestShader::from_string(const std::string& s) +{ + istringstream is(s); + string line; + + r600_shader_key key = {0}; + key.ps.nr_cbufs = 1; + + while (line.empty()) /* skip leading empty lines, but stop at end of input instead of looping forever */ + if (!std::getline(is, line)) + return nullptr; + + Shader *shader = nullptr; + + if (line.substr(0,2) == "FS") + shader = new FragmentShader(key); + else if (line.substr(0,2) == "VS") + shader = new VertexShader(nullptr, nullptr, key); + else if (line.substr(0,2) == "GS") + shader = new GeometryShader(key); + else if (line.substr(0,3) == "TCS") + shader = new TCSShader(key); + else if (line.substr(0,3) == "TES") + shader = new TESShader(nullptr, nullptr, key); + else + return nullptr; + + while (std::getline(is, line)) { + if (line.find_first_not_of(" \t") == std::string::npos) + continue; + if (line[0] == '#') + continue; + + if (line.substr(0,6) == "SHADER") + break; + + istringstream ls(line); + if (!shader->add_info_from_string(ls)) { + std::cerr << "Don't understand '" << line << "'\n"; + return nullptr; + } + } + + while (std::getline(is, line)) { + if (line.find_first_not_of(" \t") == std::string::npos) + continue; + if (line[0] == '#') + continue; + + shader->emit_instruction_from_string(line); + } + + return shader; +} + +} diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h new file mode 100644 index 0000000..9663bb3 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h @@ -0,0
+1,115 @@
+#ifndef SFN_TEST_SHADERS_H
+#define SFN_TEST_SHADERS_H
+#include <gtest/gtest.h>
+
+namespace r600 {
+
+class Shader;
+
+extern const char *red_triangle_fs_nir;
+extern const char *red_triangle_fs_expect_from_nir;
+extern const char *red_triangle_fs_expect_from_nir_dce;
+
+extern const char *add_add_1_nir;
+extern const char *add_add_1_expect_from_nir;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd;
+
+extern const char *basic_interpolation_nir;
+extern const char *basic_interpolation_orig;
+extern const char *basic_interpolation_translated_1;
+extern const char *basic_interpolation_expect_from_nir;
+extern const char *basic_interpolation_expect_from_nir_opt;
+extern const char *basic_interpolation_expect_from_nir_sched;
+
+extern const char *glxgears_vs2_nir;
+extern const char *glxgears_vs2_from_nir_expect;
+extern const char *glxgears_vs2_from_nir_expect_optimized;
+
+extern const char *dot4_pre;
+extern const char *dot4_copy_prop_dce;
+
+extern const char *glxgears_vs2_from_nir_expect_cayman;
+extern const char *basic_interpolation_orig_cayman;
+extern const char *basic_interpolation_expect_from_nir_sched_cayman;
+extern const char *basic_interpolation_expect_opt_sched_cayman;
+
+extern const char *vs_nexted_loop_nir;
+extern const char *vs_nexted_loop_from_nir_expect;
+extern const char *vs_nexted_loop_from_nir_expect_opt;
+
+extern const char *shader_with_local_array_nir;
+extern const char *shader_with_local_array_expect;
+
+extern const char *test_schedule_group;
+extern const char *test_schedule_group_expect;
+
+extern const char *shader_with_bany_nir;
+extern const char *shader_with_bany_expect_eg;
+extern const char *shader_with_bany_expect_opt_sched_eg;
+
+extern const char *shader_copy_prop_dont_kill_double_use;
+extern const char *shader_copy_prop_dont_kill_double_use_expect;
+
+extern const char *shader_with_dest_array;
+extern const char *shader_with_dest_array_opt_expect;
+extern const char *shader_with_dest_array_opt_scheduled;
+
+extern const char *shader_with_dest_array2;
+extern const char *shader_with_dest_array2_scheduled;
+
+extern const char *shader_with_dest_array2_scheduled_ra;
+
+extern const char *shader_group_chan_pin_to_combine;
+extern const char *shader_group_chan_pin_combined;
+
+extern const char *shader_group_chan_pin_combined_sheduled;
+extern const char *shader_group_chan_pin_combined_sheduled_ra;
+
+extern const char *shader_group_chan_pin_to_combine_2;
+extern const char *shader_group_chan_pin_to_combine_2_opt;
+
+extern const char *fs_with_loop_multislot_reuse;
+extern const char *fs_with_loop_multislot_reuse_scheduled;
+
+extern const char *gs_abs_float_nir;
+extern const char *gs_abs_float_expect;
+
+extern const char *vtx_for_tcs_nir;
+extern const char *vtx_for_tcs_from_nir_expect;
+
+extern const char *tcs_nir;
+extern const char *tcs_from_nir_expect;
+
+extern const char *tes_nir;
+extern const char *tes_from_nir_expect;
+
+extern const char *tes_pre_op;
+extern const char *tes_optimized;
+extern const char *tes_optimized_pre_sched;
+extern const char *tes_optimized_sched;
+
+extern const char *vtx_for_tcs_inp;
+extern const char *vtx_for_tcs_opt;
+extern const char *vtx_for_tcs_pre_sched;
+extern const char *vtx_for_tcs_sched;
+/* gtest fixture: init_pool()/release_pool() around each test, plus a text-to-Shader helper. */
+class TestShader : public ::testing::Test {
+   /* gtest hooks; they call SetUpMore() / TearDownMore() in addition to the pool handling. */
+   void SetUp() override;
+   void TearDown() override;
+   /* Extension points for derived fixtures; the implementations in this class are empty. */
+   virtual void SetUpMore();
+   virtual void TearDownMore();
+
+
+protected:
+   Shader *from_string(const std::string& s); /* nullptr if the text is not understood */
+};
+
+
+}
+
+
+#endif // SFN_TEST_SHADERS_H
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
new file mode 100644
index 0000000..f8b7c48
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
@@ -0,0 +1,244 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author:
Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#include "../sfn_virtualvalues.h"
+#include "../sfn_alu_defines.h"
+#include "../sfn_debug.h"
+
+#include "gtest/gtest.h"
+
+using namespace r600;
+/* Fixture that brackets every test with init_pool() and release_pool(). */
+class ValueTest : public ::testing::Test
+{
+   void SetUp() override {
+      init_pool();
+   }
+
+   void TearDown() override {
+      release_pool();
+   }
+};
+
+
+/* Registers created with sel 1 and 3 and pin_fully report the given sel, chan and pin and are not virtual. */
+TEST_F(ValueTest, gpr_register_fully_pinned)
+{
+   Register reg(1, 2, pin_fully);
+
+   EXPECT_EQ(reg.sel(), 1);
+   EXPECT_EQ(reg.chan(), 2);
+   EXPECT_EQ(reg.pin(), pin_fully);
+   EXPECT_FALSE(reg.is_virtual());
+
+   Register reg2(3, 1, pin_fully);
+
+   EXPECT_EQ(reg2.sel(), 3);
+   EXPECT_EQ(reg2.chan(), 1);
+   EXPECT_EQ(reg2.pin(), pin_fully);
+   EXPECT_FALSE(reg2.is_virtual());
+}
+/* Needs exception support: sel 1024 together with pin_fully must throw std::invalid_argument. */
+#ifdef __cpp_exceptions
+TEST_F(ValueTest, virtual_register_must_not_be_pinned_to_sel)
+{
+   EXPECT_THROW(Register(1024, 1, pin_fully), std::invalid_argument);
+}
+#endif
+/* Registers created with sel 1024 and 1025 and pin_none report is_virtual(). */
+TEST_F(ValueTest, virtual_register_not_pinned)
+{
+   Register reg(1024, 1, pin_none);
+
+   EXPECT_EQ(reg.sel(), 1024);
+   EXPECT_EQ(reg.chan(), 1);
+   EXPECT_EQ(reg.pin(), pin_none);
+   EXPECT_TRUE(reg.is_virtual());
+
+   Register reg2(1025, 2, pin_none);
+
+   EXPECT_EQ(reg2.sel(), 1025);
+   EXPECT_EQ(reg2.chan(), 2);
+   EXPECT_EQ(reg2.pin(), pin_none);
+   EXPECT_TRUE(reg2.is_virtual());
+}
+/* UniformValue: sel, chan, kcache bank (0 unless one is passed) and the optional buffer address register. */
+TEST_F(ValueTest, uniform_value)
+{
+   UniformValue reg0(512, 1);
+
+   EXPECT_EQ(reg0.sel(), 512);
+   EXPECT_EQ(reg0.chan(), 1);
+   EXPECT_EQ(reg0.kcache_bank(), 0);
+   EXPECT_FALSE(reg0.buf_addr());
+   EXPECT_FALSE(reg0.is_virtual());
+
+   UniformValue reg1(513, 2, 1);
+
+   EXPECT_EQ(reg1.sel(), 513);
+   EXPECT_EQ(reg1.chan(), 2);
+   EXPECT_EQ(reg1.kcache_bank(), 1);
+   EXPECT_FALSE(reg1.buf_addr());
+   EXPECT_FALSE(reg1.is_virtual());
+
+   auto addr = new Register( 1024, 0, pin_none);
+   ASSERT_TRUE(addr);
+
+   UniformValue reg_with_buffer_addr(513, 0, addr);
+
+   EXPECT_EQ(reg_with_buffer_addr.sel(), 513);
+   EXPECT_EQ(reg_with_buffer_addr.chan(), 0);
+   EXPECT_EQ(reg_with_buffer_addr.pin(), pin_none);
+   EXPECT_EQ(reg_with_buffer_addr.kcache_bank(), 0);
+   EXPECT_FALSE(reg_with_buffer_addr.is_virtual());
+   ASSERT_TRUE(reg_with_buffer_addr.buf_addr());
+
+   auto baddr = reg_with_buffer_addr.buf_addr();
+   EXPECT_EQ(baddr->sel(), 1024);
+   EXPECT_EQ(baddr->chan(), 0);
+   EXPECT_EQ(baddr->pin(), pin_none);
+   EXPECT_TRUE(baddr->is_virtual());
+}
+/* LiteralConstant reports sel ALU_SRC_LITERAL, chan -1 and the value it was created with. */
+TEST_F(ValueTest, literal_value)
+{
+   LiteralConstant literal(12);
+   EXPECT_EQ(literal.sel(), ALU_SRC_LITERAL);
+   EXPECT_EQ(literal.chan(), -1);
+   EXPECT_EQ(literal.value(), 12);
+   EXPECT_FALSE(literal.is_virtual());
+
+   LiteralConstant literal2(2);
+   EXPECT_EQ(literal2.sel(), ALU_SRC_LITERAL);
+   EXPECT_EQ(literal2.chan(), -1);
+   EXPECT_EQ(literal2.value(), 2);
+   EXPECT_FALSE(literal2.is_virtual());
+}
+/* InlineConstant reports the given sel; chan is 0 unless one is passed. */
+TEST_F(ValueTest, inline_constant)
+{
+   InlineConstant c0(ALU_SRC_1);
+
+   EXPECT_EQ(c0.sel(), ALU_SRC_1);
+   EXPECT_EQ(c0.chan(), 0);
+   EXPECT_FALSE(c0.is_virtual());
+
+   InlineConstant c1(ALU_SRC_M_1_INT);
+   EXPECT_EQ(c1.sel(), ALU_SRC_M_1_INT);
+   EXPECT_EQ(c1.chan(), 0);
+   EXPECT_FALSE(c1.is_virtual());
+
+   InlineConstant c2(ALU_SRC_PV, 1);
+   EXPECT_EQ(c2.sel(), ALU_SRC_PV);
+   EXPECT_EQ(c2.chan(), 1);
+   EXPECT_FALSE(c2.is_virtual());
+}
+/* LocalArray::element(): direct access, register-indirect access, a literal address folded into sel, and range errors. */
+TEST_F(ValueTest, array)
+{
+   LocalArray array(1024, 2, 12);
+
+   EXPECT_EQ(array.size(), 12);
+   EXPECT_EQ(array.nchannels(), 2);
+
+   auto elm0 = array.element(0, nullptr, 0);
+   ASSERT_TRUE(elm0);
+
+   EXPECT_EQ(elm0->sel(), 1024);
+   EXPECT_EQ(elm0->chan(), 0);
+   EXPECT_EQ(elm0->pin(), pin_array);
+
+   EXPECT_FALSE(elm0->get_addr());
+
+   auto elm1 = array.element(8, nullptr, 1);
+   ASSERT_TRUE(elm1);
+
+   EXPECT_EQ(elm1->sel(), 1024 + 8);
+   EXPECT_EQ(elm1->chan(), 1);
+   EXPECT_EQ(elm1->pin(), pin_array);
+   EXPECT_FALSE(elm1->get_addr());
+   /* A register address is kept on the element and returned by get_addr(). */
+   auto addr = new Register( 2000, 0, pin_none);
+   ASSERT_TRUE(addr);
+
+   auto elm_indirect = array.element(0, addr, 1);
+   ASSERT_TRUE(elm_indirect);
+
+   auto elm_addr = elm_indirect->get_addr();
+   ASSERT_TRUE(elm_addr);
+
+   EXPECT_EQ(elm_indirect->sel(), 1024);
+   EXPECT_EQ(elm_indirect->chan(), 1);
+   EXPECT_EQ(elm_indirect->pin(), pin_array);
+
+   EXPECT_EQ(elm_addr->sel(), 2000);
+   EXPECT_EQ(elm_addr->chan(), 0);
+   EXPECT_EQ(elm_addr->pin(), pin_none);
+
+   // A constant addr should resolve directly
+   auto addr2 = new LiteralConstant( 3);
+   ASSERT_TRUE(addr2);
+
+   auto elm_direct = array.element(0, addr2, 0);
+   auto elm_direct_addr = elm_direct->get_addr();
+   EXPECT_FALSE(elm_direct_addr);
+
+   EXPECT_EQ(elm_direct->sel(), 1027); /* base 1024 + literal address 3 */
+   EXPECT_EQ(elm_direct->chan(), 0);
+   EXPECT_EQ(elm_direct->pin(), pin_array);
+
+#ifdef __cpp_exceptions
+   EXPECT_THROW(array.element(12, nullptr, 0), std::invalid_argument); /* index 12, array size is 12 */
+   EXPECT_THROW(array.element(3, nullptr, 2), std::invalid_argument); /* channel 2, array has 2 channels */
+   /* A literal address of 12 is rejected in the same way as index 12. */
+   auto addr3 = new LiteralConstant( 12);
+   ASSERT_TRUE(addr3);
+   EXPECT_THROW(array.element(0, addr3, 0), std::invalid_argument);
+#endif
+}
+/* Values parsed by Register::from_string() and VirtualValue::from_string() compare equal to directly constructed ones. */
+TEST_F(ValueTest, reg_from_string)
+{
+   Register reg(1000, 0, pin_none);
+   auto fs = Register::from_string("R1000.x");
+   EXPECT_EQ(*fs, reg);
+
+   EXPECT_EQ(*Register::from_string("R1001.y"), Register(1001, 1, pin_none));
+   EXPECT_EQ(*Register::from_string("R1.z@fully"), Register(1, 2, pin_fully));
+   EXPECT_EQ(*Register::from_string("R1000.y@chan"), Register(1000, 1, pin_chan));
+   EXPECT_EQ(*Register::from_string("R1000.y@free"), Register(1000, 1, pin_free));
+
+
+   EXPECT_EQ(*VirtualValue::from_string("L[0x1]"), LiteralConstant(1));
+   EXPECT_EQ(*VirtualValue::from_string("L[0x2]"), LiteralConstant(2));
+   EXPECT_EQ(*VirtualValue::from_string("L[0xA]"), LiteralConstant(10));
+
+   EXPECT_EQ(*VirtualValue::from_string("I[0]"), InlineConstant(ALU_SRC_0));
+   EXPECT_EQ(*VirtualValue::from_string("I[HW_WAVE_ID]"), InlineConstant(ALU_SRC_HW_WAVE_ID));
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp
new file mode 100644
index 0000000..2032fe7
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp
@@ -0,0 +1,285 @@
+/* -*- mesa-c++ -*-
+ *
+ *
Copyright (c) 2021 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "../sfn_valuefactory.h"
+#include "../sfn_alu_defines.h"
+#include "../sfn_debug.h"
+
+#include "nir_builder.h"
+#include "gtest/gtest.h"
+
+#include "ralloc.h"
+
+using namespace r600;
+/* Fixture providing a ValueFactory and a nir_builder for a fragment shader named "test shader". */
+class ValuefactoryTest: public ::testing::Test {
+
+public:
+   ValuefactoryTest();
+
+protected:
+   void SetUp() override;
+   void TearDown() override;
+
+   ValueFactory *factory = nullptr; /* created in SetUp() */
+   nir_builder b;
+   nir_shader_compiler_options options;
+
+};
+/* dest() for the fadd result and src() for an SSA source referring to it both give sel 1024, chan 0, pin_none. */
+TEST_F(ValuefactoryTest, test_create_ssa)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   sfn_log << SfnLog::reg << "Search (test) " << &alu->dest << "\n";
+   auto dest_value = factory->dest(alu->dest, 0, pin_none);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 0);
+   EXPECT_EQ(dest_value->pin(), pin_none);
+
+   nir_src src = nir_src_for_ssa(sum);
+   sfn_log << SfnLog::reg << "Search (test) " << &src << "\n";
+   PVirtualValue value = factory->src(src, 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+   EXPECT_EQ(value->pin(), pin_none);
+}
+/* After allocate_registers() the two NIR local registers map to sel 1024 and 1025, one chan per component. */
+TEST_F(ValuefactoryTest, test_create_register_1)
+{
+   nir_src src1 = NIR_SRC_INIT;
+   src1.reg.reg = nir_local_reg_create(b.impl);
+   src1.reg.reg->num_components = 1;
+
+   nir_src src2 = NIR_SRC_INIT;
+   src2.reg.reg = nir_local_reg_create(b.impl);
+   src2.reg.reg->num_components = 4;
+   ASSERT_FALSE(src1.is_ssa);
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto value = factory->src(src1, 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+
+   for (int i = 0; i < 4; ++i) {
+      PVirtualValue value = factory->src(src2, i);
+      EXPECT_EQ(value->sel(), 1025);
+      EXPECT_EQ(value->chan(), i);
+      EXPECT_EQ(value->pin(), pin_none);
+   }
+}
+/* A write to array element 5 (base_offset) yields sel 1024 + 5 with pin_array on both channels. */
+TEST_F(ValuefactoryTest, test_create_register_array_direct_access)
+{
+   nir_dest dst = NIR_DEST_INIT;
+   dst.reg.reg = nir_local_reg_create(b.impl);
+   dst.reg.reg->num_components = 2;
+   dst.reg.reg->num_array_elems = 10;
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto c1 = nir_imm_float(&b, 2.0);
+
+   nir_alu_instr *mov = nir_alu_instr_create(b.shader, nir_op_mov);
+   mov->src[0].src = nir_src_for_ssa(c1);
+   mov->dest.write_mask = 3;
+   mov->dest.dest.is_ssa = false;
+   mov->dest.dest.reg.reg = dst.reg.reg;
+   mov->dest.dest.reg.base_offset = 5;
+   nir_builder_instr_insert(&b, &mov->instr);
+
+   auto regx = factory->dest(mov->dest.dest, 0, pin_none);
+   auto regy = factory->dest(mov->dest.dest, 1, pin_none);
+   EXPECT_EQ(regx->sel(), 1024 + 5);
+   EXPECT_EQ(regx->chan(), 0);
+   EXPECT_EQ(regx->pin(), pin_array);
+
+   EXPECT_EQ(regy->sel(), 1024 + 5);
+   EXPECT_EQ(regy->chan(), 1);
+   EXPECT_EQ(regy->pin(), pin_array);
+
+}
+
+/* With an indirect offset the destination keeps sel 1024 and carries the address value via get_addr(). */
+TEST_F(ValuefactoryTest, test_create_register_array_indirect_access)
+{
+   nir_dest dst = NIR_DEST_INIT;
+   dst.reg.reg = nir_local_reg_create(b.impl);
+   dst.reg.reg->num_components = 3;
+   dst.reg.reg->num_array_elems = 10;
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto c1 = nir_imm_vec2(&b, 2.0, 4.0);
+   auto c2 = nir_imm_int(&b, 3);
+
+   factory->dest(*c2, 0, pin_none);
+
+   nir_alu_instr *mov = nir_alu_instr_create(b.shader, nir_op_mov);
+   mov->src[0].src = nir_src_for_ssa(c1);
+   mov->dest.write_mask = 3;
+   mov->dest.dest.is_ssa = false;
+   mov->dest.dest.reg.reg = dst.reg.reg;
+   mov->dest.dest.reg.base_offset = 0;
+   mov->dest.dest.reg.indirect = static_cast<nir_src *>(calloc(1, sizeof(nir_src)));
+   nir_src addr = nir_src_for_ssa(c2);
+   nir_src_copy(mov->dest.dest.reg.indirect, &addr);
+   nir_builder_instr_insert(&b, &mov->instr);
+
+   auto addr_reg = factory->src(addr, 0);
+
+   auto regx = factory->dest(mov->dest.dest, 0, pin_none);
+   auto regy = factory->dest(mov->dest.dest, 1, pin_none);
+
+   auto regx_addr = regx->get_addr();
+   ASSERT_TRUE(regx_addr);
+
+   EXPECT_EQ(regx->sel(), 1024);
+   EXPECT_EQ(regx->chan(), 0);
+   EXPECT_EQ(*regx_addr, *addr_reg);
+   EXPECT_EQ(regx->pin(), pin_array);
+
+   auto regy_addr = regy->get_addr();
+   ASSERT_TRUE(regy_addr);
+
+   EXPECT_EQ(regy->sel(), 1024);
+   EXPECT_EQ(regy->chan(), 1);
+   EXPECT_EQ(*regy_addr, *addr_reg);
+   EXPECT_EQ(regy->pin(), pin_array);
+
+}
+/* A pin passed to dest() is also reported by src() for the same SSA value. */
+TEST_F(ValuefactoryTest, test_create_ssa_pinned_chan)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   auto dest_value = factory->dest(alu->dest, 0, pin_chan);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 0);
+   EXPECT_EQ(dest_value->pin(), pin_chan);
+
+   PVirtualValue value = factory->src(nir_src_for_ssa(sum), 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+   EXPECT_EQ(value->pin(), pin_chan);
+}
+
+/* Like test_create_ssa_pinned_chan, but requesting channel 1. */
+TEST_F(ValuefactoryTest, test_create_ssa_pinned_chan_and_reg)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   auto dest_value = factory->dest(alu->dest, 1, pin_chan);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 1);
+   EXPECT_EQ(dest_value->pin(), pin_chan);
+
+   PVirtualValue value = factory->src(nir_src_for_ssa(sum), 1);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 1);
+   EXPECT_EQ(value->pin(), pin_chan);
+}
+
+/* Sources fed by load_const instructions registered with allocate_const() resolve to LiteralConstant values. */
+TEST_F(ValuefactoryTest, test_create_const)
+{
+   auto c1 = nir_imm_int(&b, 2);
+   auto c2 = nir_imm_int(&b, 4);
+   auto sum = nir_iadd(&b, c1, c2);
+
+   auto ci1 = nir_instr_as_load_const(c1->parent_instr);
+   factory->allocate_const(ci1);
+
+   auto ci2 = nir_instr_as_load_const(c2->parent_instr);
+   factory->allocate_const(ci2);
+
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   PVirtualValue value1 = factory->src(alu->src[0], 0);
+   PVirtualValue value2 = factory->src(alu->src[1], 0);
+   /* Reference casts: a value that is not a LiteralConstant raises std::bad_cast. */
+   const auto& cvalue1 = dynamic_cast<const LiteralConstant&>(*value1);
+   const auto& cvalue2 = dynamic_cast<const LiteralConstant&>(*value2);
+
+   EXPECT_EQ(cvalue1.value(), 2);
+   EXPECT_EQ(cvalue2.value(), 4);
+}
+/* inline_const() returns a value that reports the requested sel and chan. */
+TEST_F(ValuefactoryTest, test_create_sysvalue)
+{
+   auto ic = factory->inline_const(ALU_SRC_TIME_LO, 0);
+
+   EXPECT_EQ(ic->sel(), ALU_SRC_TIME_LO);
+   EXPECT_EQ(ic->chan(), 0);
+}
+
+/* Visitor that stores the kcache bank of a visited UniformValue in m_result; other value types leave it unchanged. */
+class GetKCache: public ConstRegisterVisitor {
+public:
+   void visit(const VirtualValue& value) {(void)value;}
+   void visit(const Register& value) {(void)value;}
+   void visit(const LocalArray& value) {(void)value;}
+   void visit(const LocalArrayValue& value) {(void)value;}
+   void visit(const UniformValue& value) {(void)value; m_result = value.kcache_bank();}
+   void visit(const LiteralConstant& value) {(void)value;}
+   void visit(const InlineConstant& value) {(void)value;}
+
+   GetKCache() : m_result(0) {}
+
+   int m_result;
+};
+/* Zero the compiler options and call init_pool(); the matching release_pool() is in TearDown(). */
+ValuefactoryTest::ValuefactoryTest()
+{
+   memset(&options, 0, sizeof (options));
+   init_pool();
+}
+
+/* Per test: reference the GLSL type singleton, create the NIR shader builder and a fresh ValueFactory. */
+void ValuefactoryTest::SetUp()
+{
+   glsl_type_singleton_init_or_ref();
+   b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, &options, "test shader");
+   factory = new ValueFactory();
+}
+/* Per test: free the NIR shader, drop the type singleton reference and release the pool. */
+void ValuefactoryTest::TearDown()
+{
+   ralloc_free(b.shader);
+   glsl_type_singleton_decref();
+   release_pool();
+}
+
-- 
2.7.4