From 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6 Mon Sep 17 00:00:00 2001
From: Gert Wollny <gert.wollny@collabora.com>
Date: Sat, 19 Jun 2021 13:03:32 +0200
Subject: [PATCH] r600/sfn: rewrite NIR backend

This is a rewite of the NIR backend. it adds some optimization
and a scheduler.

v2: - replace some magic numbers by constants
    - make sure constructor is always used with new
    - use default initialization in more places
      (changes suggested by Filip Gawin)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17076>
---
 src/gallium/drivers/r600/meson.build               |  119 +-
 src/gallium/drivers/r600/r600_asm.c                |    4 +-
 src/gallium/drivers/r600/r600_asm.h                |    2 +
 src/gallium/drivers/r600/r600_pipe.c               |   13 +-
 src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp   |  126 +-
 src/gallium/drivers/r600/sfn/sfn_alu_defines.h     |  103 +-
 .../r600/sfn/sfn_alu_readport_validation.cpp       |  329 +++
 .../drivers/r600/sfn/sfn_alu_readport_validation.h |   41 +
 .../{sfn_ir_to_assembly.cpp => sfn_assembler.cpp}  | 1626 +++++-----
 src/gallium/drivers/r600/sfn/sfn_assembler.h       |   26 +
 .../drivers/r600/sfn/sfn_conditionaljumptracker.h  |    4 -
 src/gallium/drivers/r600/sfn/sfn_debug.cpp         |    4 +
 src/gallium/drivers/r600/sfn/sfn_debug.h           |    6 +-
 src/gallium/drivers/r600/sfn/sfn_defines.h         |    3 +
 src/gallium/drivers/r600/sfn/sfn_docu.txt          |   47 +-
 .../drivers/r600/sfn/sfn_emitaluinstruction.cpp    | 1046 -------
 .../drivers/r600/sfn/sfn_emitaluinstruction.h      |  116 -
 .../drivers/r600/sfn/sfn_emitinstruction.cpp       |  169 --
 src/gallium/drivers/r600/sfn/sfn_emitinstruction.h |  102 -
 .../drivers/r600/sfn/sfn_emitssboinstruction.cpp   |  741 -----
 .../drivers/r600/sfn/sfn_emitssboinstruction.h     |   60 -
 .../drivers/r600/sfn/sfn_emittexinstruction.cpp    |  671 -----
 .../drivers/r600/sfn/sfn_emittexinstruction.h      |   96 -
 src/gallium/drivers/r600/sfn/sfn_instr.cpp         |  522 ++++
 src/gallium/drivers/r600/sfn/sfn_instr.h           |  314 ++
 src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp     | 2449 +++++++++++++++
 src/gallium/drivers/r600/sfn/sfn_instr_alu.h       |  193 ++
 .../drivers/r600/sfn/sfn_instr_alugroup.cpp        |  361 +++
 src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h  |   89 +
 .../drivers/r600/sfn/sfn_instr_controlflow.cpp     |  176 ++
 .../drivers/r600/sfn/sfn_instr_controlflow.h       |   81 +
 src/gallium/drivers/r600/sfn/sfn_instr_export.cpp  |  524 ++++
 src/gallium/drivers/r600/sfn/sfn_instr_export.h    |  213 ++
 src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp   |  659 +++++
 src/gallium/drivers/r600/sfn/sfn_instr_fetch.h     |  152 +
 src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp     |  411 +++
 src/gallium/drivers/r600/sfn/sfn_instr_lds.h       |   80 +
 src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp     |  844 ++++++
 src/gallium/drivers/r600/sfn/sfn_instr_mem.h       |  177 ++
 src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp     | 1011 +++++++
 src/gallium/drivers/r600/sfn/sfn_instr_tex.h       |  166 ++
 src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp  |  188 ++
 src/gallium/drivers/r600/sfn/sfn_instrfactory.h    |   34 +
 .../drivers/r600/sfn/sfn_instruction_alu.cpp       |  183 --
 src/gallium/drivers/r600/sfn/sfn_instruction_alu.h |  142 -
 .../drivers/r600/sfn/sfn_instruction_base.cpp      |  187 --
 .../drivers/r600/sfn/sfn_instruction_base.h        |  155 -
 .../drivers/r600/sfn/sfn_instruction_block.cpp     |   57 -
 .../drivers/r600/sfn/sfn_instruction_block.h       |   82 -
 .../drivers/r600/sfn/sfn_instruction_cf.cpp        |  195 --
 src/gallium/drivers/r600/sfn/sfn_instruction_cf.h  |  142 -
 .../drivers/r600/sfn/sfn_instruction_export.cpp    |  341 ---
 .../drivers/r600/sfn/sfn_instruction_export.h      |  185 --
 .../drivers/r600/sfn/sfn_instruction_fetch.cpp     |  480 ---
 .../drivers/r600/sfn/sfn_instruction_fetch.h       |  187 --
 .../drivers/r600/sfn/sfn_instruction_gds.cpp       |  180 --
 src/gallium/drivers/r600/sfn/sfn_instruction_gds.h |  225 --
 .../drivers/r600/sfn/sfn_instruction_lds.cpp       |  151 -
 src/gallium/drivers/r600/sfn/sfn_instruction_lds.h |   82 -
 .../drivers/r600/sfn/sfn_instruction_misc.cpp      |   68 -
 .../drivers/r600/sfn/sfn_instruction_misc.h        |   69 -
 src/gallium/drivers/r600/sfn/sfn_instruction_tex.h |  143 -
 .../drivers/r600/sfn/sfn_instructionvisitor.cpp    |    0
 .../drivers/r600/sfn/sfn_instructionvisitor.h      |   91 -
 src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h  |   45 -
 src/gallium/drivers/r600/sfn/sfn_liverange.cpp     | 1006 -------
 src/gallium/drivers/r600/sfn/sfn_liverange.h       |  314 --
 .../drivers/r600/sfn/sfn_liverangeevaluator.cpp    |  438 +++
 .../drivers/r600/sfn/sfn_liverangeevaluator.h      |   23 +
 .../r600/sfn/sfn_liverangeevaluator_helpers.cpp    |  623 ++++
 .../r600/sfn/sfn_liverangeevaluator_helpers.h      |  162 +
 src/gallium/drivers/r600/sfn/sfn_memorypool.cpp    |   86 +
 src/gallium/drivers/r600/sfn/sfn_memorypool.h      |   69 +
 src/gallium/drivers/r600/sfn/sfn_nir.cpp           |  429 +--
 src/gallium/drivers/r600/sfn/sfn_nir.h             |   52 +-
 .../r600/sfn/sfn_nir_legalize_image_load_store.cpp |    6 +-
 .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp       |  393 ++-
 .../drivers/r600/sfn/sfn_nir_lower_tess_io.cpp     |    7 +-
 ...n_instruction_tex.cpp => sfn_nir_lower_tex.cpp} |  153 +-
 src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h   |   10 +
 src/gallium/drivers/r600/sfn/sfn_optimizer.cpp     |  627 ++++
 src/gallium/drivers/r600/sfn/sfn_optimizer.h       |   17 +
 src/gallium/drivers/r600/sfn/sfn_optimizers.cpp    |   12 -
 src/gallium/drivers/r600/sfn/sfn_optimizers.h      |   14 -
 src/gallium/drivers/r600/sfn/sfn_peephole.cpp      |  212 ++
 src/gallium/drivers/r600/sfn/sfn_peephole.h        |   13 +
 src/gallium/drivers/r600/sfn/sfn_ra.cpp            |  268 ++
 src/gallium/drivers/r600/sfn/sfn_ra.h              |   51 +
 src/gallium/drivers/r600/sfn/sfn_scheduler.cpp     |  890 ++++++
 src/gallium/drivers/r600/sfn/sfn_scheduler.h       |   13 +
 src/gallium/drivers/r600/sfn/sfn_shader.cpp        | 1379 +++++++++
 src/gallium/drivers/r600/sfn/sfn_shader.h          |  365 +++
 src/gallium/drivers/r600/sfn/sfn_shader_base.cpp   | 1188 --------
 src/gallium/drivers/r600/sfn/sfn_shader_base.h     |  231 --
 .../drivers/r600/sfn/sfn_shader_compute.cpp        |  112 -
 src/gallium/drivers/r600/sfn/sfn_shader_compute.h  |   62 -
 src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp     |   95 +
 src/gallium/drivers/r600/sfn/sfn_shader_cs.h       |   39 +
 .../drivers/r600/sfn/sfn_shader_fragment.cpp       | 1074 -------
 src/gallium/drivers/r600/sfn/sfn_shader_fragment.h |  117 -
 src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp     |  873 ++++++
 src/gallium/drivers/r600/sfn/sfn_shader_fs.h       |   88 +
 .../drivers/r600/sfn/sfn_shader_geometry.cpp       |  343 ---
 src/gallium/drivers/r600/sfn/sfn_shader_geometry.h |   81 -
 src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp     |  370 +++
 src/gallium/drivers/r600/sfn/sfn_shader_gs.h       |   66 +
 src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp    |  108 -
 src/gallium/drivers/r600/sfn/sfn_shader_tcs.h      |   33 -
 src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp   |  275 ++
 src/gallium/drivers/r600/sfn/sfn_shader_tess.h     |   76 +
 .../drivers/r600/sfn/sfn_shader_tess_eval.cpp      |  123 -
 .../drivers/r600/sfn/sfn_shader_tess_eval.h        |   39 -
 src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp |  230 --
 src/gallium/drivers/r600/sfn/sfn_shader_vertex.h   |   83 -
 src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp     |  663 +++++
 src/gallium/drivers/r600/sfn/sfn_shader_vs.h       |  156 +
 src/gallium/drivers/r600/sfn/sfn_shaderio.cpp      |  448 ---
 src/gallium/drivers/r600/sfn/sfn_shaderio.h        |  176 --
 src/gallium/drivers/r600/sfn/sfn_value.cpp         |  242 --
 src/gallium/drivers/r600/sfn/sfn_value.h           |  194 --
 src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp     |  380 ---
 src/gallium/drivers/r600/sfn/sfn_value_gpr.h       |  208 --
 src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp  |  959 ++++++
 src/gallium/drivers/r600/sfn/sfn_valuefactory.h    |  299 ++
 src/gallium/drivers/r600/sfn/sfn_valuepool.cpp     |  526 ----
 src/gallium/drivers/r600/sfn/sfn_valuepool.h       |  242 --
 .../drivers/r600/sfn/sfn_vertexstageexport.cpp     |  535 ----
 .../drivers/r600/sfn/sfn_vertexstageexport.h       |  116 -
 src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp | 1072 +++++++
 src/gallium/drivers/r600/sfn/sfn_virtualvalues.h   |  460 +++
 src/gallium/drivers/r600/sfn/tests/meson.build     |   37 +
 .../drivers/r600/sfn/tests/sfn_instr_test.cpp      |  798 +++++
 .../r600/sfn/tests/sfn_instrfromstring_test.cpp    |  728 +++++
 .../drivers/r600/sfn/tests/sfn_liverange_test.cpp  |  217 ++
 .../drivers/r600/sfn/tests/sfn_optimizer_test.cpp  |  300 ++
 .../r600/sfn/tests/sfn_shaderfromstring_test.cpp   |  123 +
 .../drivers/r600/sfn/tests/sfn_test_shaders.cpp    | 3116 ++++++++++++++++++++
 .../drivers/r600/sfn/tests/sfn_test_shaders.h      |  115 +
 .../drivers/r600/sfn/tests/sfn_value_test.cpp      |  244 ++
 .../r600/sfn/tests/sfn_valuefactory_test.cpp       |  285 ++
 140 files changed, 27319 insertions(+), 16146 deletions(-)
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
 rename src/gallium/drivers/r600/sfn/{sfn_ir_to_assembly.cpp => sfn_assembler.cpp} (52%)
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_assembler.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alu.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_export.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_lds.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_mem.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instr_tex.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_instrfactory.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_block.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_memorypool.h
 rename src/gallium/drivers/r600/sfn/{sfn_instruction_tex.cpp => sfn_nir_lower_tex.cpp} (65%)
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizer.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizers.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_optimizers.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_peephole.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_peephole.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_ra.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_ra.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_scheduler.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_compute.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_cs.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fs.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_geometry.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_gs.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tcs.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vs.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_value_gpr.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuefactory.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_valuepool.h
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
 delete mode 100644 src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
 create mode 100644 src/gallium/drivers/r600/sfn/tests/meson.build
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
 create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp

diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build
index e59917d..7059f6d 100644
--- a/src/gallium/drivers/r600/meson.build
+++ b/src/gallium/drivers/r600/meson.build
@@ -107,6 +107,10 @@ files_r600 = files(
   'sb/sb_valtable.cpp',
   'sfn/sfn_alu_defines.cpp',
   'sfn/sfn_alu_defines.h',
+  'sfn/sfn_alu_readport_validation.cpp',
+  'sfn/sfn_alu_readport_validation.h',
+  'sfn/sfn_assembler.cpp',
+  'sfn/sfn_assembler.h',
   'sfn/sfn_callstack.cpp',
   'sfn/sfn_callstack.h',
   'sfn/sfn_conditionaljumptracker.cpp',
@@ -114,73 +118,66 @@ files_r600 = files(
   'sfn/sfn_defines.h',
   'sfn/sfn_debug.cpp',
   'sfn/sfn_debug.h',
-  'sfn/sfn_emitaluinstruction.cpp',
-  'sfn/sfn_emitaluinstruction.h',
-  'sfn/sfn_emitinstruction.cpp',
-  'sfn/sfn_emitinstruction.h',
-  'sfn/sfn_emitssboinstruction.cpp',
-  'sfn/sfn_emitssboinstruction.h',
-  'sfn/sfn_emittexinstruction.cpp',
-  'sfn/sfn_emittexinstruction.h',
-  'sfn/sfn_emitinstruction.h',
-  'sfn/sfn_instruction_alu.cpp',
-  'sfn/sfn_instruction_alu.h',
-  'sfn/sfn_instruction_base.cpp',
-  'sfn/sfn_instruction_base.h',
-  'sfn/sfn_instruction_block.cpp',
-  'sfn/sfn_instruction_block.h',
-  'sfn/sfn_instruction_cf.cpp',
-  'sfn/sfn_instruction_cf.h',
-  'sfn/sfn_instruction_export.cpp',
-  'sfn/sfn_instruction_export.h',
-  'sfn/sfn_instruction_fetch.cpp',
-  'sfn/sfn_instruction_fetch.h',
-  'sfn/sfn_instruction_gds.cpp',
-  'sfn/sfn_instruction_gds.h',
-  'sfn/sfn_instruction_lds.cpp',
-  'sfn/sfn_instruction_lds.h',
-  'sfn/sfn_instruction_misc.cpp',
-  'sfn/sfn_instruction_misc.h',
-  'sfn/sfn_instruction_tex.cpp',
-  'sfn/sfn_instruction_tex.h',
-  'sfn/sfn_ir_to_assembly.cpp',
-  'sfn/sfn_ir_to_assembly.h',
-  'sfn/sfn_liverange.cpp',
-  'sfn/sfn_liverange.h',
+  'sfn/sfn_instr.cpp',
+  'sfn/sfn_instr.h',
+  'sfn/sfn_instr_alu.cpp',
+  'sfn/sfn_instr_alu.h',
+  'sfn/sfn_instr_alugroup.cpp',
+  'sfn/sfn_instr_alugroup.h',
+  'sfn/sfn_instr_controlflow.cpp',
+  'sfn/sfn_instr_controlflow.h',
+  'sfn/sfn_instr_export.cpp',
+  'sfn/sfn_instr_export.h',
+  'sfn/sfn_instr_fetch.cpp',
+  'sfn/sfn_instr_fetch.h',
+  'sfn/sfn_instr_mem.cpp',
+  'sfn/sfn_instr_mem.h',
+  'sfn/sfn_instr_lds.cpp',
+  'sfn/sfn_instr_lds.h',
+  'sfn/sfn_instr_tex.cpp',
+  'sfn/sfn_instr_tex.h',
+  'sfn/sfn_instrfactory.cpp',
+  'sfn/sfn_instrfactory.h',
+  'sfn/sfn_liverangeevaluator.cpp',
+  'sfn/sfn_liverangeevaluator.h',
+  'sfn/sfn_liverangeevaluator_helpers.cpp',
+  'sfn/sfn_liverangeevaluator_helpers.h',
+  'sfn/sfn_memorypool.cpp',
+  'sfn/sfn_memorypool.h',
   'sfn/sfn_nir.cpp',
   'sfn/sfn_nir.h',
   'sfn/sfn_nir_legalize_image_load_store.cpp',
   'sfn/sfn_nir_lower_64bit.cpp',
   'sfn/sfn_nir_lower_alu.cpp',
   'sfn/sfn_nir_lower_alu.h',
+  'sfn/sfn_nir_lower_tex.cpp',
+  'sfn/sfn_nir_lower_tex.h',
   'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
   'sfn/sfn_nir_lower_fs_out_to_vector.h',
   'sfn/sfn_nir_lower_tess_io.cpp',
   'sfn/sfn_nir_vectorize_vs_inputs.c',
-  'sfn/sfn_shader_base.cpp',
-  'sfn/sfn_shader_base.h',
-  'sfn/sfn_shader_compute.cpp',
-  'sfn/sfn_shader_compute.h',
-  'sfn/sfn_shader_fragment.cpp',
-  'sfn/sfn_shader_fragment.h',
-  'sfn/sfn_shader_geometry.cpp',
-  'sfn/sfn_shader_geometry.h',
-  'sfn/sfn_shader_tcs.cpp',
-  'sfn/sfn_shader_tcs.h',
-  'sfn/sfn_shader_tess_eval.cpp',
-  'sfn/sfn_shader_tess_eval.h',
-  'sfn/sfn_shader_vertex.cpp',
-  'sfn/sfn_shader_vertex.h',
-  'sfn/sfn_shaderio.cpp',
-  'sfn/sfn_shaderio.h',
-  'sfn/sfn_value.cpp',
-  'sfn/sfn_value.h',
-  'sfn/sfn_value_gpr.cpp',
-  'sfn/sfn_value_gpr.h',
-  'sfn/sfn_valuepool.cpp',
-  'sfn/sfn_valuepool.h',
-  'sfn/sfn_vertexstageexport.cpp',
-  'sfn/sfn_vertexstageexport.h',
+  'sfn/sfn_optimizer.cpp',
+  'sfn/sfn_peephole.cpp',
+  'sfn/sfn_ra.cpp',
+  'sfn/sfn_ra.h',
+  'sfn/sfn_scheduler.cpp',
+  'sfn/sfn_scheduler.h',
+  'sfn/sfn_shader.cpp',
+  'sfn/sfn_shader.h',
+  'sfn/sfn_shader_cs.cpp',
+  'sfn/sfn_shader_cs.h',
+  'sfn/sfn_shader_fs.cpp',
+  'sfn/sfn_shader_fs.h',
+  'sfn/sfn_shader_gs.cpp',
+  'sfn/sfn_shader_gs.h',
+  'sfn/sfn_shader_tess.cpp',
+  'sfn/sfn_shader_tess.h',
+  'sfn/sfn_shader_vs.cpp',
+  'sfn/sfn_shader_vs.h',
+  'sfn/sfn_valuefactory.cpp',
+  'sfn/sfn_valuefactory.h',
+  'sfn/sfn_virtualvalues.cpp',
+  'sfn/sfn_virtualvalues.h',
   )
 
 egd_tables_h = custom_target(
@@ -200,11 +197,13 @@ libr600 = static_library(
   'r600',
   [files_r600, egd_tables_h],
   c_args : [r600_c_args, '-Wstrict-overflow=0'],
+  cpp_args: '-std=c++17',
   gnu_symbol_visibility : 'hidden',
   include_directories : [
     inc_src, inc_mapi, inc_mesa, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
     inc_gallium_drivers,
   ],
+
   dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
 )
 
@@ -212,3 +211,9 @@ driver_r600 = declare_dependency(
   compile_args : '-DGALLIUM_R600',
   link_with : [libr600, libmesa, libradeonwinsys],
 )
+
+if with_tests
+  subdir('sfn/tests')
+endif
+
+
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 92ea9c5..954af06 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -407,8 +407,8 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
 			}
 			assignment[4] = alu;
 		} else {
-			if (assignment[chan]) {                           
-				assert(0); /* ALU.chan has already been allocated. */
+                        if (assignment[chan]) {
+			 	assert(0); /* ALU.chan has already been allocated. */
 				return -1;
 			}
 			assignment[chan] = alu;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 3072e64..952a950 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -355,6 +355,8 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
 
+int r600_load_ar(struct r600_bytecode *bc);
+
 static inline int fp64_switch(int i)
 {
 	switch (i) {
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index b178c70..4ba7075 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -420,11 +420,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                 if (is_nir_enabled(&rscreen->b))
                    return 1;
 		return 0;
-        case PIPE_CAP_INT64_DIVMOD:
-           /* it is actually not supported, but the nir lowering hdanles this corectly wheras
-            * the glsl lowering path seems to not initialize the buildins correctly.
-            */
-           return is_nir_enabled(&rscreen->b);
+
+	case PIPE_CAP_TWO_SIDED_COLOR:
+		return !is_nir_enabled(&rscreen->b);
+	case PIPE_CAP_INT64_DIVMOD:
+		/* it is actually not supported, but the nir lowering handles this corectly wheras
+		 * the glsl lowering path seems to not initialize the buildins correctly.
+		 */
+		return is_nir_enabled(&rscreen->b);
 	case PIPE_CAP_CULL_DISTANCE:
 		return 1;
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
index 8690fc2..b3bf317 100644
--- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
@@ -46,7 +46,7 @@ const std::map<EAluOp, AluOp> alu_ops = {
    {op1_cos                 ,AluOp(1, 1, AluOp::t,"COS")},
    {op1_exp_ieee            ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
    {op1_floor               ,AluOp(1, 1, AluOp::a,"FLOOR")},
-   {op1_flt_to_int          ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
+   {op1_flt_to_int          ,AluOp(1, 0, AluOp::v,"FLT_TO_INT")},
    {op1_flt_to_uint         ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
    {op1_flt_to_int_rpi      ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
    {op1_flt_to_int_floor    ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
@@ -84,15 +84,15 @@ const std::map<EAluOp, AluOp> alu_ops = {
    {op1_recipsqrt_ieee1     ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
    {op1_recip_int           ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
    {op1_recip_uint          ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
-   {op1_recip_64            ,AluOp(1, 1, AluOp::t,"RECIP_64")},
-   {op1_recip_clamped_64    ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
-   {op1_recipsqrt_64        ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
-   {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
+   {op1_recip_64            ,AluOp(2, 1, AluOp::t,"RECIP_64")},
+   {op1_recip_clamped_64    ,AluOp(2, 1, AluOp::t,"RECIP_CLAMPED_64")},
+   {op1_recipsqrt_64        ,AluOp(2, 1, AluOp::t,"RECIPSQRT_64")},
+   {op1_recipsqrt_clamped_64,AluOp(2, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
    {op1_rndne               ,AluOp(1, 1, AluOp::a,"RNDNE")},
    {op1_sqrt_ieee           ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
    {op1_sin                 ,AluOp(1, 1, AluOp::t,"SIN")},
    {op1_trunc               ,AluOp(1, 1, AluOp::a,"TRUNC")},
-   {op1_sqrt_64             ,AluOp(1, 1, AluOp::t,"SQRT_64")},
+   {op1_sqrt_64             ,AluOp(2, 1, AluOp::t,"SQRT_64")},
    {op1_ubyte0_flt          ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
    {op1_ubyte1_flt          ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
    {op1_ubyte2_flt          ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
@@ -273,53 +273,73 @@ const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
 };
 
 const std::map<ESDOp, LDSOp> lds_ops = {
-   {DS_OP_ADD           , {2, "DS_ADD"}},
-   {DS_OP_SUB           , {2, "DS_SUB"}},
-   {DS_OP_RSUB          , {2, "DS_RSUB"}},
-   {DS_OP_INC           , {2, "DS_INC"}},
-   {DS_OP_DEC           , {2, "DS_DEC"}},
-   {DS_OP_MIN_INT       , {2, "DS_MIN_INT"}},
-   {DS_OP_MAX_INT       , {2, "DS_MAX_INT"}},
-   {DS_OP_MIN_UINT      , {2, "DS_MIN_UINT"}},
-   {DS_OP_MAX_UINT      , {2, "DS_MAX_UINT"}},
-   {DS_OP_AND           , {2, "DS_AND"}},
-   {DS_OP_OR            , {2, "DS_OR"}},
-   {DS_OP_XOR           , {2, "DS_XOR"}},
-   {DS_OP_MSKOR         , {3, "DS_MSKOR"}},
-   {DS_OP_WRITE         , {2, "DS_WRITE"}},
-   {DS_OP_WRITE_REL     , {3, "DS_WRITE_REL"}},
-   {DS_OP_WRITE2        , {3, "DS_WRITE2"}},
-   {DS_OP_CMP_STORE     , {3, "DS_CMP_STORE"}},
-   {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
-   {DS_OP_BYTE_WRITE    , {2, "DS_BYTE_WRITE"}},
-   {DS_OP_SHORT_WRITE   , {2, "DS_SHORT_WRITE"}},
-   {DS_OP_ADD_RET       , {2, "DS_ADD_RET"}},
-   {DS_OP_SUB_RET       , {2, "DS_SUB_RET"}},
-   {DS_OP_RSUB_RET      , {2, "DS_RSUB_RET"}},
-   {DS_OP_INC_RET       , {2, "DS_INC_RET"}},
-   {DS_OP_DEC_RET       , {2, "DS_DEC_RET"}},
-   {DS_OP_MIN_INT_RET   , {2, "DS_MIN_INT_RET"}},
-   {DS_OP_MAX_INT_RET   , {2, "DS_MAX_INT_RET"}},
-   {DS_OP_MIN_UINT_RET  , {2, "DS_MIN_UINT_RET"}},
-   {DS_OP_MAX_UINT_RET  , {2, "DS_MAX_UINT_RET"}},
-   {DS_OP_AND_RET       , {2, "DS_AND_RET"}},
-   {DS_OP_OR_RET        , {2, "DS_OR_RET"}},
-   {DS_OP_XOR_RET       , {2, "DS_XOR_RET"}},
-   {DS_OP_MSKOR_RET     , {3, "DS_MSKOR_RET"}},
-   {DS_OP_XCHG_RET      , {2, "DS_XCHG_RET"}},
-   {DS_OP_XCHG_REL_RET  , {3, "DS_XCHG_REL_RET"}},
-   {DS_OP_XCHG2_RET     , {3, "DS_XCHG2_RET"}},
-   {DS_OP_CMP_XCHG_RET  , {3, "DS_CMP_XCHG_RET"}},
-   {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
-   {DS_OP_READ_RET      , {1, "DS_READ_RET"}},
-   {DS_OP_READ_REL_RET  , {1, "DS_READ_REL_RET"}},
-   {DS_OP_READ2_RET     , {2, "DS_READ2_RET"}},
-   {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
-   {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
-   {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
-   {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
-   {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
-   {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
+   {DS_OP_ADD           , {2, "ADD"}},
+   {DS_OP_SUB           , {2, "SUB"}},
+   {DS_OP_RSUB          , {2, "RSUB"}},
+   {DS_OP_INC           , {2, "INC"}},
+   {DS_OP_DEC           , {2, "DEC"}},
+   {DS_OP_MIN_INT       , {2, "MIN_INT"}},
+   {DS_OP_MAX_INT       , {2, "MAX_INT"}},
+   {DS_OP_MIN_UINT      , {2, "MIN_UINT"}},
+   {DS_OP_MAX_UINT      , {2, "MAX_UINT"}},
+   {DS_OP_AND           , {2, "AND"}},
+   {DS_OP_OR            , {2, "OR"}},
+   {DS_OP_XOR           , {2, "XOR"}},
+   {DS_OP_MSKOR         , {3, "MSKOR"}},
+   {DS_OP_WRITE         , {2, "WRITE"}},
+   {DS_OP_WRITE_REL     , {3, "WRITE_REL"}},
+   {DS_OP_WRITE2        , {3, "WRITE2"}},
+   {DS_OP_CMP_STORE     , {3, "CMP_STORE"}},
+   {DS_OP_CMP_STORE_SPF , {3, "CMP_STORE_SPF"}},
+   {DS_OP_BYTE_WRITE    , {2, "BYTE_WRITE"}},
+   {DS_OP_SHORT_WRITE   , {2, "SHORT_WRITE"}},
+   {DS_OP_ADD_RET       , {2, "ADD_RET"}},
+   {DS_OP_SUB_RET       , {2, "SUB_RET"}},
+   {DS_OP_RSUB_RET      , {2, "RSUB_RET"}},
+   {DS_OP_INC_RET       , {2, "INC_RET"}},
+   {DS_OP_DEC_RET       , {2, "DEC_RET"}},
+   {DS_OP_MIN_INT_RET   , {2, "MIN_INT_RET"}},
+   {DS_OP_MAX_INT_RET   , {2, "MAX_INT_RET"}},
+   {DS_OP_MIN_UINT_RET  , {2, "MIN_UINT_RET"}},
+   {DS_OP_MAX_UINT_RET  , {2, "MAX_UINT_RET"}},
+   {DS_OP_AND_RET       , {2, "AND_RET"}},
+   {DS_OP_OR_RET        , {2, "OR_RET"}},
+   {DS_OP_XOR_RET       , {2, "XOR_RET"}},
+   {DS_OP_MSKOR_RET     , {3, "MSKOR_RET"}},
+   {DS_OP_XCHG_RET      , {2, "XCHG_RET"}},
+   {DS_OP_XCHG_REL_RET  , {3, "XCHG_REL_RET"}},
+   {DS_OP_XCHG2_RET     , {3, "XCHG2_RET"}},
+   {DS_OP_CMP_XCHG_RET  , {3, "CMP_XCHG_RET"}},
+   {DS_OP_CMP_XCHG_SPF_RET, {3, "CMP_XCHG_SPF_RET"}},
+   {DS_OP_READ_RET      , {1, "READ_RET"}},
+   {DS_OP_READ_REL_RET  , {1, "READ_REL_RET"}},
+   {DS_OP_READ2_RET     , {2, "READ2_RET"}},
+   {DS_OP_READWRITE_RET , {3, "READWRITE_RET"}},
+   {DS_OP_BYTE_READ_RET , {1, "BYTE_READ_RET"}},
+   {DS_OP_UBYTE_READ_RET, {1, "UBYTE_READ_RET"}},
+   {DS_OP_SHORT_READ_RET, {1, "SHORT_READ_RET"}},
+   {DS_OP_USHORT_READ_RET, {1, "USHORT_READ_RET"}},
+   {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "ATOMIC_ORDERED_ALLOC_RET"}},
+   {LDS_ADD_RET, {2, "LDS_ADD_RET"}},
+   {LDS_ADD, {2, "LDS_ADD"}},
+   {LDS_AND_RET, {2, "LDS_AND_RET"}},
+   {LDS_AND, {2, "LDS_AND"}},
+   {LDS_WRITE, {2, "LDS_WRITE"}},
+   {LDS_OR_RET, {2, "LDS_OR_RET"}},
+   {LDS_OR, {2, "LDS_OR"}},
+   {LDS_MAX_INT_RET, {2, "LDS_MAX_INT_RET"}},
+   {LDS_MAX_INT, {2, "LDS_MAX_INT"}},
+   {LDS_MAX_UINT_RET, {2, "LDS_MAX_UINT_RET"}},
+   {LDS_MAX_UINT, {2, "LDS_MAX_UINT"}},
+   {LDS_MIN_INT_RET, {2, "LDS_MIN_INT_RET"}},
+   {LDS_MIN_INT, {2, "LDS_MIN_INT"}},
+   {LDS_MIN_UINT_RET, {2, "LDS_MIN_UINT_RET"}},
+   {LDS_MIN_UINT, {2, "LDS_MIN_UINT"}},
+   {LDS_XOR_RET, {2, "LDS_XOR"}},
+   {LDS_XOR, {2, "LDS_XOR"}},
+   {LDS_XCHG_RET, {2, "LDS_XCHG_RET"}},
+   {LDS_CMP_XCHG_RET, {3, "LDS_CMP_XCHG_RET"}},
+   {LDS_WRITE_REL, {3, "LDS_WRITE_REL"}},
 };
 
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
index 4481c49..4d2781f 100644
--- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
@@ -27,6 +27,8 @@
 #ifndef r600_sfn_alu_defines_h
 #define r600_sfn_alu_defines_h
 
+#include "../r600_isa.h"
+
 #include <map>
 #include <bitset>
 
@@ -235,12 +237,71 @@ enum EAluOp {
    op3_cnde_int = 28<< 6,
    op3_cndgt_int = 29<< 6,
    op3_cndge_int = 30<< 6,
-   op3_mul_lit = 31<< 6
+   op3_mul_lit = 31<< 6,
+   op_invalid = 0xffff
+};
+
+enum AluModifiers {
+   alu_src0_neg,
+   alu_src0_abs,
+   alu_src0_rel,
+   alu_src1_neg,
+   alu_src1_abs,
+   alu_src1_rel,
+   alu_src2_neg,
+   alu_src2_rel,
+   alu_dst_clamp,
+   alu_dst_rel,
+   alu_last_instr,
+   alu_update_exec,
+   alu_update_pred,
+   alu_write,
+   alu_op3,
+   alu_is_trans,
+   alu_is_cayman_trans,
+   alu_is_lds,
+   alu_lds_group_start,
+   alu_lds_group_end,
+   alu_lds_address,
+   alu_no_schedule_bias,
+   alu_64bit_op,
+   alu_flag_count
+};
+
+enum AluDstModifiers {
+   omod_off = 0,
+   omod_mul2 = 1,
+   omod_mul4 = 2,
+   omod_divl2 = 3
+};
+
+enum AluPredSel {
+   pred_off = 0,
+   pred_zero = 2,
+   pred_one = 3
 };
 
+enum AluBankSwizzle {
+   alu_vec_012 = 0,
+   sq_alu_scl_201 = 0,
+   alu_vec_021 = 1,
+   sq_alu_scl_122 = 1,
+   alu_vec_120 = 2,
+   sq_alu_scl_212 = 2,
+   alu_vec_102 = 3,
+   sq_alu_scl_221 = 3,
+   alu_vec_201 = 4,
+   sq_alu_scl_unknown  = 4,
+   alu_vec_210 = 5,
+   alu_vec_unknown = 6
+};
 
+inline AluBankSwizzle operator ++(AluBankSwizzle& x) {
+   x = static_cast<AluBankSwizzle>(x + 1);
+   return x;
+}
 
-using AluOpFlags=std::bitset<32>;
+using AluOpFlags=std::bitset<alu_flag_count>;
 
 struct AluOp {
    static constexpr int x = 1;
@@ -314,6 +375,8 @@ struct AluInlineConstantDescr {
 
 extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
 
+#define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X
+
 enum ESDOp {
    DS_OP_ADD = 0,
    DS_OP_SUB = 1,
@@ -362,9 +425,31 @@ enum ESDOp {
    DS_OP_SHORT_READ_RET = 56,
    DS_OP_USHORT_READ_RET = 57,
    DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
-   DS_OP_INVALID = 64
+   DS_OP_INVALID = 64,
+   LDSOP2(ADD_RET),
+   LDSOP2(ADD),
+   LDSOP2(AND_RET),
+   LDSOP2(AND),
+   LDSOP2(WRITE),
+   LDSOP2(OR_RET),
+   LDSOP2(OR),
+   LDSOP2(MAX_INT_RET),
+   LDSOP2(MAX_INT),
+   LDSOP2(MAX_UINT_RET),
+   LDSOP2(MAX_UINT),
+   LDSOP2(MIN_INT_RET),
+   LDSOP2(MIN_INT),
+   LDSOP2(MIN_UINT_RET),
+   LDSOP2(MIN_UINT),
+   LDSOP2(XOR_RET),
+   LDSOP2(XOR),
+   LDSOP2(XCHG_RET),
+   LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
+   LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
 };
 
+#undef LDSOP2
+
 struct LDSOp {
    int nsrc;
    const char *name;
@@ -372,6 +457,18 @@ struct LDSOp {
 
 extern const std::map<ESDOp, LDSOp> lds_ops;
 
+struct KCacheLine {
+   int bank{0};
+   int addr{0};
+   int len{0};
+   enum KCacheLockMode {
+      free,
+      lock_1,
+      lock_2
+   } mode{free};
+};
+
+
 }
 
 #endif // ALU_DEFINES_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
new file mode 100644
index 0000000..6cbd4bf
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.cpp
@@ -0,0 +1,329 @@
+#include "sfn_alu_readport_validation.h"
+
+#include <cstring>
+
+namespace r600 {
+
+class ReserveReadport : public ConstRegisterVisitor {
+public:
+   ReserveReadport(AluReadportReservation& reserv);
+
+   void visit(const LocalArray& value) override;
+   void visit(const LiteralConstant& value) override;
+   void visit(const InlineConstant& value) override;
+
+   void reserve_gpr(int sel, int chan);
+
+   AluReadportReservation& reserver;
+   int cycle = -1;
+   int isrc = -1;
+   int src0_sel = -1;
+   int src0_chan = -1;
+   bool success = true;
+
+   static const int max_const_readports = 2;
+};
+
+
+class ReserveReadportVec : public ReserveReadport {
+public:
+   using ReserveReadport::ReserveReadport;
+
+   void visit(const Register& value) override;
+   void visit(const LocalArrayValue& value) override;
+   void visit(const UniformValue& value) override;
+};
+
+class ReserveReadportTrans : public ReserveReadport
+{
+public:
+   ReserveReadportTrans(AluReadportReservation& reserv);
+
+   int n_consts;
+};
+
+class ReserveReadportTransPass1 : public ReserveReadportTrans {
+public:
+   using ReserveReadportTrans::ReserveReadportTrans;
+
+   void visit(const Register& value) override;
+   void visit(const LocalArrayValue& value) override;
+   void visit(const UniformValue& value) override;
+   void visit(const InlineConstant& value) override;
+   void visit(const LiteralConstant& value) override;
+};
+
+
+class ReserveReadportTransPass2 : public ReserveReadportTrans {
+public:
+   using ReserveReadportTrans::ReserveReadportTrans;
+
+   void visit(const Register& value) override;
+   void visit(const LocalArrayValue& value) override;
+   void visit(const UniformValue& value) override;
+};
+
+bool AluReadportReservation::schedule_vec_src(PVirtualValue src[3],  int nsrc, AluBankSwizzle swz)
+{
+   ReserveReadportVec visitor(*this);
+
+   if (src[0]->as_register()) {
+      visitor.src0_sel = src[0]->sel();
+      visitor.src0_chan = src[0]->chan();
+   } else {
+      visitor.src0_sel = 0xffff;
+      visitor.src0_chan = 8;
+   }
+
+   for (int i = 0; i < nsrc; ++i) {
+      visitor.cycle = cycle_vec(swz, i);
+      visitor.isrc = i;
+      src[i]->accept(visitor);
+   }
+
+   return visitor.success;
+}
+
+bool AluReadportReservation::schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz)
+{
+   ReserveReadportVec visitor(*this);
+
+   for (unsigned i = 0; i < alu.n_sources() && visitor.success; ++i) {
+      visitor.cycle = cycle_vec(swz, i);
+      visitor.isrc = i;
+      if (i == 1 && alu.src(i).equal_to(alu.src(0)))
+         continue;
+      alu.src(i).accept(visitor);
+   }
+   return visitor.success;
+}
+
+bool AluReadportReservation::schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz)
+{
+
+   ReserveReadportTransPass1 visitor1(*this);
+
+   for (unsigned i = 0; i < alu.n_sources(); ++i) {
+      visitor1.cycle = cycle_trans(swz, i);
+      alu.src(i).accept(visitor1);
+   }
+   if (!visitor1.success)
+      return false;
+
+
+   ReserveReadportTransPass2 visitor2(*this);
+   visitor2.n_consts = visitor1.n_consts;
+
+
+   for (unsigned i = 0; i < alu.n_sources(); ++i) {
+      visitor2.cycle = cycle_trans(swz, i);
+
+      alu.src(i).accept(visitor2);
+   }
+   return visitor2.success;
+}
+
+
+AluReadportReservation::AluReadportReservation()
+{
+   for (int i = 0; i < max_chan_channels; ++i) {
+      for (int j = 0; j < max_gpr_readports; ++j)
+         m_hw_gpr[j][i] = -1;
+      m_hw_const_addr[i] = -1;
+      m_hw_const_chan[i] = -1;
+      m_hw_const_bank[i] = -1;
+   }
+}
+
+
+bool AluReadportReservation::reserve_gpr(int sel, int chan, int cycle)
+{
+   if (m_hw_gpr[cycle][chan] == -1) {
+      m_hw_gpr[cycle][chan] = sel;
+   }
+   else if (m_hw_gpr[cycle][chan] != sel) {
+      return false;
+   }
+   return true;
+}
+
+bool AluReadportReservation::reserve_const(const UniformValue& value)
+{
+   int match = -1;
+   int empty = -1;
+
+   for (int res = 0; res < ReserveReadport::max_const_readports; ++res) {
+      if (m_hw_const_addr[res] == -1)
+         empty = res;
+      else if ((m_hw_const_addr[res] == value.sel()) &&
+               (m_hw_const_bank[res] == value.kcache_bank()) &&
+               (m_hw_const_chan[res] == (value.chan() >> 1)))
+         match = res;
+   }
+
+   if (match < 0) {
+      if (empty >= 0) {
+         m_hw_const_addr[empty] = value.sel();
+         (m_hw_const_bank[empty] = value.kcache_bank());
+         m_hw_const_chan[empty] = value.chan() >> 1;
+      } else {
+         return false;
+      }
+   }
+   return true;
+}
+
+bool AluReadportReservation::add_literal(uint32_t value)
+{   
+   for (unsigned i = 0; i < m_nliterals; ++i) {
+      if (m_literals[i] == value)
+         return true;
+   }
+   if (m_nliterals < m_literals.size()) {
+      m_literals[m_nliterals++] = value;
+      return true;
+   }
+   return false;
+}
+
+int AluReadportReservation::cycle_vec(AluBankSwizzle swz, int src)
+{
+   static const int mapping[AluBankSwizzle::alu_vec_unknown][max_gpr_readports] = {
+      {0, 1, 2},
+      {0, 2, 1},
+      {1, 0, 2},
+      {1, 2, 0},
+      {2, 0, 1},
+      {2, 1, 0}
+   };
+   return mapping[swz][src];
+}
+
+int AluReadportReservation::cycle_trans(AluBankSwizzle swz, int src)
+{
+   static const int mapping[AluBankSwizzle::sq_alu_scl_unknown][max_gpr_readports] = {
+      {2, 1, 0},
+      {1, 2, 2},
+      {2, 1, 2},
+      {2, 2, 1},
+   };
+   return mapping[swz][src];
+}
+
+
+ReserveReadport::ReserveReadport(AluReadportReservation& reserv):
+   reserver(reserv)
+{
+}
+
+void ReserveReadport::visit(const LocalArray& value)
+{
+   (void)value;
+   unreachable("a full array is not available here");
+}
+
+void ReserveReadport::visit(const LiteralConstant& value)
+{
+   success &= reserver.add_literal(value.value());
+}
+
+void ReserveReadport::visit(const InlineConstant& value)
+{
+   (void)value;
+}
+
+void ReserveReadportVec::visit(const Register& value)
+{
+   reserve_gpr(value.sel(), value.chan());
+}
+
+void ReserveReadportVec::visit(const LocalArrayValue& value)
+{
+   // Set the hightest non-sign bit to indicated that we use the
+   // AR register
+   reserve_gpr(0x4000000 | value.sel(), value.chan());
+}
+
+void ReserveReadport::reserve_gpr(int sel, int chan)
+{
+   if (isrc == 1 && src0_sel == sel && src0_chan == chan)
+      return;
+   success &= reserver.reserve_gpr(sel, chan, cycle);
+}
+
+void ReserveReadportVec::visit(const UniformValue& value)
+{
+   // kcache bank?
+   success &= reserver.reserve_const(value);
+}
+
+ReserveReadportTrans::ReserveReadportTrans(AluReadportReservation& reserv):
+   ReserveReadport(reserv),
+   n_consts(0)
+{}
+
+void ReserveReadportTransPass1::visit(const Register& value)
+{
+   (void)value;
+}
+
+void ReserveReadportTransPass1::visit(const LocalArrayValue& value)
+{
+   (void)value;
+}
+
+void ReserveReadportTransPass1::visit(const UniformValue& value)
+{
+   if (n_consts >= max_const_readports) {
+      success = false;
+      return;
+   }
+   n_consts++;
+   success &= reserver.reserve_const(value);
+}
+
+void ReserveReadportTransPass1::visit(const InlineConstant& value)
+{
+   (void)value;
+   if (n_consts >= max_const_readports) {
+      success = false;
+      return;
+   }
+   n_consts++;
+}
+
+void ReserveReadportTransPass1::visit(const LiteralConstant& value)
+{
+   if (n_consts >= max_const_readports) {
+      success = false;
+      return;
+   }
+   n_consts++;
+   success &= reserver.add_literal(value.value());
+}
+
+void ReserveReadportTransPass2::visit(const Register& value)
+{
+   if (cycle < n_consts) {
+      success = false;
+      return;
+   }
+   reserve_gpr(value.sel(), value.chan());
+}
+
+void ReserveReadportTransPass2::visit(const LocalArrayValue& value)
+{
+   if (cycle < n_consts) {
+      success = false;
+      return;
+   }
+   reserve_gpr(0x4000000 | value.sel(), value.chan());
+}
+
+void ReserveReadportTransPass2::visit(const UniformValue& value)
+{
+   (void)value;
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
new file mode 100644
index 0000000..7ca23ce
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_readport_validation.h
@@ -0,0 +1,41 @@
+#ifndef ALUREADPORTVALIDATION_H
+#define ALUREADPORTVALIDATION_H
+
+#include "sfn_instr_alu.h"
+
+namespace r600 {
+
+class AluReadportReservation {
+public:
+   AluReadportReservation();
+   AluReadportReservation(const AluReadportReservation& orig) = default;
+   AluReadportReservation& operator = (const AluReadportReservation& orig) = default;
+
+   bool schedule_vec_src(PVirtualValue src[3],  int nsrc, AluBankSwizzle swz);
+
+   bool schedule_vec_instruction(const AluInstr& alu, AluBankSwizzle swz);
+   bool schedule_trans_instruction(const AluInstr& alu, AluBankSwizzle swz);
+
+   bool reserve_gpr(int sel, int chan, int cycle);
+   bool reserve_const(const UniformValue& value);
+
+   bool add_literal(uint32_t value);
+
+   static int cycle_vec(AluBankSwizzle swz, int src);
+   static int cycle_trans(AluBankSwizzle swz, int src);
+
+   static const int max_chan_channels = 4;
+   static const int max_gpr_readports = 3;
+
+   std::array<std::array<int, max_chan_channels>, max_gpr_readports> m_hw_gpr;
+   std::array<int, max_chan_channels> m_hw_const_addr;
+   std::array<int, max_chan_channels>  m_hw_const_chan;
+   std::array<int, max_chan_channels>  m_hw_const_bank;
+   std::array<uint32_t, max_chan_channels> m_literals;
+   uint32_t m_nliterals{0};
+};
+
+
+}
+
+#endif // ALUREADPORTVALIDATION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp
similarity index 52%
rename from src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
rename to src/gallium/drivers/r600/sfn/sfn_assembler.cpp
index f97dba9..b2f67a2 100644
--- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp
@@ -1,322 +1,300 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_ir_to_assembly.h"
+#include "sfn_assembler.h"
+#include "sfn_debug.h"
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_tex.h"
+
 #include "sfn_conditionaljumptracker.h"
 #include "sfn_callstack.h"
-#include "sfn_instruction_gds.h"
-#include "sfn_instruction_misc.h"
-#include "sfn_instruction_fetch.h"
-#include "sfn_instruction_lds.h"
 
-#include "../r600_shader.h"
 #include "../eg_sq.h"
 
 namespace r600 {
+Assembler::Assembler(r600_shader *sh, const r600_shader_key& key):
+   m_sh(sh), m_key(key)
+{
+}
 
-using std::vector;
-
-
-
-struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor {
-
-   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
-
-
-   bool emit(const Instruction::Pointer i);
-   void reset_addr_register() {m_last_addr.reset();}
+extern const std::map<ESDOp, int> ds_opcode_map;
 
+class AssamblerVisitor : public ConstInstrVisitor {
 public:
-   bool visit(const AluInstruction& i) override;
-   bool visit(const ExportInstruction& i) override;
-   bool visit(const TexInstruction& i) override;
-   bool visit(const FetchInstruction& i) override;
-   bool visit(const IfInstruction& i) override;
-   bool visit(const ElseInstruction& i) override;
-   bool visit(const IfElseEndInstruction& i) override;
-   bool visit(const LoopBeginInstruction& i) override;
-   bool visit(const LoopEndInstruction& i) override;
-   bool visit(const LoopBreakInstruction& i) override;
-   bool visit(const LoopContInstruction& i) override;
-   bool visit(const StreamOutIntruction& i) override;
-   bool visit(const MemRingOutIntruction& i) override;
-   bool visit(const EmitVertex& i) override;
-   bool visit(const WaitAck& i) override;
-   bool visit(const WriteScratchInstruction& i) override;
-   bool visit(const GDSInstr& i) override;
-   bool visit(const RatInstruction& i) override;
-   bool visit(const LDSWriteInstruction& i) override;
-   bool visit(const LDSReadInstruction& i) override;
-   bool visit(const LDSAtomicInstruction& i) override;
-   bool visit(const GDSStoreTessFactor& i) override;
-   bool visit(const InstructionBlock& i) override;
-
-   bool emit_load_addr(PValue addr);
-   bool emit_fs_pixel_export(const ExportInstruction & exi);
-   bool emit_vs_pos_export(const ExportInstruction & exi);
-   bool emit_vs_param_export(const ExportInstruction & exi);
-   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
-   bool copy_src(r600_bytecode_alu_src& src, const Value& s);
-
-   EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);
+   AssamblerVisitor(r600_shader *sh, const r600_shader_key& key);
+
+   void visit(const AluInstr& instr) override;
+   void visit(const AluGroup& instr) override;
+   void visit(const TexInstr& instr) override;
+   void visit(const ExportInstr& instr) override;
+   void visit(const FetchInstr& instr) override;
+   void visit(const Block& instr) override;
+   void visit(const IfInstr& instr) override;
+   void visit(const ControlFlowInstr& instr) override;
+   void visit(const WriteScratchInstr& instr) override;
+   void visit(const StreamOutInstr& instr) override;
+   void visit(const MemRingOutInstr& instr) override;
+   void visit(const EmitVertexInstr& instr) override;
+   void visit(const GDSInstr& instr) override;
+   void visit(const WriteTFInstr& instr) override;
+   void visit(const LDSAtomicInstr& instr) override;
+   void visit(const LDSReadInstr& instr) override;
+   void visit(const RatInstr& instr) override;
+
+   void finalize();
+
+   const uint32_t sf_vtx = 1;
+   const uint32_t sf_tex = 2;
+   const uint32_t sf_alu = 4;
+   const uint32_t sf_addr_register = 8;
+   const uint32_t sf_all = 0xf;
+
+   void clear_states(const uint32_t& states);
+   bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write);
+   PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s);
+
+   EBufferIndexMode
+   emit_index_reg(const VirtualValue& addr, unsigned idx);
+
+   void emit_endif();
+   void emit_else();
+   void emit_loop_begin(bool vpm);
+   void emit_loop_end();
+   void emit_loop_break();
+   void emit_loop_cont();
+
+   void emit_alu_op(const AluInstr& ai);
+   void emit_lds_op(const AluInstr& lds);
+
+   void emit_wait_ack();
+
+   /* Start initialized in constructor */
+   const r600_shader_key& m_key;
+   r600_shader *m_shader;
+   r600_bytecode *m_bc;
 
    ConditionalJumpTracker m_jump_tracker;
    CallStack m_callstack;
+   bool ps_alpha_to_one;
+   /* End initialized in constructor */
 
-public:
-   r600_bytecode *m_bc;
-   r600_shader *m_shader;
-   r600_shader_key *m_key;
-   r600_bytecode_output m_output;
-   unsigned m_max_color_exports;
-   bool has_pos_output;
-   bool has_param_output;
-   PValue m_last_addr;
-   int m_loop_nesting;
    std::set<uint32_t> m_nliterals_in_group;
    std::set<int> vtx_fetch_results;
    std::set<int> tex_fetch_results;
-   bool m_last_op_was_barrier;
-};
 
+   PRegister m_last_addr{nullptr};
 
-AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
-                                                   r600_shader_key *key)
-{
-   impl = new AssemblyFromShaderLegacyImpl(sh, key);
-}
+   unsigned m_max_color_exports{0};
+   int m_loop_nesting{0};
 
-AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
-{
-   delete impl;
-}
+   bool m_ack_suggested{false};
+   bool m_has_param_output{false};
+   bool m_has_pos_output{false};
+   bool m_last_op_was_barrier{false};
+   bool m_result{true};
+};
 
-bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
+bool Assembler::lower(Shader *shader)
 {
-   if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
-       impl->m_shader->ninput > 0)
-         r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
+   AssamblerVisitor ass(m_sh, m_key);
 
+   auto& blocks = shader->func();
+   for (auto b : blocks) {
+      b->accept(ass);
+      if (!ass.m_result)
+         return false;
+   }
 
-   std::vector<Instruction::Pointer> exports;
+   ass.finalize();
 
-   for (const auto& block : ir) {
-      if (!impl->visit(block))
-         return false;
-   }   /*
-   for (const auto& i : exports) {
-      if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
-          return false;
-   }*/
+   return ass.m_result;
 
+}
 
+AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key):
+   m_key(key),
+   m_shader(sh),
+
+   m_bc(&sh->bc),
+   m_callstack(sh->bc),
+   ps_alpha_to_one(key.ps.alpha_to_one)
+{
+   if (m_shader->processor_type == PIPE_SHADER_FRAGMENT)
+      m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1);
+
+   if (m_shader->processor_type == PIPE_SHADER_VERTEX &&
+       m_shader->ninput > 0)
+         r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS);
+}
+
+void AssamblerVisitor::finalize()
+{
    const struct cf_op_info *last = nullptr;
-   if (impl->m_bc->cf_last)
-      last = r600_isa_cf(impl->m_bc->cf_last->op);
+
+   if (m_bc->cf_last)
+      last = r600_isa_cf(m_bc->cf_last->op);
 
    /* alu clause instructions don't have EOP bit, so add NOP */
-   if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
-       || impl->m_bc->cf_last->op == CF_OP_POP)
-      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
+   if (m_shader->bc.gfx_level < CAYMAN &&
+       (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END
+       || m_bc->cf_last->op == CF_OP_POP))
+      r600_bytecode_add_cfinst(m_bc, CF_OP_NOP);
+
+   /* A fetch shader only can't be EOP (results in hang), but we can replace it
+        * by a NOP */
+   else if (last && m_bc->cf_last->op == CF_OP_CALL_FS)
+      m_bc->cf_last->op = CF_OP_NOP;
+
+   if (m_shader->bc.gfx_level != CAYMAN)
+      m_bc->cf_last->end_of_program = 1;
+   else
+      cm_bytecode_add_cf_end(m_bc);
+}
 
-    /* A fetch shader only can't be EOP (results in hang), but we can replace it
-     * by a NOP */
-   else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
-      impl->m_bc->cf_last->op = CF_OP_NOP;
+extern const std::map<EAluOp, int> opcode_map;
 
-   if (impl->m_shader->bc.gfx_level != CAYMAN)
-      impl->m_bc->cf_last->end_of_program = 1;
-   else
-      cm_bytecode_add_cf_end(impl->m_bc);
+void AssamblerVisitor::visit(const AluInstr& ai)
+{
+   assert(vtx_fetch_results.empty());
+   assert(tex_fetch_results.empty());
 
-   return true;
+   if (unlikely(ai.has_alu_flag(alu_is_lds)))
+      emit_lds_op(ai);
+   else
+      emit_alu_op(ai);
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block)
+void AssamblerVisitor::emit_lds_op(const AluInstr& lds)
 {
-   for (const auto& i : block) {
-
-      if (i->type() != Instruction::vtx) {
-          vtx_fetch_results.clear();
-          if (i->type() != Instruction::tex)
-              tex_fetch_results.clear();
-      }
+   struct r600_bytecode_alu alu;
+   memset(&alu, 0, sizeof(alu));
 
-      m_last_op_was_barrier &= i->type() == Instruction::alu;
+   alu.is_lds_idx_op = true;
+   alu.op = lds.lds_opcode();
 
-      sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
+   /* All paired LDS fetch + read from queue instructions
+    * have to fit into the same ALU CF, 256 DW fit in, but we leave some
+    * space for weired things the backend assembler might do. */
+   const bool is_lds_start = lds.has_alu_flag(alu_lds_group_start);
+   const unsigned expected_alu_clause_fill = m_bc->cf_last->ndw +
+                                             2 * lds.required_slots();
 
-      if (!i->accept(*this))
-         return false;
+   if (is_lds_start && expected_alu_clause_fill > 240)
+      m_bc->force_add_cf = 1;
 
-      if (i->type() != Instruction::alu)
-         reset_addr_register();
+   bool has_lds_fetch = false;
+   switch (alu.op) {
+   case DS_OP_WRITE:
+      alu.op = LDS_OP2_LDS_WRITE;
+      break;
+   case LDS_WRITE_REL:
+      alu.op = LDS_OP3_LDS_WRITE_REL;
+      alu.lds_idx = 1;
+      break;
+   case DS_OP_READ_RET:
+      alu.op = LDS_OP1_LDS_READ_RET;
+      FALLTHROUGH;
+   case LDS_ADD_RET:
+   case LDS_AND_RET:
+   case LDS_OR_RET:
+   case LDS_MAX_INT_RET:
+   case LDS_MAX_UINT_RET:
+   case LDS_MIN_INT_RET:
+   case LDS_MIN_UINT_RET:
+   case LDS_XOR_RET:
+   case LDS_XCHG_RET:
+   case LDS_CMP_XCHG_RET:
+      has_lds_fetch = true;
+      break;
+   default:
+      ;
    }
 
-   return true;
-}
+   copy_src(alu.src[0], lds.src(0));
 
-AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
-                                                           r600_shader_key *key):
-   m_callstack(sh->bc),
-   m_bc(&sh->bc),
-   m_shader(sh),
-   m_key(key),
-   has_pos_output(false),
-   has_param_output(false),
-   m_loop_nesting(0),
-   m_last_op_was_barrier(false)
-{
-   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
-
-}
+   if (lds.n_sources() > 1)
+      copy_src(alu.src[1], lds.src(1));
+   else
+      alu.src[1].sel = V_SQ_ALU_SRC_0;
 
-extern const std::map<EAluOp, int> opcode_map;
+   if (lds.n_sources() > 2)
+      copy_src(alu.src[2], lds.src(2));
+   else
+      alu.src[2].sel = V_SQ_ALU_SRC_0;
 
-bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
-{
-   m_bc->ar_reg = addr->sel();
-   m_bc->ar_chan = addr->chan();
-   m_bc->ar_loaded = 0;
-   m_last_addr = addr;
+   alu.last = lds.has_alu_flag(alu_last_instr);
 
-   sfn_log << SfnLog::assembly << "   Prepare " << *addr << " to address register\n";
+   int r = r600_bytecode_add_alu(m_bc, &alu);
+   if (has_lds_fetch)
+      m_bc->cf_last->nlds_read++;
 
-   return true;
+   if (r)
+      m_result = false;
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
+void AssamblerVisitor::emit_alu_op(const AluInstr& ai)
 {
-
    struct r600_bytecode_alu alu;
    memset(&alu, 0, sizeof(alu));
-   PValue addr_in_use;
 
    if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
       std::cerr << "Opcode not handled for " << ai <<"\n";
-      return false;
+      m_result = false;
+      return;
    }
 
+   // skip multiple barriers
    if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
-      return true;
+      return;
 
    m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
 
-   for (unsigned i = 0; i < ai.n_sources(); ++i) {
-      auto& s = ai.src(i);
-      if (s.type() == Value::literal) {
-         auto& v = static_cast<const LiteralValue&>(s);
-         if (v.value() != 0 &&
-             v.value() != 1 &&
-             v.value_float() != 1.0f &&
-             v.value_float() != 0.5f &&
-             v.value() != 0xffffffff)
-            m_nliterals_in_group.insert(v.value());
-      }
-   }
-
-   /* This instruction group would exceed the limit of literals, so
-    * force a new instruction group by adding a NOP as last
-    * instruction. This will no loner be needed with a real
-    * scheduler */
-   if (m_nliterals_in_group.size() > 4) {
-      sfn_log << SfnLog::assembly << "  Have " << m_nliterals_in_group.size() << " inject a last op (nop)\n";
-      alu.op = ALU_OP0_NOP;
-      alu.last = 1;
-      alu.dst.chan = 3;
-      int retval = r600_bytecode_add_alu(m_bc, &alu);
-      if (retval)
-         return false;
-      memset(&alu, 0, sizeof(alu));
-      m_nliterals_in_group.clear();
-      for (unsigned i = 0; i < ai.n_sources(); ++i) {
-         auto& s = ai.src(i);
-         if (s.type() == Value::literal) {
-            auto& v = static_cast<const LiteralValue&>(s);
-            m_nliterals_in_group.insert(v.value());
-         }
-      }
-   }
-
    alu.op = opcode_map.at(ai.opcode());
 
-   /* Missing test whether ai actually has a dest */
    auto dst = ai.dest();
-
    if (dst) {
-      if (!copy_dst(alu.dst, *dst))
-         return false;
-
-      alu.dst.write = ai.flag(alu_write);
-      alu.dst.clamp = ai.flag(alu_dst_clamp);
-
-      if (dst->type() == Value::gpr_array_value) {
-         auto& v = static_cast<const GPRArrayValue&>(*dst);
-         PValue addr = v.indirect();
-         if (addr) {
-            if (!m_last_addr || *addr != *m_last_addr) {
-               emit_load_addr(addr);
-               addr_in_use = addr;
-            }
-            alu.dst.rel = addr ? 1 : 0;;
-         }
+      if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) {
+         m_result = false;
+         return;
       }
+
+      alu.dst.write = ai.has_alu_flag(alu_write);
+      alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp);
+      alu.dst.rel = dst->addr() ? 1 : 0;
+   } else {
+      alu.dst.chan = ai.dest_chan();
    }
 
    alu.is_op3 = ai.n_sources() == 3;
 
+   EBufferIndexMode kcache_index_mode = bim_none;
+   PVirtualValue buffer_offset = nullptr;
+
    for (unsigned i = 0; i < ai.n_sources(); ++i) {
-      auto& s = ai.src(i);
+      buffer_offset = copy_src(alu.src[i], ai.src(i));
+      alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]);
+      if (!alu.is_op3)
+         alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]);
 
-      if (!copy_src(alu.src[i], s))
-         return false;
-      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
-
-      if (s.type() == Value::gpr_array_value) {
-         auto& v = static_cast<const GPRArrayValue&>(s);
-         PValue addr = v.indirect();
-         if (addr) {
-            assert(!addr_in_use || (*addr_in_use == *addr));
-            if (!m_last_addr || *addr != *m_last_addr) {
-               emit_load_addr(addr);
-               addr_in_use = addr;
-            }
-            alu.src[i].rel = addr ? 1 : 0;
-         }
+      if (buffer_offset && kcache_index_mode == bim_none) {
+         kcache_index_mode = bim_zero;
+         alu.src[i].kc_bank = 1;
+         alu.src[i].kc_rel = 1;
+      }
+
+      if (ai.has_lds_access()) {
+         assert(m_bc->cf_last->nlds_read > 0);
+         m_bc->cf_last->nlds_read--;
       }
-      if (!alu.is_op3)
-         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
    }
 
    if (ai.bank_swizzle() != alu_vec_unknown)
       alu.bank_swizzle_force = ai.bank_swizzle();
 
-   alu.last = ai.flag(alu_last_instr);
-   alu.update_pred = ai.flag(alu_update_pred);
-   alu.execute_mask = ai.flag(alu_update_exec);
+   alu.last = ai.has_alu_flag(alu_last_instr);
+   //alu.update_pred = ai.has_alu_flag(alu_update_pred);
+   alu.execute_mask = ai.has_alu_flag(alu_update_exec);
 
    /* If the destination register is equal to the last loaded address register
     * then clear the latter one, because the values will no longer be identical */
@@ -326,11 +304,10 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
    if (dst)
       sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
 
-   if (dst && m_last_addr)
-      if (*dst == *m_last_addr) {
-         sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
-         m_last_addr.reset();
-      }
+   if (dst && m_last_addr && *dst == *m_last_addr) {
+      sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
+      m_last_addr = nullptr;
+   }
 
    auto cf_op = ai.cf_type();
 
@@ -351,7 +328,8 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
    if (alu.last)
       m_nliterals_in_group.clear();
 
-   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
+
+   m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
 
    if (ai.opcode() == op1_mova_int)
       m_bc->ar_loaded = 0;
@@ -362,262 +340,248 @@ bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
    if (ai.opcode() == op1_set_cf_idx1)
       m_bc->index_loaded[1] = 1;
 
-
    m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                           ai.opcode() == op2_killne_int ||
                           ai.opcode() == op1_set_cf_idx0 ||
                           ai.opcode() == op1_set_cf_idx1);
-   return retval;
 }
 
-bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
+void AssamblerVisitor::visit(const AluGroup& group)
 {
-   r600_bytecode_output output;
-   memset(&output, 0, sizeof(output));
-   assert(exi.gpr().type() == Value::gpr_vector);
-   const auto& gpr = exi.gpr();
-   output.gpr = gpr.sel();
-   output.elem_size = 3;
-   output.swizzle_x = gpr.chan_i(0);
-   output.swizzle_y = gpr.chan_i(1);
-   output.swizzle_z = gpr.chan_i(2);
-   output.swizzle_w = gpr.chan_i(3);
-   output.burst_count = 1;
-   output.array_base = 60 + exi.location();
-   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
-   output.type = exi.export_type();
+   clear_states(sf_vtx | sf_tex);
 
+   if (group.slots() == 0)
+      return;
 
-   if (r600_bytecode_add_output(m_bc, &output)) {
-      R600_ERR("Error adding pixel export at location %d\n", exi.location());
-      return false;
+   if (group.has_lds_group_start()) {
+      if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > 240) {
+         assert(m_bc->cf_last->nlds_read == 0);
+         m_bc->force_add_cf = 1;
+         m_last_addr = nullptr;
+      }
+   } else if (m_bc->cf_last) {
+      if (m_bc->cf_last->ndw + 2 * group.slots() > 240) {
+         m_bc->force_add_cf = 1;
+         m_last_addr = nullptr;
+      } else {
+         auto instr = *group.begin();
+         if (instr &&
+             !instr->has_alu_flag(alu_is_lds) &&
+             instr->opcode() == op0_group_barrier &&
+             m_bc->cf_last->ndw + 14 > 240) {
+            m_bc->force_add_cf = 1;
+            m_last_addr = nullptr;
+         }
+      }
    }
 
-   return true;
-}
+   auto addr = group.addr();
 
+   if (addr.first) {
+      if (!addr.second) {
+         if (!m_last_addr || !m_bc->ar_loaded ||
+             !m_last_addr->equal_to(*addr.first)) {
+            m_bc->ar_reg = addr.first->sel();
+            m_bc->ar_chan = addr.first->chan();
+            m_last_addr = addr.first;
+            m_bc->ar_loaded = 0;
 
-bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
+            r600_load_ar(m_bc);
+         }
+      } else {
+         emit_index_reg(*addr.first, 0);
+      }
+   }
+
+   for (auto& i : group) {
+      if (i)
+         i->accept(*this);
+   }
+}
+
+void AssamblerVisitor::visit(const TexInstr& tex_instr)
 {
-   r600_bytecode_output output;
-   assert(exi.gpr().type() == Value::gpr_vector);
-   const auto& gpr = exi.gpr();
+   clear_states(sf_vtx | sf_alu);
 
-   memset(&output, 0, sizeof(output));
-   output.gpr = gpr.sel();
-   output.elem_size = 3;
-   output.swizzle_x = gpr.chan_i(0);
-   output.swizzle_y = gpr.chan_i(1);
-   output.swizzle_z = gpr.chan_i(2);
-   output.swizzle_w = gpr.chan_i(3);
-   output.burst_count = 1;
-   output.array_base = exi.location();
-   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
-   output.type = exi.export_type();
+   int sampler_offset = 0;
+   auto addr = tex_instr.sampler_offset();
+   EBufferIndexMode index_mode = bim_none;
 
+   if (addr)
+      index_mode = emit_index_reg(*addr, 1);
 
-   if (r600_bytecode_add_output(m_bc, &output)) {
-      R600_ERR("Error adding pixel export at location %d\n", exi.location());
-      return false;
+   if (tex_fetch_results.find(tex_instr.src().sel()) !=
+       tex_fetch_results.end()) {
+      m_bc->force_add_cf = 1;
+      tex_fetch_results.clear();
    }
 
-   return true;
-}
+   r600_bytecode_tex tex;
+   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+   tex.op = tex_instr.opcode();
+   tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
+   tex.resource_id = tex_instr.resource_id() + sampler_offset;
+   tex.src_gpr = tex_instr.src().sel();
+   tex.dst_gpr = tex_instr.dst().sel();
+   tex.dst_sel_x = tex_instr.dest_swizzle(0);
+   tex.dst_sel_y = tex_instr.dest_swizzle(1);
+   tex.dst_sel_z = tex_instr.dest_swizzle(2);
+   tex.dst_sel_w = tex_instr.dest_swizzle(3);
+   tex.src_sel_x = tex_instr.src()[0]->chan();
+   tex.src_sel_y = tex_instr.src()[1]->chan();
+   tex.src_sel_z = tex_instr.src()[2]->chan();
+   tex.src_sel_w = tex_instr.src()[3]->chan();
+   tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized);
+   tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized);
+   tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized);
+   tex.coord_type_w = !tex_instr.has_tex_flag(TexInstr::w_unnormalized);
+   tex.offset_x = tex_instr.get_offset(0);
+   tex.offset_y = tex_instr.get_offset(1);
+   tex.offset_z = tex_instr.get_offset(2);
+   tex.resource_index_mode = index_mode;
+   tex.sampler_index_mode = index_mode;
 
+   if (tex.dst_sel_x < 4 &&
+       tex.dst_sel_y < 4 &&
+       tex.dst_sel_z < 4 &&
+       tex.dst_sel_w < 4)
+      tex_fetch_results.insert(tex.dst_gpr);
 
-bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
-{
-   if (exi.location() >= m_max_color_exports && exi.location()  < 60) {
-      R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
-               exi.location(), m_max_color_exports);
-      return true;
+   if (tex_instr.opcode() == TexInstr::get_gradient_h ||
+       tex_instr.opcode() == TexInstr::get_gradient_v)
+      tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0;
+   else
+      tex.inst_mod = tex_instr.inst_mode();
+   if (r600_bytecode_add_tex(m_bc, &tex)) {
+      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
+      m_result = false;
    }
+}
 
-   assert(exi.gpr().type() == Value::gpr_vector);
-   const auto& gpr = exi.gpr();
+void AssamblerVisitor::visit(const ExportInstr& exi)
+{
+   const auto& value = exi.value();
 
    r600_bytecode_output output;
    memset(&output, 0, sizeof(output));
 
-   output.gpr = gpr.sel();
+   output.gpr = value.sel();
    output.elem_size = 3;
-   output.swizzle_x = gpr.chan_i(0);
-   output.swizzle_y = gpr.chan_i(1);
-   output.swizzle_z = gpr.chan_i(2);
-   output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
+   output.swizzle_x = value[0]->chan();
+   output.swizzle_y = value[1]->chan();
+   output.swizzle_z = value[2]->chan();
    output.burst_count = 1;
-   output.array_base = exi.location();
    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
    output.type = exi.export_type();
 
 
-   if (r600_bytecode_add_output(m_bc, &output)) {
-      R600_ERR("Error adding pixel export at location %d\n", exi.location());
-      return false;
-   }
-
-   return true;
-}
-
-
-bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi)
-{
+   clear_states(sf_all);
    switch (exi.export_type()) {
-   case ExportInstruction::et_pixel:
-      return emit_fs_pixel_export(exi);
-   case ExportInstruction::et_pos:
-      return emit_vs_pos_export(exi);
-   case ExportInstruction::et_param:
-      return emit_vs_param_export(exi);
+   case ExportInstr::pixel:
+      output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan();
+      output.array_base = exi.location();
+   break;
+   case ExportInstr::pos:
+      output.swizzle_w = exi.value()[3]->chan();
+      output.array_base = 60 + exi.location();      
+   break;
+   case ExportInstr::param:
+      output.swizzle_w = exi.value()[3]->chan();
+      output.array_base = exi.location();
+   break;
    default:
       R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
-      return false;
+      m_result = false;
    }
-}
-
-bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr)
-{
-   int elems = m_callstack.push(FC_PUSH_VPM);
-   bool needs_workaround = false;
-
-   if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1)
-      needs_workaround = true;
-
-   if (m_bc->family != CHIP_HEMLOCK &&
-       m_bc->family != CHIP_CYPRESS &&
-       m_bc->family != CHIP_JUNIPER) {
-      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
-      unsigned dmod2 = (elems) % m_bc->stack.entry_size;
 
-      if (elems && (!dmod1 || !dmod2))
-         needs_workaround = true;
+   /* If all register elements pinned to fixed values
+    * we can override the gpr (the register allocator doesn't see
+    * this because it doesn't take these channels into account. */
+   if (output.swizzle_x > 3 && output.swizzle_y > 3 &&
+       output.swizzle_z > 3 && output.swizzle_w > 3)
+       output.gpr = 0;
+
+   int r = 0;
+   if ((r =r600_bytecode_add_output(m_bc, &output))) {
+      R600_ERR("Error adding export at location %d : err: %d\n", exi.location(), r);
+      m_result = false;
    }
-
-   auto& pred = if_instr.pred();
-
-   if (needs_workaround) {
-      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
-      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
-      auto new_pred = pred;
-      new_pred.set_cf_type(cf_alu);
-      visit(new_pred);
-   } else
-      visit(pred);
-
-   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
-
-   m_jump_tracker.push(m_bc->cf_last, jt_if);
-   return true;
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr)
+void AssamblerVisitor::visit(const WriteScratchInstr& instr)
 {
-   r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
-   m_bc->cf_last->pop_count = 1;
-   return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
-}
-
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr)
-{
-   m_callstack.pop(FC_PUSH_VPM);
+   clear_states(sf_all);
 
-   unsigned force_pop = m_bc->force_add_cf;
-   if (!force_pop) {
-      int alu_pop = 3;
-      if (m_bc->cf_last) {
-         if (m_bc->cf_last->op == CF_OP_ALU)
-            alu_pop = 0;
-         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
-            alu_pop = 1;
-      }
-      alu_pop += 1;
-      if (alu_pop == 1) {
-         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
-         m_bc->force_add_cf = 1;
-      } else if (alu_pop == 2) {
-         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
-         m_bc->force_add_cf = 1;
-      } else {
-         force_pop = 1;
-      }
-   }
+   struct r600_bytecode_output cf;
 
-   if (force_pop) {
-      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
-      m_bc->cf_last->pop_count = 1;
-      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
-   }
+   memset(&cf, 0, sizeof(struct r600_bytecode_output));
 
-   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
-}
+   cf.op = CF_OP_MEM_SCRATCH;
+   cf.elem_size = 3;
+   cf.gpr = instr.value().sel();
+   cf.mark = 1;
+   cf.comp_mask = instr.write_mask();
+   cf.swizzle_x = 0;
+   cf.swizzle_y = 1;
+   cf.swizzle_z = 2;
+   cf.swizzle_w = 3;
+   cf.burst_count = 1;
 
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr)
-{
-   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
-   m_jump_tracker.push(m_bc->cf_last, jt_loop);
-   m_callstack.push(FC_LOOP);
-   ++m_loop_nesting;
-   return true;
-}
+   if (instr.address()) {
+      cf.type = 3;
+      cf.index_gpr = instr.address()->sel();
 
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr)
-{
-   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
-   m_callstack.pop(FC_LOOP);
-   assert(m_loop_nesting);
-   --m_loop_nesting;
-   return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
-}
+      /* The docu seems to be wrong here: In indirect addressing the
+       * address_base seems to be the array_size */
+      cf.array_size = instr.array_size();
+   } else {
+      cf.type = 2;
+      cf.array_base = instr.location();
+   }
+   /* This should be 0, but the address calculation is apparently wrong */
 
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr)
-{
-   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
-   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
-}
 
-bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr)
-{
-   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
-   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+   if (r600_bytecode_add_output(m_bc, &cf)){
+      R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
+      m_result = false;
+   }
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr)
+void AssamblerVisitor::visit(const StreamOutInstr& instr)
 {
    struct r600_bytecode_output output;
    memset(&output, 0, sizeof(struct r600_bytecode_output));
 
-   output.gpr = so_instr.gpr().sel();
-   output.elem_size = so_instr.element_size();
-   output.array_base = so_instr.array_base();
+   output.gpr = instr.value().sel();
+   output.elem_size = instr.element_size();
+   output.array_base = instr.array_base();
    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-   output.burst_count = so_instr.burst_count();
-   output.array_size = so_instr.array_size();
-   output.comp_mask = so_instr.comp_mask();
-   output.op = so_instr.op();
+   output.burst_count = instr.burst_count();
+   output.array_size = instr.array_size();
+   output.comp_mask = instr.comp_mask();
+   output.op = instr.op();
 
    assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
 
-
    if (r600_bytecode_add_output(m_bc, &output))  {
       R600_ERR("shader_from_nir: Error creating stream output instruction\n");
-      return false;
+      m_result = false;
    }
-   return true;
 }
 
-
-bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr)
+void AssamblerVisitor::visit(const MemRingOutInstr& instr)
 {
    struct r600_bytecode_output output;
    memset(&output, 0, sizeof(struct r600_bytecode_output));
 
-   output.gpr = instr.gpr().sel();
+   output.gpr = instr.value().sel();
    output.type = instr.type();
    output.elem_size = 3;
    output.comp_mask = 0xf;
    output.burst_count = 1;
    output.op = instr.op();
-   if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
+   if (instr.type() == MemRingOutInstr::mem_write_ind ||
+       instr.type() == MemRingOutInstr::mem_write_ind_ack) {
       output.index_gpr = instr.index_reg();
       output.array_size = 0xfff;
    }
@@ -625,99 +589,35 @@ bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr)
 
    if (r600_bytecode_add_output(m_bc, &output)) {
       R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
-      return false;
+      m_result = false;
    }
-   return true;
 }
 
-
-bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr)
+void AssamblerVisitor::visit(const EmitVertexInstr& instr)
 {
-   int sampler_offset = 0;
-   auto addr = tex_instr.sampler_offset();
-   EBufferIndexMode index_mode = bim_none;
-
-   if (addr) {
-      if (addr->type() == Value::literal) {
-         const auto& boffs = static_cast<const LiteralValue&>(*addr);
-         sampler_offset = boffs.value();
-      } else {
-         index_mode = emit_index_reg(*addr, 1);
-      }
-   }
-
-   if (tex_fetch_results.find(tex_instr.src().sel()) !=
-       tex_fetch_results.end()) {
-      m_bc->force_add_cf = 1;
-      tex_fetch_results.clear();
-   }
-
-   r600_bytecode_tex tex;
-   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
-   tex.op = tex_instr.opcode();
-   tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
-   tex.resource_id = tex_instr.resource_id() + sampler_offset;
-   tex.src_gpr = tex_instr.src().sel();
-   tex.dst_gpr = tex_instr.dst().sel();
-   tex.dst_sel_x = tex_instr.dest_swizzle(0);
-   tex.dst_sel_y = tex_instr.dest_swizzle(1);
-   tex.dst_sel_z = tex_instr.dest_swizzle(2);
-   tex.dst_sel_w = tex_instr.dest_swizzle(3);
-   tex.src_sel_x = tex_instr.src().chan_i(0);
-   tex.src_sel_y = tex_instr.src().chan_i(1);
-   tex.src_sel_z = tex_instr.src().chan_i(2);
-   tex.src_sel_w = tex_instr.src().chan_i(3);
-   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
-   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
-   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
-   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
-   tex.offset_x = tex_instr.get_offset(0);
-   tex.offset_y = tex_instr.get_offset(1);
-   tex.offset_z = tex_instr.get_offset(2);
-   tex.resource_index_mode = index_mode;
-   tex.sampler_index_mode = index_mode;
-
-   if (tex.dst_sel_x < 4 &&
-       tex.dst_sel_y < 4 &&
-       tex.dst_sel_z < 4 &&
-       tex.dst_sel_w < 4)
-      tex_fetch_results.insert(tex.dst_gpr);
-
-   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
-       tex_instr.opcode() == TexInstruction::get_gradient_v)
-      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
+   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
+   if (!r)
+      m_bc->cf_last->count = instr.stream();
    else
-      tex.inst_mod = tex_instr.inst_mode();
-   if (r600_bytecode_add_tex(m_bc, &tex)) {
-      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
-      return false;
-   }
-   return true;
+      m_result = false;
+   assert(m_bc->cf_last->count < 4);
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
+void AssamblerVisitor::visit(const FetchInstr& fetch_instr)
 {
-   int buffer_offset = 0;
-   auto addr = fetch_instr.buffer_offset();
-   auto index_mode = fetch_instr.buffer_index_mode();
+   clear_states(sf_tex | sf_alu);
 
-   if (addr) {
-      if (addr->type() == Value::literal) {
-         const auto& boffs = static_cast<const LiteralValue&>(*addr);
-         buffer_offset = boffs.value();
-      } else {
-         index_mode = emit_index_reg(*addr, 0);
-      }
-   }
+   auto buffer_offset = fetch_instr.resource_offset();
+   EBufferIndexMode rat_index_mode = bim_none;
 
-   if (fetch_instr.has_prelude()) {
-      for(auto &i : fetch_instr.prelude()) {
-         if (!i->accept(*this))
-            return false;
-      }
-   }
+   if (buffer_offset)
+      rat_index_mode = emit_index_reg(*buffer_offset, 0);
 
-   bool use_tc = fetch_instr.use_tc() || (m_bc->gfx_level == CAYMAN);
+   if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack))
+      emit_wait_ack();
+
+   bool use_tc = fetch_instr.has_fetch_flag(FetchInstr::use_tc) ||
+                 (m_bc->gfx_level == CAYMAN);
    if (!use_tc &&
        vtx_fetch_results.find(fetch_instr.src().sel()) !=
        vtx_fetch_results.end()) {
@@ -725,7 +625,7 @@ bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
       vtx_fetch_results.clear();
    }
 
-   if (fetch_instr.use_tc() &&
+   if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) &&
        tex_fetch_results.find(fetch_instr.src().sel()) !=
        tex_fetch_results.end()) {
       m_bc->force_add_cf = 1;
@@ -739,161 +639,59 @@ bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
 
    struct r600_bytecode_vtx vtx;
    memset(&vtx, 0, sizeof(vtx));
-   vtx.op = fetch_instr.vc_opcode();
-   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
+   vtx.op = fetch_instr.opcode();
+   vtx.buffer_id = fetch_instr.resource_id();
    vtx.fetch_type = fetch_instr.fetch_type();
    vtx.src_gpr = fetch_instr.src().sel();
    vtx.src_sel_x = fetch_instr.src().chan();
    vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
    vtx.dst_gpr = fetch_instr.dst().sel();
-   vtx.dst_sel_x = fetch_instr.swz(0);		/* SEL_X */
-   vtx.dst_sel_y = fetch_instr.swz(1);		/* SEL_Y */
-   vtx.dst_sel_z = fetch_instr.swz(2);		/* SEL_Z */
-   vtx.dst_sel_w = fetch_instr.swz(3);		/* SEL_W */
-   vtx.use_const_fields = fetch_instr.use_const_fields();
+   vtx.dst_sel_x = fetch_instr.dest_swizzle(0);		/* SEL_X */
+   vtx.dst_sel_y = fetch_instr.dest_swizzle(1);		/* SEL_Y */
+   vtx.dst_sel_z = fetch_instr.dest_swizzle(2);		/* SEL_Z */
+   vtx.dst_sel_w = fetch_instr.dest_swizzle(3);		/* SEL_W */
+   vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field);
    vtx.data_format = fetch_instr.data_format();
    vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
-   vtx.format_comp_all = fetch_instr.is_signed();	/* FORMAT_COMP_SIGNED */
+   vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed);
    vtx.endian = fetch_instr.endian_swap();
-   vtx.buffer_index_mode = index_mode;
-   vtx.offset = fetch_instr.offset();
-   vtx.indexed = fetch_instr.indexed();
-   vtx.uncached = fetch_instr.uncached();
+   vtx.buffer_index_mode = rat_index_mode;
+   vtx.offset = fetch_instr.src_offset();
+   vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed);
+   vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached);
    vtx.elem_size = fetch_instr.elm_size();
    vtx.array_base = fetch_instr.array_base();
    vtx.array_size = fetch_instr.array_size();
-   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
-
+   vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode);
 
-   if (fetch_instr.use_tc()) {
+   if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) {
       if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
-         return false;
+         m_result = false;
       }
 
    } else {
       if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
-         return false;
-      }
-   }
-
-   m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && fetch_instr.use_vpm();
-   m_bc->cf_last->barrier = 1;
-
-   return true;
-}
-
-bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr)
-{
-   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
-   if (!r)
-      m_bc->cf_last->count = instr.stream();
-   assert(m_bc->cf_last->count < 4);
-
-   return r == 0;
-}
-
-bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr)
-{
-   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
-   if (!r) {
-      m_bc->cf_last->cf_addr = instr.n_ack();
-      m_bc->cf_last->barrier = 1;
-   }
-
-   return r == 0;
-}
-
-bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr)
-{
-   struct r600_bytecode_output cf;
-
-   memset(&cf, 0, sizeof(struct r600_bytecode_output));
-
-   cf.op = CF_OP_MEM_SCRATCH;
-   cf.elem_size = 3;
-   cf.gpr = instr.gpr().sel();
-   cf.mark = 1;
-   cf.comp_mask = instr.write_mask();
-   cf.swizzle_x = 0;
-   cf.swizzle_y = 1;
-   cf.swizzle_z = 2;
-   cf.swizzle_w = 3;
-   cf.burst_count = 1;
-
-   if (instr.indirect()) {
-      cf.type = 3;
-      cf.index_gpr = instr.address();
-
-      /* The docu seems to be wrong here: In indirect addressing the
-       * address_base seems to be the array_size */
-      cf.array_size = instr.array_size();
-   } else {
-      cf.type = 2;
-      cf.array_base = instr.location();
-   }
-   /* This should be 0, but the address calculation is apparently wrong */
-
-
-   if (r600_bytecode_add_output(m_bc, &cf)){
-      R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
-      return false;
-   }
-
-   return true;
-}
-
-extern const std::map<ESDOp, int> ds_opcode_map;
-
-bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr)
-{
-   struct r600_bytecode_gds gds;
-
-   int uav_idx = -1;
-   auto addr = instr.uav_id();
-   if (addr->type() != Value::literal) {
-      emit_index_reg(*addr, 1);
-   } else {
-      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
-      uav_idx = addr_reg.value();
-   }
-
-   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
-
-   gds.op = ds_opcode_map.at(instr.op());
-   gds.dst_gpr = instr.dest_sel();
-   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
-   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
-   gds.src_gpr = instr.src_sel();
-
-   gds.src_sel_x = instr.src_swizzle(0);
-   gds.src_sel_y = instr.src_swizzle(1);
-   gds.src_sel_z = instr.src_swizzle(2);
-
-   gds.dst_sel_x = instr.dest_swizzle(0);
-   gds.dst_sel_y = 7;
-   gds.dst_sel_z = 7;
-   gds.dst_sel_w = 7;
-   gds.src_gpr2 = 0;
-   gds.alloc_consume = 1; // Not Cayman
+         m_result = false;
+      }
+   }
 
-   int r = r600_bytecode_add_gds(m_bc, &gds);
-   if (r)
-      return false;
-   m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
+   m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) &&
+         fetch_instr.has_fetch_flag(FetchInstr::vpm);
    m_bc->cf_last->barrier = 1;
-   return true;
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
+void AssamblerVisitor::visit(const WriteTFInstr& instr)
 {
    struct r600_bytecode_gds gds;
 
+   auto& value = instr.value();
+
    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
-   gds.src_gpr = instr.sel();
-   gds.src_sel_x = instr.chan(0);
-   gds.src_sel_y = instr.chan(1);
+   gds.src_gpr = value.sel();
+   gds.src_sel_x = value[0]->chan();
+   gds.src_sel_y = value[1]->chan();
    gds.src_sel_z = 4;
    gds.dst_sel_x = 7;
    gds.dst_sel_y = 7;
@@ -901,14 +699,16 @@ bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
    gds.dst_sel_w = 7;
    gds.op = FETCH_OP_TF_WRITE;
 
-   if (r600_bytecode_add_gds(m_bc, &gds) != 0)
-         return false;
+   if (r600_bytecode_add_gds(m_bc, &gds) != 0) {
+      m_result = false;
+      return;
+   }
 
-   if (instr.chan(2) != 7) {
+   if (value[2]->chan() != 7) {
       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
-      gds.src_gpr = instr.sel();
-      gds.src_sel_x = instr.chan(2);
-      gds.src_sel_y = instr.chan(3);
+      gds.src_gpr = value.sel();
+      gds.src_sel_x = value[2]->chan();
+      gds.src_sel_y = value[3]->chan();
       gds.src_sel_z = 4;
       gds.dst_sel_x = 7;
       gds.dst_sel_y = 7;
@@ -916,178 +716,259 @@ bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
       gds.dst_sel_w = 7;
       gds.op = FETCH_OP_TF_WRITE;
 
-      if (r600_bytecode_add_gds(m_bc, &gds))
-         return false;
+      if (r600_bytecode_add_gds(m_bc, &gds)) {
+         m_result = false;
+         return;
+      }
    }
-   return true;
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr)
+void AssamblerVisitor::visit(const RatInstr& instr)
 {
-   r600_bytecode_alu alu;
-   memset(&alu, 0, sizeof(r600_bytecode_alu));
+   struct r600_bytecode_gds gds;
 
-   alu.last = true;
-   alu.is_lds_idx_op = true;
-   copy_src(alu.src[0], instr.address());
-   copy_src(alu.src[1], instr.value0());
+   /* The instruction writes to the retuen buffer loaction, and
+    * the value will actually be read bach, so make sure all previous writes
+    * have been finished */
+   if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/)
+      emit_wait_ack();
 
-   if (instr.num_components() == 1) {
-      alu.op = LDS_OP2_LDS_WRITE;
-   } else {
-      alu.op = LDS_OP3_LDS_WRITE_REL;
-      alu.lds_idx = 1;
-      copy_src(alu.src[2], instr.value1());
+   int rat_idx = instr.rat_id();
+   EBufferIndexMode rat_index_mode = bim_none;
+   auto addr = instr.rat_id_offset();
+
+   if (addr)
+      rat_index_mode = emit_index_reg(*addr, 1);
+
+   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+
+   r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
+   auto cf = m_bc->cf_last;
+   cf->rat.id = rat_idx + m_shader->rat_base;
+   cf->rat.inst = instr.rat_op();
+   cf->rat.index_mode = rat_index_mode;
+   cf->output.type = instr.need_ack() ? 3 : 1;
+   cf->output.gpr = instr.data_gpr();
+   cf->output.index_gpr = instr.index_gpr();
+   cf->output.comp_mask = instr.comp_mask();
+   cf->output.burst_count = instr.burst_count();
+   assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
+   if (cf->rat.inst != RatInstr::STORE_TYPED) {
+      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
+             instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
+      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
+             instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
    }
 
-   return r600_bytecode_add_alu(m_bc, &alu) == 0;
+   cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
+   cf->barrier = 1;
+   cf->mark = instr.need_ack();
+   cf->output.elem_size = instr.elm_size();
+
+   m_ack_suggested |= instr.need_ack();
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr)
+
+void AssamblerVisitor::clear_states(const uint32_t& states)
 {
-   int r;
-   unsigned nread = 0;
-   unsigned nfetch = 0;
-   unsigned n_values = instr.num_values();
+   if (states & sf_vtx)
+      vtx_fetch_results.clear();
 
-   r600_bytecode_alu alu_fetch;
-   r600_bytecode_alu alu_read;
+   if (states & sf_tex)
+      tex_fetch_results.clear();
 
-   /* We must add a new ALU clause if the fetch and read op would be split otherwise
-    * r600_asm limits at 120 slots = 240 dwords */
-   if (m_bc->cf_last->ndw > 240 - 4 * n_values)
-      m_bc->force_add_cf = 1;
+   if (states & sf_alu) {
+      m_last_op_was_barrier = false;
+      m_last_addr = nullptr;
+   }
 
-   while (nread < n_values) {
-      if (nfetch < n_values) {
-         memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
-         alu_fetch.is_lds_idx_op = true;
-         alu_fetch.op = LDS_OP1_LDS_READ_RET;
-
-         copy_src(alu_fetch.src[0], instr.address(nfetch));
-         alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
-         alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
-         alu_fetch.last = 1;
-         r = r600_bytecode_add_alu(m_bc, &alu_fetch);
-         m_bc->cf_last->nlds_read++;
-         if (r)
-            return false;
-      }
+}
 
-      if (nfetch >= n_values) {
-         memset(&alu_read, 0, sizeof(r600_bytecode_alu));
-         copy_dst(alu_read.dst, instr.dest(nread));
-         alu_read.op = ALU_OP1_MOV;
-         alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
-         alu_read.last = 1;
-         alu_read.dst.write = 1;
-         r = r600_bytecode_add_alu(m_bc, &alu_read);
-         m_bc->cf_last->nqueue_read++;
-         if (r)
-            return false;
-         ++nread;
-      }
-      ++nfetch;
-   }
-   assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);
 
-   return true;
+void AssamblerVisitor::visit(const Block& block)
+{
+   if (block.empty())
+      return;
+
+   m_bc->force_add_cf = block.has_instr_flag(Instr::force_cf);
+   sfn_log << SfnLog::assembly << "Translate block  size: " << block.size() << " new_cf:" << m_bc->force_add_cf << "\n";
+
+   for (const auto& i : block) {
+      sfn_log << SfnLog::assembly << "Translate " << *i << " ";
+      i->accept(*this);
+      sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n";
+
+      if (!m_result)
+         break;
+   }
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr)
+void AssamblerVisitor::visit(const IfInstr& instr)
 {
-   if (m_bc->cf_last->ndw > 240 - 4)
-      m_bc->force_add_cf = 1;
+   int elems = m_callstack.push(FC_PUSH_VPM);
+   bool needs_workaround = false;
 
-   r600_bytecode_alu alu_fetch;
-   r600_bytecode_alu alu_read;
+   if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1)
+      needs_workaround = true;
 
-   memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
-   alu_fetch.is_lds_idx_op = true;
-   alu_fetch.op = instr.op();
+   if (m_bc->gfx_level == EVERGREEN &&
+       m_bc->family != CHIP_HEMLOCK &&
+       m_bc->family != CHIP_CYPRESS &&
+       m_bc->family != CHIP_JUNIPER) {
+      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
+      unsigned dmod2 = (elems) % m_bc->stack.entry_size;
 
-   copy_src(alu_fetch.src[0], instr.address());
-   copy_src(alu_fetch.src[1], instr.src0());
+      if (elems && (!dmod1 || !dmod2))
+         needs_workaround = true;
+   }
 
-   if (instr.src1())
-      copy_src(alu_fetch.src[2], *instr.src1());
-   alu_fetch.last = 1;
-   int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
-   if (r)
-      return false;
+   auto pred = instr.predicate();
+   auto [addr, dummy ] = pred->indirect_addr(); {}
+   if (addr) {
+      if (!m_last_addr || !m_bc->ar_loaded ||
+          !m_last_addr->equal_to(*addr)) {
+         m_bc->ar_reg = addr->sel();
+            m_bc->ar_chan = addr->chan();
+            m_last_addr = addr;
+            m_bc->ar_loaded = 0;
+
+            r600_load_ar(m_bc);
+      }
+   }
 
-   memset(&alu_read, 0, sizeof(r600_bytecode_alu));
-   copy_dst(alu_read.dst, instr.dest());
-   alu_read.op = ALU_OP1_MOV;
-   alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
-   alu_read.last = 1;
-   alu_read.dst.write = 1;
-   r = r600_bytecode_add_alu(m_bc, &alu_read);
-   if (r)
-      return false;
-   return true;
+   if (needs_workaround) {
+      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
+      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+      pred->set_cf_type(cf_alu);
+   }
+
+   clear_states(sf_tex|sf_vtx);
+   pred->accept(*this);
+
+   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
+   clear_states(sf_all);
+
+   m_jump_tracker.push(m_bc->cf_last, jt_if);
+}
+
+void AssamblerVisitor::visit(const ControlFlowInstr& instr)
+{
+   clear_states(sf_all);
+   switch (instr.cf_type()) {
+   case ControlFlowInstr::cf_else:
+      emit_else();
+      break;
+   case ControlFlowInstr::cf_endif:
+      emit_endif();
+      break;
+   case ControlFlowInstr::cf_loop_begin:
+      emit_loop_begin(instr.has_instr_flag(Instr::vpm));
+      break;
+   case ControlFlowInstr::cf_loop_end:
+      emit_loop_end();
+      break;
+   case ControlFlowInstr::cf_loop_break:
+      emit_loop_break();
+      break;
+   case ControlFlowInstr::cf_loop_continue:
+      emit_loop_cont();
+      break;
+   case ControlFlowInstr::cf_wait_ack:
+   {
+      int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
+      if (!r) {
+         m_bc->cf_last->cf_addr = 0;
+         m_bc->cf_last->barrier = 1;
+         m_ack_suggested = false;
+      } else {
+         m_result = false;
+      }
+   }
+      break;
+   default:
+      unreachable("Unknown CF instruction type");
+   }
 }
 
-bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr)
+void AssamblerVisitor::visit(const GDSInstr& instr)
 {
    struct r600_bytecode_gds gds;
 
-   int rat_idx = instr.rat_id();
-   EBufferIndexMode rat_index_mode = bim_none;
-   auto addr = instr.rat_id_offset();
+   bool indirect = false;
+   auto addr = instr.uav_id();
 
    if (addr) {
-      if (addr->type() != Value::literal) {
-         rat_index_mode = emit_index_reg(*addr, 1);
-      } else {
-         const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
-         rat_idx += addr_reg.value();
-      }
+      indirect = true;
+      emit_index_reg(*addr, 1);
    }
+
    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
 
-   r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
-   auto cf = m_bc->cf_last;
-   cf->rat.id = rat_idx + m_shader->rat_base;
-   cf->rat.inst = instr.rat_op();
-   cf->rat.index_mode = rat_index_mode;
-   cf->output.type = instr.need_ack() ? 3 : 1;
-   cf->output.gpr = instr.data_gpr();
-   cf->output.index_gpr = instr.index_gpr();
-   cf->output.comp_mask = instr.comp_mask();
-   cf->output.burst_count = instr.burst_count();
-   assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
-   if (cf->rat.inst != RatInstruction::STORE_TYPED) {
-      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
-             instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
-      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
-             instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
+   gds.op = ds_opcode_map.at(instr.opcode());
+   gds.dst_gpr = instr.dest()->sel();
+   gds.uav_id = instr.uav_base();
+   gds.uav_index_mode = indirect ? bim_one : bim_none;
+   gds.src_gpr = instr.src().sel();
+
+   gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4;
+   gds.src_sel_y = instr.src()[1]->chan();
+   gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4;
+
+   gds.dst_sel_x = 7;
+   gds.dst_sel_y = 7;
+   gds.dst_sel_z = 7;
+   gds.dst_sel_w = 7;
+
+   switch (instr.dest()->chan()) {
+   case 0: gds.dst_sel_x = 0;break;
+   case 1: gds.dst_sel_y = 0;break;
+   case 2: gds.dst_sel_z = 0;break;
+   case 3: gds.dst_sel_w = 0;
    }
 
-   cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
-   cf->barrier = 1;
-   cf->mark = instr.need_ack();
-   cf->output.elem_size = instr.elm_size();
-   return true;
+   gds.src_gpr2 = 0;
+   gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 1 : 0; // Not Cayman
+
+   int r = r600_bytecode_add_gds(m_bc, &gds);
+   if (r) {
+      m_result = false;
+      return;
+   }
+   m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
+   m_bc->cf_last->barrier = 1;
+}
+
+void AssamblerVisitor::visit(const LDSAtomicInstr& instr)
+{
+   (void)instr;
+   unreachable("LDSAtomicInstr must be lowered to ALUInstr");
+}
+
+void AssamblerVisitor::visit(const LDSReadInstr& instr)
+{
+   (void)instr;
+   unreachable("LDSReadInstr must be lowered to ALUInstr");
 }
 
 EBufferIndexMode
-AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
+AssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx)
 {
    assert(idx < 2);
 
    if (!m_bc->index_loaded[idx] || m_loop_nesting ||
-       m_bc->index_reg[idx] != addr.sel()
-       ||  m_bc->index_reg_chan[idx] != addr.chan()) {
+       m_bc->index_reg[idx] != (unsigned)addr.sel()
+       ||  m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) {
       struct r600_bytecode_alu alu;
 
       // Make sure MOVA is not last instr in clause
-      if ((m_bc->cf_last->ndw>>1) >= 110)
+
+      if (!m_bc->cf_last || (m_bc->cf_last->ndw>>1) >= 110)
          m_bc->force_add_cf = 1;
 
       if (m_bc->gfx_level != CAYMAN) {
 
          EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
+
          memset(&alu, 0, sizeof(alu));
          alu.op = opcode_map.at(op1_mova_int);
          alu.dst.chan = 0;
@@ -1126,18 +1007,87 @@ AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
       m_bc->index_reg[idx] = addr.sel();
       m_bc->index_reg_chan[idx] = addr.chan();
       m_bc->index_loaded[idx] = true;
+      m_bc->force_add_cf = 1;
       sfn_log << SfnLog::assembly << "\n";
    }
    return idx == 0 ? bim_zero : bim_one;
 }
 
-bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
-                                            const Value& d)
+void AssamblerVisitor::emit_else()
+{
+   r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
+   m_bc->cf_last->pop_count = 1;
+   m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
+}
+
+void AssamblerVisitor::emit_endif()
+{
+   m_callstack.pop(FC_PUSH_VPM);
+
+   unsigned force_pop = m_bc->force_add_cf;
+   if (!force_pop) {
+      int alu_pop = 3;
+      if (m_bc->cf_last) {
+         if (m_bc->cf_last->op == CF_OP_ALU)
+            alu_pop = 0;
+         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
+            alu_pop = 1;
+      }
+      alu_pop += 1;
+      if (alu_pop == 1) {
+         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
+         m_bc->force_add_cf = 1;
+      } else {
+         force_pop = 1;
+      }
+   }
+
+   if (force_pop) {
+      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
+      m_bc->cf_last->pop_count = 1;
+      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+   }
+
+   m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if);
+}
+
+void AssamblerVisitor::emit_loop_begin(bool vpm)
+{
+   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
+   m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT;
+   m_jump_tracker.push(m_bc->cf_last, jt_loop);
+   m_callstack.push(FC_LOOP);
+   ++m_loop_nesting;
+}
+
+void AssamblerVisitor::emit_loop_end()
 {
-   assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
+   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
+   m_callstack.pop(FC_LOOP);
+   assert(m_loop_nesting);
+   --m_loop_nesting;
+   m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop);
+}
+
+void AssamblerVisitor::emit_loop_break()
+{
+   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
+   m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
+
+void AssamblerVisitor::emit_loop_cont()
+{
+   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
+   m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
 
-   if (d.sel() > 124) {
-      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
+bool AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst,
+                                const Register& d, bool write)
+{
+   if (write && d.sel() > 124) {
+      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n",
+               d.sel());
+      m_result = false;
       return false;
    }
 
@@ -1155,74 +1105,84 @@ bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
    return true;
 }
 
-bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
+void AssamblerVisitor::emit_wait_ack()
 {
+   int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
+   if (!r) {
+      m_bc->cf_last->cf_addr = 0;
+      m_bc->cf_last->barrier = 1;
+      m_ack_suggested = false;
+   } else
+      m_result = false;
+}
 
-   if (s.type() == Value::gpr && s.sel() > 124) {
-      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
-      return false;
-   }
+class EncodeSourceVisitor : public ConstRegisterVisitor {
+public:
 
-   if (s.type() == Value::lds_direct)  {
-      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
-      return false;
-   }
+   EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc);
+   void visit(const Register& value) override;
+   void visit(const LocalArray& value) override;
+   void visit(const LocalArrayValue& value) override;
+   void visit(const UniformValue& value) override;
+   void visit(const LiteralConstant& value) override;
+   void visit(const InlineConstant& value) override;
 
-   if (s.type() == Value::kconst && s.sel() < 512)  {
-      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
-      return false;
-   }
+   r600_bytecode_alu_src& src;
+   r600_bytecode *m_bc;
+   PVirtualValue m_buffer_offset{nullptr};
+};
 
-   if (s.type() == Value::literal) {
-      auto& v = static_cast<const LiteralValue&>(s);
-      if (v.value() == 0) {
-         src.sel = ALU_SRC_0;
-         src.chan = 0;
-         return true;
-      }
-      if (v.value() == 1) {
-         src.sel = ALU_SRC_1_INT;
-         src.chan = 0;
-         return true;
-      }
-      if (v.value_float() == 1.0f) {
-         src.sel = ALU_SRC_1;
-         src.chan = 0;
-         return true;
-      }
-      if (v.value_float() == 0.5f) {
-         src.sel = ALU_SRC_0_5;
-         src.chan = 0;
-         return true;
-      }
-      if (v.value() == 0xffffffff) {
-         src.sel = ALU_SRC_M_1_INT;
-         src.chan = 0;
-         return true;
-      }
-      src.value = v.value();
-   }
+PVirtualValue AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s)
+{
 
+   EncodeSourceVisitor visitor(src, m_bc);
    src.sel = s.sel();
    src.chan = s.chan();
-   if (s.type() == Value::kconst) {
-      const UniformValue& cv = static_cast<const UniformValue&>(s);
-      src.kc_bank = cv.kcache_bank();
-      auto addr = cv.addr();
-      if (addr) {
-         src.kc_rel = 1;
-         emit_index_reg(*addr, 0);
-         auto type = m_bc->cf_last->op;
-         if (r600_bytecode_add_cf(m_bc)) {
-                 return false;
-         }
-         m_bc->cf_last->op = type;
-      }
-   }
 
-   return true;
+   s.accept(visitor);
+   return visitor.m_buffer_offset;
+}
+
+EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc):
+   src(s), m_bc(bc)
+{
+}
+
+void EncodeSourceVisitor::visit(const Register& value)
+{
+   assert(value.sel() <= 124 && "Only have 124 registers");
+}
+
+void EncodeSourceVisitor::visit(const LocalArray& value)
+{
+   (void)value;
+   unreachable("An array can't be a source register");
+}
+
+void EncodeSourceVisitor::visit(const LocalArrayValue& value)
+{
+   src.rel = value.addr() ? 1 : 0;
+}
+
+void EncodeSourceVisitor::visit(const UniformValue& value)
+{
+   assert(value.sel() >= 512 && "Uniform values must have a sel >= 512");
+   m_buffer_offset = value.buf_addr();
+   src.kc_bank = value.kcache_bank();
+}
+
+void EncodeSourceVisitor::visit(const LiteralConstant& value)
+{
+   src.value = value.value();
 }
 
+void EncodeSourceVisitor::visit(const InlineConstant& value)
+{
+   (void)value;
+}
+
+
+
 const std::map<EAluOp, int> opcode_map = {
 
    {op2_add, ALU_OP2_ADD},
@@ -1253,6 +1213,10 @@ const std::map<EAluOp, int> opcode_map = {
    {op2_mul_64, ALU_OP2_MUL_64},
    {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
    {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
+   {op2_prede_int, ALU_OP2_PRED_SETE_INT},
+   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
+   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
+   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
    {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
    {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
    {op2_pred_sete, ALU_OP2_PRED_SETE},
diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.h b/src/gallium/drivers/r600/sfn/sfn_assembler.h
new file mode 100644
index 0000000..796ff60
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_assembler.h
@@ -0,0 +1,26 @@
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+class Assembler
+{
+public:
+   Assembler(r600_shader *sh, const r600_shader_key& key);
+
+   bool lower(Shader *shader);
+private:
+   r600_shader *m_sh;
+   const r600_shader_key& m_key;
+};
+
+
+
+}
+
+#endif // ASSAMBLY_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
index 76cc02a..cd2b975 100644
--- a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
+++ b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
@@ -38,10 +38,7 @@ enum JumpType {
 
 /**
   Class to link the jump locations
-
 */
-
-
 class ConditionalJumpTracker
 {
 public:
@@ -49,7 +46,6 @@ public:
    ~ConditionalJumpTracker();
 
    /* Mark the start of a loop or a if/else */
-
    void push(r600_bytecode_cf *start, JumpType type);
 
    /* Mark the end of a loop or a if/else and fixup the jump sites */
diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/src/gallium/drivers/r600/sfn/sfn_debug.cpp
index 92357fc..fce891e 100644
--- a/src/gallium/drivers/r600/sfn/sfn_debug.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_debug.cpp
@@ -61,6 +61,10 @@ static const struct debug_named_value sfn_debug_options[] = {
    {"nomerge", SfnLog::nomerge, "Skip register merge step"},
    {"tex", SfnLog::tex, "Log texture ops"},
    {"trans", SfnLog::trans, "Log generic translation messages"},
+   {"schedule", SfnLog::schedule, "Log scheduling"},
+   {"opt", SfnLog::opt, "Log optimization"},
+   {"steps", SfnLog::steps, "Log shaders at transformation steps"},
+   {"noopt", SfnLog::noopt, "Don't run backend optimizations"},
    DEBUG_NAMED_VALUE_END
 };
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.h b/src/gallium/drivers/r600/sfn/sfn_debug.h
index 3910b27..fd3c0fb 100644
--- a/src/gallium/drivers/r600/sfn/sfn_debug.h
+++ b/src/gallium/drivers/r600/sfn/sfn_debug.h
@@ -64,8 +64,12 @@ public:
       merge = 1 << 10,
       tex = 1 << 11,
       trans = 1 << 12,
-      all = (1 << 13) - 1,
+      schedule = 1 << 13,
+      opt = 1 << 14,
+      all = (1 << 15) - 1,
       nomerge = 1 << 16,
+      steps = 1 << 17,
+      noopt = 1 << 18
    };
 
    SfnLog();
diff --git a/src/gallium/drivers/r600/sfn/sfn_defines.h b/src/gallium/drivers/r600/sfn/sfn_defines.h
index 31a10ae..c5a18b0 100644
--- a/src/gallium/drivers/r600/sfn/sfn_defines.h
+++ b/src/gallium/drivers/r600/sfn/sfn_defines.h
@@ -303,6 +303,9 @@ enum EVFetchFlagShift {
    vtx_alt_const,
    vtx_use_tc,
    vtx_vpm,
+   vtx_is_mega_fetch,
+   vtx_uncached,
+   vtx_indexed,
    vtx_unknown
 };
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_docu.txt b/src/gallium/drivers/r600/sfn/sfn_docu.txt
index 97a9c36..4784599 100644
--- a/src/gallium/drivers/r600/sfn/sfn_docu.txt
+++ b/src/gallium/drivers/r600/sfn/sfn_docu.txt
@@ -2,44 +2,33 @@
 
 This code is an attempt to implement a NIR backend for r600.
 
-## State
-
-Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
+Supported hardware: Cayman, Evergreen and NI (tested on CAYMAN, CEDAR and BARTS)
 
-Thanks to soft fp64 the OpenGL version is now 4.5
+Thanks to soft fp64 the OpenGL version is now 4.5 also for EG.
 
-sb has been enabled for nir to be able to run some more demanding work loads. The aim is
-still to get rid of it.
+sb can bee enabled for nir, it still gives some improvements, e.g. with Xonotic
+The aim is still to get rid of it.
 
 
-piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions.
+## State
 
-CTS gles
- - 2 passes like with TGSI
- - 3 no regressions, a few fixes compared to TGSI
- - 31
-    * a few fixes with interpolation specifiers
-    * synchronization has some unstable tests, this might be because global synchronization is missing (in both)
+TODO:
 
-GL CTS:
-  * a few regressions and a hang with KHR-GL43.compute_shader.shared-max
+piglits gpu passes mostly like with TGSI, there are some fixes but also a few regressions.
 
 piglit:
-  * spilling arrays is broken on Barts (but it works on Cedar)
-  * a few tests fail because the register limit is exhausted, and needlessly so, because
-    with better RA it would work
+  * spilling arrays is broken on Barts and CAYMAN (but it works on Cedar)
 
 ## Needed optimizations:
 
-  - Register allocator and scheduler (Could the sb allocator and scheduler
-    be ported?)
-
   - peepholes:
-    - compare + set predicate
-
-  - copy propagation:
-    - Moves from inputs are usually not required, they could be forwarded
-    - texture operations often move additional parameters in extra registers
-      but they are actually needed in the same registers they come from and
-      could just be swizzled into the right place
-      (lower in NIR like it is done in e.g. in ETNAVIV)
+    - compare + set predicate / kill
+  - use clause local registers
+  - reduce register usage
+  - don't rely on the backend to schedule addr load and Index load as well
+  - don't rely on the backend to merge some alu groups
+
+## There are still some hangs
+
+
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
deleted file mode 100644
index 3068225..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
+++ /dev/null
@@ -1,1046 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "sfn_emitaluinstruction.h"
-#include "sfn_debug.h"
-
-#include "gallium/drivers/r600/r600_shader.h"
-
-namespace r600 {
-
-using std::vector;
-
-EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
-   EmitInstruction (processor)
-{
-
-}
-
-bool EmitAluInstruction::do_emit(nir_instr* ir)
-{
-   const nir_alu_instr& instr = *nir_instr_as_alu(ir);
-
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *ir
-                 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
-                 << "' (" << __func__ << ")\n";
-
-   preload_src(instr);
-
-   if (get_chip_class() == CAYMAN) {
-      switch (instr.op) {
-      case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos);
-      case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee);
-      case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped);
-      case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee);
-      case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1);
-      case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin);
-      case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee);
-         default:
-            ;
-      }
-   }
-
-   switch (instr.op) {
-    /* These are in the ALU instruction list, but they should be texture instructions */
-   case nir_op_b2b1: return emit_mov(instr);
-   case nir_op_b2b32: return emit_mov(instr);
-   case nir_op_b2f32: return emit_alu_b2f(instr);
-   case nir_op_b2i32: return emit_b2i32(instr);
-   case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
-   case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
-   case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
-   case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
-   case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
-   case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
-   case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
-   case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
-   case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
-   case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
-   case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
-   case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
-   case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
-   case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
-   case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
-   case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
-   case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
-   case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
-   case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
-   case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
-   case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
-   case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
-   case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
-   case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
-   case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
-   case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
-   case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
-   case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
-
-   case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
-   case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
-   case nir_op_cube_r600: return emit_cube(instr);
-   case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
-   case nir_op_f2b32: return emit_alu_f2b32(instr);
-   case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
-   case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
-   case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
-   case nir_op_fadd: return emit_alu_op2(instr, op2_add);
-   case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
-   case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
-   case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
-   case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
-   case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
-
-    /* These are in the ALU instruction list, but they should be texture instructions */
-   case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
-   case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
-   case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
-   case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
-   case nir_op_fddy_coarse:
-   case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
-   case nir_op_fdot2: return emit_dot(instr, 2);
-   case nir_op_fdot3: return emit_dot(instr, 3);
-   case nir_op_fdot4: return emit_dot(instr, 4);
-   case nir_op_fdph:  return emit_fdph(instr);
-   case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
-   case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
-   case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
-   case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
-   case nir_op_ffma:
-      if (use_legacy_math_rules())
-         return emit_alu_op2(instr, op3_muladd);
-      return emit_alu_op3(instr, op3_muladd_ieee);
-   case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
-   case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
-   case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
-   case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
-   case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
-   case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
-   case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
-   case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
-   case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
-   case nir_op_fmul:
-      if (use_legacy_math_rules())
-         return emit_alu_op2(instr, op2_mul);
-      return emit_alu_op2(instr, op2_mul_ieee);
-   case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
-   case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
-   case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
-   case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
-   case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
-   case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
-   case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
-   case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
-   case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
-   case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
-   case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
-   case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
-   case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
-   case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
-   case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
-   case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
-   case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
-   case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int,  {0, 1, 2});
-   case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int,  {0, 1, 2});
-   case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
-   case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
-   case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
-   case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
-   case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
-   case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
-   case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
-   case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
-   case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
-   case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
-   case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
-   case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
-   case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
-   case nir_op_ineg: return emit_alu_ineg(instr);
-   case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
-   case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
-   case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
-   case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
-   case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
-   case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
-   case nir_op_mov:return emit_mov(instr);
-   case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
-   case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
-   case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
-   case nir_op_sge: return emit_alu_op2(instr, op2_setge);
-   case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
-   case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
-   case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
-   case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
-   case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
-   case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
-   case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
-   case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
-   case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
-   case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
-   case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
-   case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
-   case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
-   case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
-   case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
-   case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
-   case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
-   case nir_op_vec2: return emit_create_vec(instr, 2);
-   case nir_op_vec3: return emit_create_vec(instr, 3);
-   case nir_op_vec4: return emit_create_vec(instr, 4);
-   default:
-      return false;
-   }
-}
-
-void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
-{
-   const nir_op_info *op_info = &nir_op_infos[instr.op];
-   assert(op_info->num_inputs <= 4);
-
-   unsigned nsrc_comp = num_src_comp(instr);
-   sfn_log << SfnLog::reg << "Preload:\n";
-   for (unsigned i = 0; i < op_info->num_inputs; ++i) {
-      for (unsigned c = 0; c < nsrc_comp; ++c) {
-         m_src[i][c] = from_nir(instr.src[i], c);
-         sfn_log << SfnLog::reg << " " << *m_src[i][c];
-
-      }
-      sfn_log << SfnLog::reg << "\n";
-   }
-   if (instr.op == nir_op_fdph) {
-      m_src[1][3] = from_nir(instr.src[1], 3);
-      sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
-   }
-
-   split_constants(instr, nsrc_comp);
-}
-
-unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
-{
-   switch (instr.op) {
-   case nir_op_fdot2:
-   case nir_op_bany_inequal2:
-   case nir_op_ball_iequal2:
-   case nir_op_bany_fnequal2:
-   case nir_op_ball_fequal2:
-   case nir_op_b32any_inequal2:
-   case nir_op_b32all_iequal2:
-   case nir_op_b32any_fnequal2:
-   case nir_op_b32all_fequal2:
-   case nir_op_unpack_64_2x32_split_y:
-      return 2;
-
-   case nir_op_fdot3:
-   case nir_op_bany_inequal3:
-   case nir_op_ball_iequal3:
-   case nir_op_bany_fnequal3:
-   case nir_op_ball_fequal3:
-   case nir_op_b32any_inequal3:
-   case nir_op_b32all_iequal3:
-   case nir_op_b32any_fnequal3:
-   case nir_op_b32all_fequal3:
-   case nir_op_cube_r600:
-      return 3;
-
-   case nir_op_fdot4:
-   case nir_op_fdph:
-   case nir_op_bany_inequal4:
-   case nir_op_ball_iequal4:
-   case nir_op_bany_fnequal4:
-   case nir_op_ball_fequal4:
-   case nir_op_b32any_inequal4:
-   case nir_op_b32all_iequal4:
-   case nir_op_b32any_fnequal4:
-   case nir_op_b32all_fequal4:
-      return 4;
-
-   case nir_op_vec2:
-   case nir_op_vec3:
-   case nir_op_vec4:
-      return 1;
-
-   default:
-      return nir_dest_num_components(instr.dest.dest);
-
-   }
-}
-
-bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   const uint16_t src0_chan[4] = {2, 2, 0, 1};
-   const uint16_t src1_chan[4] = {1, 0, 2, 2};
-
-   for (int i = 0; i < 4; ++i)  {
-      ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
-                              from_nir(instr.src[0], src0_chan[i]),
-                              from_nir(instr.src[0], src1_chan[i]), {alu_write});
-      emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-   return true;
-}
-
-void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
-{
-    const nir_op_info *op_info = &nir_op_infos[instr.op];
-    if (op_info->num_inputs < 2)
-       return;
-
-    int nconst = 0;
-    std::array<const UniformValue *,4> c;
-    std::array<int,4> idx;
-    for (unsigned i = 0; i < op_info->num_inputs; ++i) {
-       PValue& src = m_src[i][0];
-       assert(src);
-       sfn_log << SfnLog::reg << "Split test " << *src;
-
-       if (src->type() == Value::kconst) {
-          c[nconst] = static_cast<const UniformValue *>(src.get());
-          idx[nconst++] = i;
-          sfn_log << SfnLog::reg << " is constant " << i;
-       }
-       sfn_log << SfnLog::reg << "\n";
-    }
-
-    if (nconst < 2)
-       return;
-
-    unsigned sel = c[0]->sel();
-    unsigned kcache =  c[0]->kcache_bank();
-    sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
-
-    for (int i = 1; i < nconst; ++i) {
-       sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
-       if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
-          AluInstruction *ir = nullptr;
-          auto v = get_temp_vec4();
-          for (unsigned k = 0; k < nsrc_comp; ++k) {
-             ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
-             emit_instruction(ir);
-             m_src[idx[i]][k] = v[k];
-          }
-          make_last(ir);
-       }
-    }
-}
-
-bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
-{
-   if (instr.src[0].negate || instr.src[0].abs) {
-      std::cerr << "source modifiers not supported with int ops\n";
-      return false;
-   }
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
-                                 m_src[0][i], write);
-         emit_instruction(ir);
-      }
-   }
-   make_last(ir);
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
-                                      const AluOpFlags& flags)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                 m_src[0][i], write);
-
-         if (flags.test(alu_src0_abs) || instr.src[0].abs)
-            ir->set_flag(alu_src0_abs);
-
-         if (instr.src[0].negate ^ flags.test(alu_src0_neg))
-            ir->set_flag(alu_src0_neg);
-
-         if (flags.test(alu_dst_clamp) || instr.dest.saturate)
-             ir->set_flag(alu_dst_clamp);
-
-         emit_instruction(ir);
-      }
-   }
-   make_last(ir);
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
-{
-   /* If the op is a plain move beween SSA values we can just forward
-    * the register reference to the original register */
-   if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
-       !instr.src[0].abs && !instr.src[0].negate  && !instr.dest.saturate) {
-      bool result = true;
-      for (int i = 0; i < 4 ; ++i) {
-         if (instr.dest.write_mask & (1 << i)){
-            result &= inject_register(instr.dest.dest.ssa.index, i,
-                                      m_src[0][i], true);
-         }
-      }
-      return result;
-   } else {
-      return emit_alu_op1(instr, op1_mov);
-   }
-}
-
-bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
-                                            bool absolute)
-{
-   AluInstruction *ir = nullptr;
-   std::set<int> src_idx;
-
-   if (get_chip_class() == CAYMAN) {
-      int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
-      for (int i = 0; i < last_slot; ++i) {
-         bool write_comp = instr.dest.write_mask & (1 << i);
-         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                 m_src[0][write_comp ? i : 0], write_comp ? write : empty);
-         if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
-         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-
-         if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
-
-         emit_instruction(ir);
-      }
-   } else {
-      for (int i = 0; i < 4 ; ++i) {
-         if (instr.dest.write_mask & (1 << i)){
-            ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                    m_src[0][i], last_write);
-            if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
-            if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-            if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-            emit_instruction(ir);
-         }
-      }
-   }
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode)
-{
-   AluInstruction *ir = nullptr;
-   std::set<int> src_idx;
-
-   unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
-
-   for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) {
-      for (unsigned i = 0; i < last_slot; ++i) {
-         bool write_comp = instr.dest.write_mask & (1 << j) && (i == j);
-         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                 m_src[0][j], write_comp ? write : empty);
-         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
-         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-
-         if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
-
-         emit_instruction(ir);
-      }
-   }
-   return true;
-}
-
-
-bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
-{
-   AluInstruction *ir = nullptr;
-
-   if (get_chip_class() < CAYMAN) {
-      std::array<PValue, 4> v;
-
-      for (int i = 0; i < 4; ++i) {
-         if (!(instr.dest.write_mask & (1 << i)))
-            continue;
-         v[i] = from_nir(instr.dest, i);
-         ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
-         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
-         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-         emit_instruction(ir);
-      }
-      make_last(ir);
-
-      for (int i = 0; i < 4; ++i) {
-         if (!(instr.dest.write_mask & (1 << i)))
-            continue;
-         ir = new AluInstruction(op, v[i], v[i], {alu_write});
-         emit_instruction(ir);
-         if (op == op1_flt_to_uint)
-            make_last(ir);
-      }
-      make_last(ir);
-   } else {
-      for (int i = 0; i < 4; ++i) {
-         if (!(instr.dest.write_mask & (1 << i)))
-            continue;
-         ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write});
-         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
-         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-         emit_instruction(ir);
-         if (op == op1_flt_to_uint)
-            make_last(ir);
-      }
-      make_last(ir);
-   }
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
-                                 m_src[0][i], literal(0.0f), write);
-         emit_instruction(ir);
-      }
-   }
-   make_last(ir);
-   return true;
-}
-
-bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (!(instr.dest.write_mask & (1 << i)))
-         continue;
-
-      ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
-                              m_src[0][i], Value::one_i, write);
-     emit_instruction(ir);
-   }
-   make_last(ir);
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   for (unsigned i = 0; i < 2; ++i) {
-      if (!(instr.dest.write_mask & (1 << i)))
-         continue;
-     ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
-                             m_src[0][i], write);
-     emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
-{
-   emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
-                                       m_src[0][comp], last_write));
-   return true;
-}
-
-bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
-{
-   AluInstruction *ir = nullptr;
-   std::set<int> src_slot;
-   for(unsigned i = 0; i < nc; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         auto src = m_src[i][0];
-         ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-
-         // FIXME: This is a rather crude approach to fix the problem that
-         // r600 can't read from four different slots of the same component
-         // here we check only for the register index
-         if (src->type() == Value::gpr)
-            src_slot.insert(src->sel());
-         if (src_slot.size() >= 3) {
-            src_slot.clear();
-            ir->set_flag(alu_last_instr);
-         }
-         emit_instruction(ir);
-      }
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
-{
-   const nir_alu_src& src0 = instr.src[0];
-   const nir_alu_src& src1 = instr.src[1];
-   EAluOp dot4_op = use_legacy_math_rules() ? op2_dot4 : op2_dot4_ieee;
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < n ; ++i) {
-      ir = new AluInstruction(dot4_op, from_nir(instr.dest, i),
-                              m_src[0][i], m_src[1][i],
-                              instr.dest.write_mask & (1 << i) ? write : empty);
-
-      if (src0.negate) ir->set_flag(alu_src0_neg);
-      if (src0.abs) ir->set_flag(alu_src0_abs);
-      if (src1.negate) ir->set_flag(alu_src1_neg);
-      if (src1.abs) ir->set_flag(alu_src1_abs);
-
-      if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-      emit_instruction(ir);
-   }
-   for (int i = n; i < 4 ; ++i) {
-      ir = new AluInstruction(dot4_op, from_nir(instr.dest, i),
-                              Value::zero, Value::zero,
-                              instr.dest.write_mask & (1 << i) ? write : empty);
-      emit_instruction(ir);
-   }
-
-   if (ir)
-      ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
-{
-   const nir_alu_src& src0 = instr.src[0];
-   const nir_alu_src& src1 = instr.src[1];
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 3 ; ++i) {
-      ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
-                              m_src[0][i], m_src[1][i],
-                              instr.dest.write_mask & (1 << i) ? write : empty);
-      if (src0.negate) ir->set_flag(alu_src0_neg);
-      if (src0.abs) ir->set_flag(alu_src0_abs);
-      if (src1.negate) ir->set_flag(alu_src1_neg);
-      if (src1.abs) ir->set_flag(alu_src1_abs);
-      if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-      emit_instruction(ir);
-   }
-
-   ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
-                           m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
-   if (src1.negate) ir->set_flag(alu_src1_neg);
-   if (src1.abs) ir->set_flag(alu_src1_abs);
-   emit_instruction(ir);
-
-   ir->set_flag(alu_last_instr);
-   return true;
-
-}
-
-bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)) {
-         ir = new AluInstruction(op, from_nir(instr.dest, i),
-                                 m_src[0][i], Value::zero,
-                                 write);
-         emit_instruction(ir);
-      }
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
-                                 m_src[0][i], Value::one_f, write);
-         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
-         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-         emit_instruction(ir);
-      }
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
-{
-
-   AluInstruction *ir = nullptr;
-   PValue v[4]; // this might need some additional temp register creation
-   for (unsigned i = 0; i < 4 ; ++i)
-      v[i] = from_nir(instr.dest, i);
-
-   EAluOp combine = all ? op2_and_int : op2_or_int;
-
-   /* For integers we can not use the modifiers, so this needs some emulation */
-   /* Should actually be lowered with NIR */
-   if (instr.src[0].negate == instr.src[1].negate &&
-       instr.src[0].abs == instr.src[1].abs) {
-
-      for (unsigned i = 0; i < nc ; ++i) {
-         ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   } else {
-      std::cerr << "Negate in iequal/inequal not (yet) supported\n";
-      return false;
-   }
-
-   for (unsigned i = 0; i < nc/2 ; ++i) {
-      ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
-      emit_instruction(ir);
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-
-   if (nc > 2) {
-      ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
-      emit_instruction(ir);
-   }
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
-{
-   AluInstruction *ir = nullptr;
-   PValue v[4]; // this might need some additional temp register creation
-   for (unsigned i = 0; i < 4 ; ++i)
-      v[i] = from_nir(instr.dest, i);
-
-   for (unsigned i = 0; i < nc ; ++i) {
-      ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
-
-      if (instr.src[0].abs)
-         ir->set_flag(alu_src0_abs);
-      if (instr.src[0].negate)
-         ir->set_flag(alu_src0_neg);
-
-      if (instr.src[1].abs)
-         ir->set_flag(alu_src1_abs);
-      if (instr.src[1].negate)
-         ir->set_flag(alu_src1_neg);
-
-      emit_instruction(ir);
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-
-   for (unsigned i = 0; i < nc ; ++i) {
-      ir = new AluInstruction(op1_max4, v[i], v[i], write);
-      if (all) ir->set_flag(alu_src0_neg);
-      emit_instruction(ir);
-   }
-
-   for (unsigned i = nc; i < 4 ; ++i) {
-      ir = new AluInstruction(op1_max4, v[i],
-                              all ? Value::one_f : Value::zero, write);
-      if (all)
-         ir->set_flag(alu_src0_neg);
-
-      emit_instruction(ir);
-   }
-
-   ir->set_flag(alu_last_instr);
-
-   if (all)
-      op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
-   else
-      op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
-
-   ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
-   if (all)
-      ir->set_flag(alu_src1_neg);
-   emit_instruction(ir);
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
-{
-   AluInstruction *ir = nullptr;
-   PValue v[4]; // this might need some additional temp register creation
-   for (unsigned i = 0; i < 4 ; ++i)
-      v[i] = from_nir(instr.dest, i);
-
-   for (unsigned i = 0; i < 2 ; ++i) {
-      ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
-      if (instr.src[0].abs)
-         ir->set_flag(alu_src0_abs);
-      if (instr.src[0].negate)
-         ir->set_flag(alu_src0_neg);
-
-      if (instr.src[1].abs)
-         ir->set_flag(alu_src1_abs);
-      if (instr.src[1].negate)
-         ir->set_flag(alu_src1_neg);
-
-      emit_instruction(ir);
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-
-   op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
-   ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
-   emit_instruction(ir);
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
-{
-   const nir_alu_src& src0 = instr.src[0];
-   const nir_alu_src& src1 = instr.src[1];
-
-   AluInstruction *ir = nullptr;
-
-   if (get_chip_class() == CAYMAN) {
-      for (int k = 0; k < 4; ++k) {
-         if (instr.dest.write_mask & (1 << k)) {
-
-            for (int i = 0; i < 4; i++) {
-               ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty);
-               if (src0.negate) ir->set_flag(alu_src0_neg);
-               if (src0.abs) ir->set_flag(alu_src0_abs);
-               if (src1.negate) ir->set_flag(alu_src1_neg);
-               if (src1.abs) ir->set_flag(alu_src1_abs);
-               if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-               if (i == 3) ir->set_flag(alu_last_instr);
-               emit_instruction(ir);
-            }
-         }
-      }
-   } else {
-      for (int i = 0; i < 4 ; ++i) {
-         if (instr.dest.write_mask & (1 << i)){
-            ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
-            if (src0.negate) ir->set_flag(alu_src0_neg);
-            if (src0.abs) ir->set_flag(alu_src0_abs);
-            if (src1.negate) ir->set_flag(alu_src1_neg);
-            if (src1.abs) ir->set_flag(alu_src1_abs);
-            if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-            emit_instruction(ir);
-         }
-      }
-   }
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
-{
-
-   const nir_alu_src& src0 = instr.src[0];
-   const nir_alu_src& src1 = instr.src[1];
-
-   if (src0.negate || src1.negate ||
-       src0.abs || src1.abs) {
-      std::cerr << "R600: don't support modifiers with integer operations";
-      return false;
-   }
-   return emit_alu_op2(instr, opcode, opts);
-}
-
-bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
-{
-   const nir_alu_src *src0 = &instr.src[0];
-   const nir_alu_src *src1 = &instr.src[1];
-
-   int idx0 = 0;
-   int idx1 = 1;
-   if (ops & op2_opt_reverse) {
-      std::swap(src0, src1);
-      std::swap(idx0, idx1);
-   }
-
-   bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                 m_src[idx0][i], m_src[idx1][i], write);
-
-         if (src0->negate) ir->set_flag(alu_src0_neg);
-         if (src0->abs) ir->set_flag(alu_src0_abs);
-         if (src1_negate) ir->set_flag(alu_src1_neg);
-         if (src1->abs) ir->set_flag(alu_src1_abs);
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-         emit_instruction(ir);
-      }
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
-                                      std::array<uint8_t, 3> reorder)
-{
-   const nir_alu_src *src[3];
-   src[0] = &instr.src[reorder[0]];
-   src[1] = &instr.src[reorder[1]];
-   src[2] = &instr.src[reorder[2]];
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
-                                 m_src[reorder[0]][i],
-                                 m_src[reorder[1]][i],
-                                 m_src[reorder[2]][i],
-               write);
-
-         if (src[0]->negate) ir->set_flag(alu_src0_neg);
-         if (src[1]->negate) ir->set_flag(alu_src1_neg);
-         if (src[2]->negate) ir->set_flag(alu_src2_neg);
-
-         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
-         ir->set_flag(alu_write);
-         emit_instruction(ir);
-      }
-   }
-   make_last(ir);
-   return true;
-}
-
-bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 4 ; ++i) {
-      if (instr.dest.write_mask & (1 << i)){
-         ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
-                                 m_src[0][i], write);
-         emit_instruction(ir);
-      }
-   }
-   if (ir)
-      ir->set_flag(alu_last_instr);
-
-   return true;
-}
-
-static const char swz[] = "xyzw01?_";
-
-void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
-                                             const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
-{
-
-   AluInstruction *alu = nullptr;
-   for (int i = 0; i < ncomp; ++i) {
-      alu  = new AluInstruction(op1_mov,  out[i], v[i], {alu_write});
-      if (src.abs)
-         alu->set_flag(alu_src0_abs);
-      if (src.negate)
-         alu->set_flag(alu_src0_neg);
-      emit_instruction(alu);
-   }
-   make_last(alu);
-}
-
-bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
-                                      bool fine)
-{
-
-   GPRVector::Values v;
-   std::array<int, 4> writemask = {0,1,2,3};
-
-   int ncomp = nir_dest_num_components(instr.dest.dest);
-   GPRVector::Swizzle src_swz = {7,7,7,7};
-   for (auto i = 0; i < ncomp; ++i)
-      src_swz[i] = instr.src[0].swizzle[i];
-
-   auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
-
-   if (instr.src[0].abs || instr.src[0].negate) {
-      GPRVector tmp = get_temp_vec4();
-      split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
-      src = tmp;
-   }
-
-   for (int i = 0; i < 4; ++i) {
-      writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
-      v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
-   }
-
-   /* This is querying the dreivatives of the output fb, so we would either need
-    * access to the neighboring pixels or to the framebuffer. Neither is currently
-    * implemented */
-   GPRVector dst(v);
-
-   auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
-   tex->set_dest_swizzle(writemask);
-
-   if (fine)
-      tex->set_flag(TexInstruction::grad_fine);
-
-   emit_instruction(tex);
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
-{
-   auto tmp = get_temp_register();
-   emit_instruction(op2_lshr_int, tmp,
-   {m_src[0][0], PValue(new LiteralValue(16))},
-   {alu_write, alu_last_instr});
-
-   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
-                                  {tmp}, {alu_write, alu_last_instr});
-
-   return true;
-}
-
-bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
-{
-   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
-   {m_src[0][0]},{alu_write, alu_last_instr});
-   return true;
-}
-
-bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
-{
-   PValue x = get_temp_register();
-   PValue y = get_temp_register();
-
-   emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
-   emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
-
-   emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
-
-   emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
-
-   return true;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
deleted file mode 100644
index 509f5cf..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EMITALUINSTRUCTION_H
-#define SFN_EMITALUINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-
-#include "sfn_alu_defines.h"
-#include "sfn_instruction_alu.h"
-#include "sfn_instruction_tex.h"
-
-namespace r600  {
-
-
-class EmitAluInstruction : public EmitInstruction
-{
-public:
-   EmitAluInstruction(ShaderFromNirProcessor& processor);
-
-private:
-
-   enum AluOp2Opts {
-      op2_opt_none = 0,
-      op2_opt_reverse = 1,
-      op2_opt_neg_src1 = 1 << 1
-   };
-
-   bool do_emit(nir_instr* instr) override;
-
-   void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp);
-
-   bool emit_mov(const nir_alu_instr& instr);
-   bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
-   bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
-
-   bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
-   bool emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode);
-
-   bool emit_alu_inot(const nir_alu_instr& instr);
-   bool emit_alu_ineg(const nir_alu_instr& instr);
-   bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
-
-   bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
-   bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
-
-   bool emit_alu_b2f(const nir_alu_instr& instr);
-   bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
-   bool emit_dot(const nir_alu_instr& instr, int n);
-   bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
-   bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op,  unsigned nc, bool all);
-   bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc);
-
-   bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
-   bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
-
-   bool emit_fdph(const nir_alu_instr &instr);
-   bool emit_discard_if(const nir_intrinsic_instr *instr);
-
-   bool emit_alu_f2b32(const nir_alu_instr& instr);
-   bool emit_b2i32(const nir_alu_instr& instr);
-   bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
-   bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
-   bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
-
-   bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
-   bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
-   bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
-   bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
-
-   bool emit_cube(const nir_alu_instr& instr);
-private:
-   void make_last(AluInstruction *ir) const;
-   void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v,
-                            GPRVector::Values& out, int ncomp);
-
-   void preload_src(const nir_alu_instr& instr);
-   unsigned num_src_comp(const nir_alu_instr& instr);
-
-   using vreg = std::array<PValue, 4>;
-
-   std::array<PValue, 4> m_src[4];
-};
-
-inline void EmitAluInstruction::make_last(AluInstruction *ir) const
-{
-   if (ir)
-      ir->set_flag(alu_last_instr);
-}
-
-}
-
-#endif // SFN_EMITALUINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
deleted file mode 100644
index 7978ff8..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_emitinstruction.h"
-
-#include "sfn_shader_base.h"
-
-namespace r600 {
-
-EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
-   m_proc(processor)
-{
-
-}
-
-EmitInstruction::~EmitInstruction()
-{
-}
-
-bool EmitInstruction::emit(nir_instr* instr)
-{
-   return do_emit(instr);
-}
-
-bool EmitInstruction::use_legacy_math_rules(void)
-{
-   return m_proc.use_legacy_math_rules();
-}
-
-PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
-{
-   return m_proc.from_nir(v, component, swizzled);
-}
-
-PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
-{
-   return m_proc.from_nir(v, component);
-}
-
-void EmitInstruction::emit_instruction(Instruction *ir)
-{
-   return m_proc.emit_instruction(ir);
-}
-
-void EmitInstruction::emit_instruction(AluInstruction *ir)
-{
-   return m_proc.emit_instruction(ir);
-}
-
-bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
-                                       std::vector<PValue> src0,
-                                       const std::set<AluModifiers>& m_flags)
-{
-   return m_proc.emit_instruction(opcode, dest,src0, m_flags);
-}
-
-const nir_variable *
-EmitInstruction::get_deref_location(const nir_src& v) const
-{
-   return m_proc.get_deref_location(v);
-}
-
-PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
-{
-   return m_proc.from_nir_with_fetch_constant(src, component, channel);
-}
-
-GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                                            const GPRVector::Swizzle& swizzle, bool match)
-{
-   return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
-}
-
-PGPRValue EmitInstruction::get_temp_register(int channel)
-{
-   return m_proc.get_temp_register(channel);
-}
-
-GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle)
-{
-   return m_proc.get_temp_vec4(swizzle);
-}
-
-PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
-{
-   return m_proc.create_register_from_nir_src(src, swizzle);
-}
-
-enum amd_gfx_level EmitInstruction::get_chip_class(void) const
-{
-   return m_proc.get_chip_class();
-}
-
-PValue EmitInstruction::literal(uint32_t value)
-{
-   return m_proc.literal(value);
-}
-
-GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
-{
-   return m_proc.vec_from_nir(dst, num_components);
-}
-
-bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
-                                      const PValue& reg, bool map)
-{
-   return m_proc.inject_register(sel, swizzle, reg, map);
-}
-
-int EmitInstruction::remap_atomic_base(int base)
-{
-	return m_proc.remap_atomic_base(base);
-}
-
-void EmitInstruction::set_has_txs_cube_array_comp()
-{
-   m_proc.sh_info().has_txq_cube_array_z_comp = 1;
-}
-
-const std::set<AluModifiers> EmitInstruction::empty = {};
-const std::set<AluModifiers> EmitInstruction::write = {alu_write};
-const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
-const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
-
-}
-
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
deleted file mode 100644
index 79080a5..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef EMITINSTRUCTION_H
-#define EMITINSTRUCTION_H
-
-#include "compiler/nir/nir.h"
-#include "sfn_defines.h"
-#include "sfn_value.h"
-#include "sfn_instruction_alu.h"
-
-namespace r600 {
-
-class ShaderFromNirProcessor;
-
-class EmitInstruction
-{
-public:
-   EmitInstruction(ShaderFromNirProcessor& processor);
-   virtual ~EmitInstruction();
-   bool emit(nir_instr* instr);
-
-   static const std::set<AluModifiers> empty;
-   static const std::set<AluModifiers> write;
-   static const std::set<AluModifiers> last_write;
-   static const std::set<AluModifiers> last;
-
-protected:
-   virtual bool do_emit(nir_instr* instr) = 0;
-
-   // forwards from ValuePool
-   PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
-   PValue from_nir(const nir_src& v, unsigned component);
-   PValue from_nir(const nir_alu_src& v, unsigned component);
-   PValue from_nir(const nir_tex_src& v, unsigned component);
-   PValue from_nir(const nir_alu_dest& v, unsigned component);
-   PValue from_nir(const nir_dest& v, unsigned component);
-
-   PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
-
-   PGPRValue get_temp_register(int channel = -1);
-   GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3});
-
-   // forwards from ShaderFromNirProcessor
-   void emit_instruction(Instruction *ir);
-   void emit_instruction(AluInstruction *ir);
-   bool emit_instruction(EAluOp opcode, PValue dest,
-                         std::vector<PValue> src0,
-                         const std::set<AluModifiers>& m_flags);
-   bool use_legacy_math_rules(void);
-
-   PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
-   GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                              const GPRVector::Swizzle& swizzle, bool match = false);
-
-   const nir_variable *get_deref_location(const nir_src& v) const;
-
-   enum amd_gfx_level get_chip_class(void) const;
-
-   PValue literal(uint32_t value);
-
-   GPRVector vec_from_nir(const nir_dest& dst, int num_components);
-
-   bool inject_register(unsigned sel, unsigned swizzle,
-                        const PValue& reg, bool map);
-
-   int remap_atomic_base(int base);
-
-   void set_has_txs_cube_array_comp();
-private:
-
-   ShaderFromNirProcessor& m_proc;
-};
-
-}
-
-
-
-#endif // EMITINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
deleted file mode 100644
index 40f2730..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
+++ /dev/null
@@ -1,741 +0,0 @@
-#include "sfn_emitssboinstruction.h"
-
-#include "sfn_instruction_fetch.h"
-#include "sfn_instruction_gds.h"
-#include "sfn_instruction_misc.h"
-#include "sfn_instruction_tex.h"
-#include "../r600_pipe.h"
-#include "../r600_asm.h"
-
-namespace r600 {
-
-#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
-
-EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
-   EmitInstruction(processor),
-   m_require_rat_return_address(false),
-   m_ssbo_image_offset(0)
-{
-}
-
-void EmitSSBOInstruction::set_ssbo_offset(int offset)
-{
-   m_ssbo_image_offset = offset;
-}
-
-
-void EmitSSBOInstruction::set_require_rat_return_address()
-{
-   m_require_rat_return_address = true;
-}
-
-bool
-EmitSSBOInstruction::load_rat_return_address()
-{
-   if (m_require_rat_return_address) {
-      m_rat_return_address = get_temp_vec4();
-      emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
-      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
-                                          literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
-      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
-                                          m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
-      {alu_write, alu_last_instr}));
-      m_require_rat_return_address = false;
-   }
-   return true;
-}
-
-
-bool EmitSSBOInstruction::do_emit(nir_instr* instr)
-{
-   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   switch (intr->intrinsic) {
-   case nir_intrinsic_atomic_counter_add:
-   case nir_intrinsic_atomic_counter_and:
-   case nir_intrinsic_atomic_counter_exchange:
-   case nir_intrinsic_atomic_counter_max:
-   case nir_intrinsic_atomic_counter_min:
-   case nir_intrinsic_atomic_counter_or:
-   case nir_intrinsic_atomic_counter_xor:
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return emit_atomic(intr);
-   case nir_intrinsic_atomic_counter_read:
-   case nir_intrinsic_atomic_counter_post_dec:
-      return emit_unary_atomic(intr);
-   case nir_intrinsic_atomic_counter_inc:
-      return emit_atomic_inc(intr);
-   case nir_intrinsic_atomic_counter_pre_dec:
-      return emit_atomic_pre_dec(intr);
-   case nir_intrinsic_load_ssbo:
-       return emit_load_ssbo(intr);
-   case nir_intrinsic_store_ssbo:
-      return emit_store_ssbo(intr);
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_exchange:
-      return emit_ssbo_atomic_op(intr);
-   case nir_intrinsic_image_store:
-      return emit_image_store(intr);
-   case nir_intrinsic_image_load:
-   case nir_intrinsic_image_atomic_add:
-   case nir_intrinsic_image_atomic_and:
-   case nir_intrinsic_image_atomic_or:
-   case nir_intrinsic_image_atomic_xor:
-   case nir_intrinsic_image_atomic_exchange:
-   case nir_intrinsic_image_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_umin:
-   case nir_intrinsic_image_atomic_umax:
-   case nir_intrinsic_image_atomic_imin:
-   case nir_intrinsic_image_atomic_imax:
-      return emit_image_load(intr);
-   case nir_intrinsic_image_size:
-      return emit_image_size(intr);
-   case nir_intrinsic_get_ssbo_size:
-      return emit_buffer_size(intr);
-   case nir_intrinsic_memory_barrier:
-   case nir_intrinsic_memory_barrier_image:
-   case nir_intrinsic_memory_barrier_buffer:
-   case nir_intrinsic_group_memory_barrier:
-      return make_stores_ack_and_waitack();
-   default:
-      return false;
-   }
-}
-
-bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-
-   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
-                            get_opcode_wo(instr->intrinsic);
-
-   if (DS_OP_INVALID == op)
-      return false;
-
-
-
-   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
-
-   int base = remap_atomic_base(nir_intrinsic_base(instr));
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
-
-   GDSInstr *ir = nullptr;
-   if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap)  {
-      PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
-      ir = new GDSInstr(op, dest, value, value2, uav_id, base);
-   } else {
-      ir = new GDSInstr(op, dest, value, uav_id, base);
-   }
-
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-
-   ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
-
-   if (DS_OP_INVALID == op)
-      return false;
-
-   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
-
-   emit_instruction(ir);
-   return true;
-}
-
-ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
-{
-   switch (opcode) {
-   case nir_intrinsic_atomic_counter_add:
-      return DS_OP_ADD_RET;
-   case nir_intrinsic_atomic_counter_and:
-      return DS_OP_AND_RET;
-   case nir_intrinsic_atomic_counter_exchange:
-      return DS_OP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_inc:
-      return DS_OP_INC_RET;
-   case nir_intrinsic_atomic_counter_max:
-      return DS_OP_MAX_UINT_RET;
-   case nir_intrinsic_atomic_counter_min:
-      return DS_OP_MIN_UINT_RET;
-   case nir_intrinsic_atomic_counter_or:
-      return DS_OP_OR_RET;
-   case nir_intrinsic_atomic_counter_read:
-      return DS_OP_READ_RET;
-   case nir_intrinsic_atomic_counter_xor:
-      return DS_OP_XOR_RET;
-   case nir_intrinsic_atomic_counter_post_dec:
-      return DS_OP_DEC_RET;
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return DS_OP_CMP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_pre_dec:
-   default:
-      return DS_OP_INVALID;
-   }
-}
-
-ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
-{
-   switch (opcode) {
-   case nir_intrinsic_atomic_counter_add:
-      return DS_OP_ADD;
-   case nir_intrinsic_atomic_counter_and:
-      return DS_OP_AND;
-   case nir_intrinsic_atomic_counter_inc:
-      return DS_OP_INC;
-   case nir_intrinsic_atomic_counter_max:
-      return DS_OP_MAX_UINT;
-   case nir_intrinsic_atomic_counter_min:
-      return DS_OP_MIN_UINT;
-   case nir_intrinsic_atomic_counter_or:
-      return DS_OP_OR;
-   case nir_intrinsic_atomic_counter_xor:
-      return DS_OP_XOR;
-   case nir_intrinsic_atomic_counter_post_dec:
-      return DS_OP_DEC;
-   case nir_intrinsic_atomic_counter_comp_swap:
-      return DS_OP_CMP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_exchange:
-      return DS_OP_XCHG_RET;
-   case nir_intrinsic_atomic_counter_pre_dec:
-   default:
-      return DS_OP_INVALID;
-   }
-}
-
-RatInstruction::ERatOp
-EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
-{
-   switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_image_atomic_add:
-      return RatInstruction::ADD_RTN;
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_image_atomic_and:
-      return RatInstruction::AND_RTN;
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_image_atomic_exchange:
-      return RatInstruction::XCHG_RTN;
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_image_atomic_or:
-      return RatInstruction::OR_RTN;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-      return RatInstruction::MIN_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-      return RatInstruction::MAX_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_image_atomic_umin:
-      return RatInstruction::MIN_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_image_atomic_umax:
-      return RatInstruction::MAX_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_image_atomic_xor:
-      return RatInstruction::XOR_RTN;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_comp_swap:
-      if (util_format_is_float(format))
-         return RatInstruction::CMPXCHG_FLT_RTN;
-      else
-         return RatInstruction::CMPXCHG_INT_RTN;
-   case nir_intrinsic_image_load:
-      return RatInstruction::NOP_RTN;
-   default:
-      unreachable("Unsupported RAT instruction");
-   }
-}
-
-RatInstruction::ERatOp
-EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
-{
-	switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_image_atomic_add:
-      return RatInstruction::ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_image_atomic_and:
-      return RatInstruction::AND;
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_image_atomic_or:
-      return RatInstruction::OR;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-      return RatInstruction::MIN_INT;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-      return RatInstruction::MAX_INT;
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_image_atomic_umin:
-      return RatInstruction::MIN_UINT;
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_image_atomic_umax:
-      return RatInstruction::MAX_UINT;
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_image_atomic_xor:
-      return RatInstruction::XOR;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_comp_swap:
-      if (util_format_is_float(format))
-         return RatInstruction::CMPXCHG_FLT;
-      else
-         return RatInstruction::CMPXCHG_INT;
-   default:
-      unreachable("Unsupported WO RAT instruction");
-   }
-}
-
-bool EmitSSBOInstruction::load_atomic_inc_limits()
-{
-   m_atomic_update = get_temp_register();
-   m_atomic_update->set_keep_alive();
-   emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
-   {alu_write, alu_last_instr}));
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
-{
-   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
-   PValue uav_id = from_nir(instr->src[0], 0);
-   GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
-   auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
-                          m_atomic_update, uav_id,
-                          remap_atomic_base(nir_intrinsic_base(instr)));
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
-{
-   GPRVector dest = make_dest(instr);
-
-   PValue uav_id = from_nir(instr->src[0], 0);
-
-   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
-                          remap_atomic_base(nir_intrinsic_base(instr)));
-   emit_instruction(ir);
-
-   emit_instruction(new AluInstruction(op2_sub_int,  dest.x(), dest.x(), literal(1), last_write));
-
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
-{
-   GPRVector dest = make_dest(instr);
-
-   /** src0 not used, should be some offset */
-   auto addr = from_nir(instr->src[1], 0);
-   PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
-
-   /** Should be lowered in nir */
-   emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
-                    {alu_write, alu_last_instr}));
-
-   const EVTXDataFormat formats[4] = {
-      fmt_32,
-      fmt_32_32,
-      fmt_32_32_32,
-      fmt_32_32_32_32
-   };
-
-   const std::array<int,4> dest_swt[4] = {
-      {0,7,7,7},
-      {0,1,7,7},
-      {0,1,2,7},
-      {0,1,2,3}
-   };
-
-   /* TODO fix resource index */
-   auto ir = new FetchInstruction(dest, addr_temp,
-                                  R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
-                                  , from_nir(instr->src[0], 0),
-                                  formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
-   ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
-   ir->set_flag(vtx_use_tc);
-
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
-{
-
-   GPRVector::Swizzle swz = {7,7,7,7};
-   for (unsigned i = 0; i <  nir_src_num_components(instr->src[0]); ++i)
-      swz[i] = i;
-
-   auto orig_addr = from_nir(instr->src[2], 0);
-
-   GPRVector addr_vec = get_temp_vec4({0,1,2,7});
-
-   auto temp2 = get_temp_vec4();
-
-   auto rat_id = from_nir(instr->src[1], 0);
-
-   emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
-                                       PValue(new LiteralValue(2)), write));
-   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
-   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
-
-
-   auto values = vec_from_nir_with_fetch_constant(instr->src[0],
-         (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
-
-   auto cf_op = cf_mem_rat;
-   //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
-   auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
-                                   values, addr_vec, m_ssbo_image_offset, rat_id, 1,
-                                   1, 0, false);
-   emit_instruction(store);
-   m_store_ops.push_back(store);
-
-   for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
-      emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN  ?  last_write : write));
-      emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
-                                          {addr_vec.reg_i(0), Value::one_i}, last_write));
-      store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
-                                 temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
-                                 1, 0, false);
-      emit_instruction(store);
-      if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
-         m_store_ops.push_back(store);
-   }
-
-   return true;
-}
-
-bool
-EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
-   auto undef = from_nir(intrin->src[2], 0);
-   auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3});
-   auto unknown  = from_nir(intrin->src[4], 0);
-
-   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
-       nir_intrinsic_image_array(intrin)) {
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
-   }
-
-   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
-   auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
-                                   image_offset, 1, 0xf, 0, false);
-
-   //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
-      m_store_ops.push_back(store);
-
-   emit_instruction(store);
-   return true;
-}
-
-bool
-EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
-   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
-                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
-
-   auto coord_orig =  from_nir(intrin->src[1], 0, 0);
-   auto coord = get_temp_register(0);
-
-   emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
-
-   if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[3], 0), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
-                                          from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
-   } else {
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                          from_nir(intrin->src[2], 0), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
-   }
-
-
-   GPRVector out_vec({coord, coord, coord, coord});
-
-   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
-                                   image_offset, 1, 0xf, 0, true);
-   emit_instruction(atomic);
-
-   if (read_result) {
-      emit_instruction(new WaitAck(0));
-
-      GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
-      auto fetch = new FetchInstruction(vc_fetch,
-                                        no_index_offset,
-                                        fmt_32,
-                                        vtx_nf_int,
-                                        vtx_es_none,
-                                        m_rat_return_address.reg_i(1),
-                                        dest,
-                                        0,
-                                        false,
-                                        0xf,
-                                        R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
-                                        0,
-                                        bim_none,
-                                        false,
-                                        false,
-                                        0,
-                                        0,
-                                        0,
-                                        image_offset,
-                                        {0,7,7,7});
-      fetch->set_flag(vtx_srf_mode);
-      fetch->set_flag(vtx_use_tc);
-      fetch->set_flag(vtx_vpm);
-      emit_instruction(fetch);
-   }
-
-   return true;
-
-}
-
-bool
-EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
-{
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
-   auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)):
-                                 get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));
-
-   GPRVector::Swizzle swz = {0,1,2,3};
-   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
-
-   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
-       nir_intrinsic_image_array(intrin)) {
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
-      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
-   }
-
-   if (intrin->intrinsic != nir_intrinsic_image_load) {
-      if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
-         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                             from_nir(intrin->src[4], 0), {alu_write}));
-         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
-                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
-      } else {
-         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
-                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
-      }
-   }
-   auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
-
-   auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
-                                   image_offset, 1, 0xf, 0, true);
-   emit_instruction(store);
-   return read_retvalue ? fetch_return_value(intrin) : true;
-}
-
-bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
-{
-   emit_instruction(new WaitAck(0));
-
-   pipe_format format = nir_intrinsic_format(intrin);
-   unsigned fmt = fmt_32;
-   unsigned num_format = 0;
-   unsigned format_comp = 0;
-   unsigned endian = 0;
-
-   int imageid = 0;
-   PValue image_offset;
-
-   if (nir_src_is_const(intrin->src[0]))
-      imageid = nir_src_as_int(intrin->src[0]);
-   else
-      image_offset = from_nir(intrin->src[0], 0);
-
-   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
-
-   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
-
-   auto fetch = new FetchInstruction(vc_fetch,
-                                     no_index_offset,
-                                     (EVTXDataFormat)fmt,
-                                     (EVFetchNumFormat)num_format,
-                                     (EVFetchEndianSwap)endian,
-                                     m_rat_return_address.reg_i(1),
-                                     dest,
-                                     0,
-                                     false,
-                                     0x3,
-                                     R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
-                                     0,
-                                     bim_none,
-                                     false,
-                                     false,
-                                     0,
-                                     0,
-                                     0,
-                                     image_offset, {0,1,2,3});
-   fetch->set_flag(vtx_srf_mode);
-   fetch->set_flag(vtx_use_tc);
-   fetch->set_flag(vtx_vpm);
-   if (format_comp)
-      fetch->set_flag(vtx_format_comp_signed);
-
-   emit_instruction(fetch);
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
-{
-   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
-   GPRVector src{0,{4,4,4,4}};
-
-   assert(nir_src_as_uint(intrin->src[1]) == 0);
-
-   auto const_offset = nir_src_as_const_value(intrin->src[0]);
-   auto dyn_offset = PValue();
-   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
-   if (const_offset)
-      res_id += const_offset[0].u32;
-   else
-      dyn_offset = from_nir(intrin->src[0], 0);
-
-   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
-      emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
-                       res_id,
-                       bim_none));
-      return true;
-   } else {
-      emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
-                                             0/* ?? */,
-                                             res_id, dyn_offset));
-      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
-          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
-         /* Need to load the layers from a const buffer */
-
-         set_has_txs_cube_array_comp();
-
-         if (const_offset) {
-            unsigned lookup_resid = const_offset[0].u32;
-            emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
-                                                PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
-                                                                        R600_BUFFER_INFO_CONST_BUFFER)),
-                                                EmitInstruction::last_write));
-         } else {
-            /* If the adressing is indirect we have to get the z-value by using a binary search */
-            GPRVector trgt;
-            GPRVector help;
-
-            auto addr = help.reg_i(0);
-            auto comp = help.reg_i(1);
-            auto low_bit = help.reg_i(2);
-            auto high_bit = help.reg_i(3);
-
-            emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
-                             literal(2), EmitInstruction::write));
-            emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
-                             literal(3), EmitInstruction::last_write));
-
-            emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
-                                                  R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
-
-            emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
-                                                EmitInstruction::write));
-            emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
-                                                EmitInstruction::last_write));
-
-            emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
-         }
-      }
-   }
-   return true;
-}
-
-bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
-{
-   std::array<PValue,4> dst_elms;
-
-
-   for (uint16_t i = 0; i < 4; ++i) {
-      dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
-   }
-
-   GPRVector dst(dst_elms);
-   GPRVector src(0,{4,4,4,4});
-
-   auto const_offset = nir_src_as_const_value(intr->src[0]);
-   auto dyn_offset = PValue();
-   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
-   if (const_offset)
-      res_id += const_offset[0].u32;
-   else
-      assert(0 && "dynamic buffer offset not supported in buffer_size");
-
-   emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
-                    res_id, bim_none));
-
-   return true;
-}
-
-bool EmitSSBOInstruction::make_stores_ack_and_waitack()
-{
-   for (auto&& store: m_store_ops)
-      store->set_ack();
-
-   if (!m_store_ops.empty())
-      emit_instruction(new WaitAck(0));
-
-   m_store_ops.clear();
-
-   return true;
-}
-
-GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
-{
-   GPRVector::Values v;
-   int i;
-   for (i = 0; i < 4; ++i)
-      v[i] = from_nir(ir->dest, i);
-   return GPRVector(v);
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
deleted file mode 100644
index 4d5fa0f..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef SFN_EMITSSBOINSTRUCTION_H
-#define SFN_EMITSSBOINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-#include "sfn_instruction_gds.h"
-#include "sfn_value_gpr.h"
-
-namespace r600 {
-
-class EmitSSBOInstruction: public EmitInstruction {
-public:
-   EmitSSBOInstruction(ShaderFromNirProcessor& processor);
-
-   void set_ssbo_offset(int offset);
-
-   void set_require_rat_return_address();
-   bool load_rat_return_address();
-   bool load_atomic_inc_limits();
-
-private:
-   bool do_emit(nir_instr *instr);
-
-   bool emit_atomic(const nir_intrinsic_instr* instr);
-   bool emit_unary_atomic(const nir_intrinsic_instr* instr);
-   bool emit_atomic_inc(const nir_intrinsic_instr* instr);
-   bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr);
-
-   bool emit_load_ssbo(const nir_intrinsic_instr* instr);
-   bool emit_store_ssbo(const nir_intrinsic_instr* instr);
-
-   bool emit_image_size(const nir_intrinsic_instr *intrin);
-   bool emit_image_load(const nir_intrinsic_instr *intrin);
-   bool emit_image_store(const nir_intrinsic_instr *intrin);
-   bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
-   bool emit_buffer_size(const nir_intrinsic_instr *intrin);
-
-   bool fetch_return_value(const nir_intrinsic_instr *intrin);
-
-   bool make_stores_ack_and_waitack();
-
-   ESDOp get_opcode(nir_intrinsic_op opcode) const;
-   ESDOp get_opcode_wo(const nir_intrinsic_op opcode) const;
-
-   RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
-   RatInstruction::ERatOp get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const;
-
-
-   GPRVector make_dest(const nir_intrinsic_instr* instr);
-
-   PGPRValue m_atomic_update;
-
-   bool m_require_rat_return_address;
-   GPRVector m_rat_return_address;
-   int m_ssbo_image_offset;
-   std::vector<RatInstruction *> m_store_ops;
-};
-
-}
-
-#endif // SFN_EMITSSBOINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
deleted file mode 100644
index 326cd15..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
+++ /dev/null
@@ -1,671 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_emittexinstruction.h"
-#include "sfn_shader_base.h"
-#include "sfn_instruction_fetch.h"
-
-namespace r600 {
-
-EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor):
-   EmitInstruction (processor)
-{
-}
-
-bool EmitTexInstruction::do_emit(nir_instr* instr)
-{
-   nir_tex_instr* ir = nir_instr_as_tex(instr);
-
-   TexInputs src;
-   if (!get_inputs(*ir, src))
-      return false;
-
-   if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      switch (ir->op) {
-      case nir_texop_txf:
-         return emit_buf_txf(ir, src);
-      case nir_texop_txs:
-         return emit_tex_txs(ir, src, {0,1,2,3});
-      default:
-         return false;
-      }
-   } else {
-      switch (ir->op) {
-      case nir_texop_tex:
-         return emit_tex_tex(ir, src);
-      case nir_texop_txf:
-         return emit_tex_txf(ir, src);
-      case nir_texop_txb:
-         return emit_tex_txb(ir, src);
-      case nir_texop_txl:
-         return emit_tex_txl(ir, src);
-      case nir_texop_txd:
-         return emit_tex_txd(ir, src);
-      case nir_texop_txs:
-         return emit_tex_txs(ir, src, {0,1,2,3});
-      case nir_texop_lod:
-         return emit_tex_lod(ir, src);
-      case nir_texop_tg4:
-         return emit_tex_tg4(ir, src);
-      case nir_texop_txf_ms:
-         return emit_tex_txf_ms(ir, src);
-      case nir_texop_query_levels:
-         return emit_tex_txs(ir, src, {3,7,7,7});
-      case nir_texop_texture_samples:
-         return emit_tex_texture_samples(ir, src, {3,7,7,7});
-      default:
-
-         return false;
-      }
-   }
-}
-
-bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
-{
-   auto dst = make_dest(*instr);
-
-   auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0,
-                                  instr->texture_index +  R600_MAX_CONST_BUFFERS,
-                                  src.texture_offset, bim_none);
-   ir->set_flag(vtx_use_const_field);
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
-{
-
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto tex_op = TexInstruction::sample;
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect);
-
-   if (instr->is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-      tex_op = TexInstruction::sample_c;
-   }
-
-   auto dst = make_dest(*instr);
-   auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
-                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, irt);
-
-   set_rect_coordinate_flags(instr, irt);
-   set_offsets(irt, src.offset);
-
-   emit_instruction(irt);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto tex_op = TexInstruction::sample_g;
-   auto dst = make_dest(*instr);
-
-   GPRVector empty_dst(0,{7,7,7,7});
-
-   if (instr->is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-      tex_op = TexInstruction::sample_c_g;
-   }
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx,
-                                             sampler.id,
-                                             sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   irgh->set_dest_swizzle({7,7,7,7});
-
-   TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy,
-                           sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   irgv->set_dest_swizzle({7,7,7,7});
-
-   TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id,
-                                           sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, ir);
-
-   set_rect_coordinate_flags(instr, ir);
-   set_offsets(ir, src.offset);
-
-   emit_instruction(irgh);
-   emit_instruction(irgv);
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto dst = make_dest(*instr);
-
-   if (*src.coord.reg_i(3) != *src.lod) {
-      if (src.coord.sel() != src.lod->sel())
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
-      else
-         src.coord.set_reg_i(3, src.lod);
-   }
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect);
-
-   /* txf doesn't need rounding for the array index, but 1D has the array index
-    * in the z component */
-   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
-      src.coord.set_reg_i(2, src.coord.reg_i(1));
-
-   auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
-                                    sampler.id,
-                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-
-
-   if (src.offset) {
-      assert(src.offset->is_ssa);
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
-         ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
-                  {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-   if (instr->is_array)
-      tex_ir->set_flag(TexInstruction::z_unnormalized);
-
-   emit_instruction(tex_ir);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
-{
-   auto tex_op = TexInstruction::get_tex_lod;
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dst = make_dest(*instr);
-   auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
-                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   irt->set_dest_swizzle({1,0,7,7});
-   emit_instruction(irt);
-
-   return true;
-
-}
-
-bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto tex_op = TexInstruction::sample_l;
-   if (instr->is_shadow)  {
-      if (src.coord.sel() != src.comperator->sel())
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
-      else
-         src.coord.set_reg_i(2, src.comperator);
-      tex_op = TexInstruction::sample_c_l;
-   }
-
-   if (src.coord.sel() != src.lod->sel())
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write}));
-   else
-      src.coord.set_reg_i(3, src.lod);
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dst = make_dest(*instr);
-   auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
-                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, irt);
-
-   set_rect_coordinate_flags(instr, irt);
-   set_offsets(irt, src.offset);
-
-   emit_instruction(irt);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
-{
-   auto tex_op = TexInstruction::sample_lb;
-
-   std::array<uint8_t, 4> in_swizzle = {0,1,2,3};
-
-   if (instr->is_shadow) {
-      if (src.coord.sel() != src.comperator->sel())
-         emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
-      else
-         src.coord.set_reg_i(2, src.comperator);
-      tex_op = TexInstruction::sample_c_lb;
-   }
-
-   if (src.coord.sel() != src.bias->sel())
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write}));
-   else
-      src.coord.set_reg_i(3, src.bias);
-
-   GPRVector tex_src(src.coord, in_swizzle);
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto dst = make_dest(*instr);
-   auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id,
-                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   if (instr->is_array)
-      handle_array_index(*instr, tex_src, irt);
-
-   set_rect_coordinate_flags(instr, irt);
-   set_offsets(irt, src.offset);
-
-   emit_instruction(irt);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
-                                      const std::array<int,4>& dest_swz)
-{
-   std::array<PValue,4> dst_elms;
-   std::array<PValue,4> src_elms;
-
-   for (uint16_t i = 0; i < 4; ++i) {
-      dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7);
-   }
-
-   GPRVector dst(dst_elms);
-
-   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
-                       instr->sampler_index + R600_MAX_CONST_BUFFERS,
-                       bim_none));
-   } else {
-      for (uint16_t i = 0; i < 4; ++i)
-         src_elms[i] =  tex_src.lod;
-      GPRVector src(src_elms);
-
-      auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref);
-      assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-      auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src,
-                                   sampler.id,
-                                   sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
-      ir->set_dest_swizzle(dest_swz);
-      emit_instruction(ir);
-
-      if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-         PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
-                                     sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER));
-
-         auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write});
-         emit_instruction(alu);
-         set_has_txs_cube_array_comp();
-      }
-   }
-
-   return true;
-
-}
-
-bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
-                                                  const std::array<int, 4> &dest_swz)
-{
-   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
-   GPRVector help{0,{4,4,4,4}};
-
-   auto dyn_offset = PValue();
-   int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
-
-   auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help,
-                                0, res_id, src.sampler_offset);
-   ir->set_dest_swizzle(dest_swz);
-   emit_instruction(ir);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   TexInstruction *set_ofs = nullptr;
-
-   auto tex_op = TexInstruction::gather4;
-
-   if (instr->is_shadow)  {
-      emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
-                       {alu_last_instr, alu_write}));
-      tex_op = TexInstruction::gather4_c;
-   }
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   bool literal_offset = false;
-   if (src.offset) {
-      literal_offset =  nir_src_as_const_value(*src.offset) != 0;
-      r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
-                       (literal_offset ? "literal" : "varying") <<
-                       "\n";
-
-      if (!literal_offset) {
-         GPRVector::Swizzle swizzle = {4,4,4,4};
-         for (unsigned i = 0; i < instr->coord_components; ++i)
-            swizzle[i] = i;
-
-         int noffsets = instr->coord_components;
-         if (instr->is_array)
-            --noffsets;
-
-         auto ofs = vec_from_nir_with_fetch_constant(*src.offset,
-                                                     ( 1 << noffsets) - 1,
-                                                     swizzle);
-         GPRVector dummy(0, {7,7,7,7});
-         tex_op = (tex_op == TexInstruction::gather4_c) ?
-                     TexInstruction::gather4_c_o : TexInstruction::gather4_o;
-
-         set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy,
-                                           ofs, sampler.id,
-                                      sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-         set_ofs->set_dest_swizzle({7,7,7,7});
-      }
-   }
-
-
-   /* pre CAYMAN needs swizzle */
-   auto dst = make_dest(*instr);
-   auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
-                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-
-   if (get_chip_class() != CAYMAN)
-      irt->set_dest_swizzle({1,2,0,3});
-   irt->set_gather_comp(instr->component);
-
-   if (instr->is_array)
-      handle_array_index(*instr, src.coord, irt);
-
-   if (literal_offset) {
-      r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
-      set_offsets(irt, src.offset);
-   }
-
-   set_rect_coordinate_flags(instr, irt);
-
-   if (set_ofs)
-      emit_instruction(set_ofs);
-
-   emit_instruction(irt);
-   return true;
-}
-
-bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
-{
-   assert(instr->src[0].src.is_ssa);
-
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   PGPRValue sample_id_dest_reg = get_temp_register();
-   GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7});
-   sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg);
-   std::array<int,4> dest_swz = {7,7,7,7};
-   dest_swz[sample_id_dest_reg->chan()] = 0;
-
-   emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
-                                       src.ms_index,
-                                       {alu_write, alu_last_instr}));
-
-   auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord,
-                                              sampler.id,
-                                              sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-   tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized);
-   tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized);
-   tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized);
-   tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized);
-   tex_sample_id_ir->set_inst_mode(1);
-
-   tex_sample_id_ir->set_dest_swizzle(dest_swz);
-
-   emit_instruction(tex_sample_id_ir);
-
-   if (src.ms_index->type() != Value::literal ||
-       static_cast<const LiteralValue&>(*src.ms_index).value() != 0) {
-       PValue help = get_temp_register();
-
-      emit_instruction(new AluInstruction(op2_lshl_int, help,
-                                          src.ms_index, literal(2),
-      {alu_write, alu_last_instr}));
-
-      emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg,
-                                          {sample_id_dest_reg, help},
-                                          {alu_write, alu_last_instr}));
-   }
-
-   emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
-                                       {sample_id_dest_reg, PValue(new LiteralValue(15))},
-                                       {alu_write, alu_last_instr}));
-
-   auto dst = make_dest(*instr);
-
-   /* txf doesn't need rounding for the array index, but 1D has the array index
-    * in the z component */
-   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
-      src.coord.set_reg_i(2, src.coord.reg_i(1));
-
-   auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
-                                    sampler.id,
-                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
-
-
-   if (src.offset) {
-      assert(src.offset->is_ssa);
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
-         ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
-                  {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-   emit_instruction(tex_ir);
-   return true;
-}
-
-bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
-{
-   sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
-
-   unsigned grad_components = instr.coord_components;
-   if (instr.is_array && !instr.array_is_lowered_cube)
-      --grad_components;
-
-
-   src.offset = nullptr;
-   bool retval = true;
-   for (unsigned i = 0; i < instr.num_srcs; ++i) {
-      switch (instr.src[i].src_type) {
-      case nir_tex_src_bias:
-         src.bias = from_nir(instr.src[i], 0);
-         break;
-
-      case nir_tex_src_coord: {
-         src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                      (1 << instr.coord_components) - 1,
-         {0,1,2,3});
-      } break;
-      case nir_tex_src_comparator:
-         src.comperator = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_ddx: {
-         sfn_log << SfnLog::tex << "Get DDX ";
-         src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                    (1 << grad_components) - 1,
-                                                    swizzle_from_comps(grad_components));
-         sfn_log << SfnLog::tex << src.ddx << "\n";
-      } break;
-      case nir_tex_src_ddy:{
-         sfn_log << SfnLog::tex << "Get DDY ";
-         src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
-                                                    (1 << grad_components) - 1,
-                                                    swizzle_from_comps(grad_components));
-         sfn_log << SfnLog::tex << src.ddy << "\n";
-      }  break;
-      case nir_tex_src_lod:
-         src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
-         break;
-      case nir_tex_src_offset:
-         sfn_log << SfnLog::tex << "  -- Find offset\n";
-         src.offset = &instr.src[i].src;
-         break;
-      case nir_tex_src_sampler_deref:
-         src.sampler_deref = get_deref_location(instr.src[i].src);
-         break;
-      case nir_tex_src_texture_deref:
-         src.texture_deref = get_deref_location(instr.src[i].src);
-         break;
-      case nir_tex_src_ms_index:
-         src.ms_index = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_texture_offset:
-         src.texture_offset = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_sampler_offset:
-         src.sampler_offset = from_nir(instr.src[i], 0);
-         break;
-      case nir_tex_src_plane:
-      case nir_tex_src_projector:
-      case nir_tex_src_min_lod:
-      default:
-         sfn_log << SfnLog::tex << "Texture source type " <<  instr.src[i].src_type << " not supported\n";
-         retval = false;
-      }
-   }
-   return retval;
-}
-
-GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
-{
-   int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
-                                                 instr.dest.reg.reg->num_components;
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i)
-      dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7);
-   return GPRVector(dst_elms);
-}
-
-
-GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
-                                        const std::array<int, 4>& swizzle)
-{
-   int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
-                                                 instr.dest.reg.reg->num_components;
-   std::array<PValue,4> dst_elms;
-   for (uint16_t i = 0; i < 4; ++i) {
-      int k = swizzle[i];
-      dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7);
-   }
-   return GPRVector(dst_elms);
-}
-
-void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
-                                                   TexInstruction* ir) const
-{
-   if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
-      ir->set_flag(TexInstruction::x_unnormalized);
-      ir->set_flag(TexInstruction::y_unnormalized);
-   }
-}
-
-void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
-{
-   if (!offset)
-      return;
-
-   assert(offset->is_ssa);
-   auto literal = nir_src_as_const_value(*offset);
-   assert(literal);
-
-   for (int i = 0; i < offset->ssa->num_components; ++i) {
-      ir->set_offset(i, literal[i].i32);
-   }
-}
-
-void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
-{
-   int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
-   emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
-                                       {alu_last_instr, alu_write}));
-   ir->set_flag(TexInstruction::z_unnormalized);
-}
-
-EmitTexInstruction::SamplerId
-EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref)
-{
-   EmitTexInstruction::SamplerId result = {sampler_id, false};
-
-   if (deref) {
-      assert(glsl_type_is_sampler(deref->type));
-      result.id = deref->data.binding;
-   }
-   return result;
-}
-
-EmitTexInstruction::TexInputs::TexInputs():
-   sampler_deref(nullptr),
-   texture_deref(nullptr),
-   offset(nullptr)
-{
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
deleted file mode 100644
index e11ebda..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EMITTEXINSTRUCTION_H
-#define SFN_EMITTEXINSTRUCTION_H
-
-#include "sfn_emitinstruction.h"
-#include "sfn_instruction_tex.h"
-
-namespace r600  {
-
-class EmitTexInstruction : public EmitInstruction
-{
-public:
-   EmitTexInstruction(ShaderFromNirProcessor& processor);
-
-private:
-   struct TexInputs {
-      TexInputs();
-      const nir_variable *sampler_deref;
-      const nir_variable *texture_deref;
-      GPRVector coord;
-      PValue bias;
-      PValue comperator;
-      PValue lod;
-      GPRVector ddx;
-      GPRVector ddy;
-      nir_src *offset;
-      PValue gather_comp;
-      PValue ms_index;
-      PValue sampler_offset;
-      PValue texture_offset;
-   };
-
-   bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
-
-   bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
-   bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
-                     const std::array<int, 4> &dest_swz);
-   bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
-                                 const std::array<int, 4> &dest_swz);
-   bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
-   bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
-   bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
-
-   bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
-
-   void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
-
-   bool do_emit(nir_instr* instr) override;
-
-   GPRVector make_dest(nir_tex_instr& instr);
-   GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
-
-   void set_offsets(TexInstruction* ir, nir_src *offset);
-   void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
-
-   struct SamplerId {
-      int id;
-      bool indirect;
-   };
-
-   SamplerId get_sampler_id(int sampler_id, const nir_variable *deref);
-
-};
-
-}
-
-#endif // SFN_EMITTEXINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.cpp b/src/gallium/drivers/r600/sfn/sfn_instr.cpp
new file mode 100644
index 0000000..d81e329
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp
@@ -0,0 +1,522 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_tex.h"
+#include "sfn_instr_controlflow.h"
+
+#include <iostream>
+#include <sstream>
+#include <numeric>
+
+namespace r600 {
+
+using std::string;
+using std::vector;
+
+Instr::Instr():
+   m_use_count(0),
+   m_block_id(std::numeric_limits<int>::max()),
+   m_index(std::numeric_limits<int>::max())
+{
+}
+
+Instr::~Instr()
+{
+
+}
+
+void Instr::print(std::ostream& os) const
+{
+   do_print(os);
+}
+
+bool Instr::ready() const
+{
+   for (auto& i : m_required_instr)
+      if (!i->ready())
+         return false;
+   return do_ready();
+}
+
+int int_from_string_with_prefix(const std::string& str, const std::string& prefix)
+{
+   if (str.substr(0, prefix.length()) != prefix) {
+      std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
+      assert(0);
+   }
+
+   std::stringstream help(str.substr(prefix.length()));
+   int retval;
+   help >> retval;
+   return retval;
+}
+
+int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle &swz, bool& is_ssa)
+{
+   assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
+   int sel = 0;
+
+   auto istr = str.begin() + 1;
+
+   if (str[0] == '_') {
+      while (istr != str.end() && *istr == '_')
+         ++istr;
+      sel = std::numeric_limits<int>::max();
+   } else {
+      while (istr != str.end() && isdigit(*istr)) {
+         sel *= 10;
+         sel += *istr - '0';
+         ++istr;
+      }
+   }
+
+   assert(*istr == '.');
+   istr++;
+
+   int i = 0;
+   while (istr != str.end()) {
+      switch (*istr) {
+      case 'x': swz[i] = 0; break;
+      case 'y': swz[i] = 1; break;
+      case 'z': swz[i] = 2; break;
+      case 'w': swz[i] = 3; break;
+      case '0': swz[i] = 4; break;
+      case '1': swz[i] = 5; break;
+      case '_': swz[i] = 7; break;
+      default:
+         unreachable("Unknown swizzle character");
+      }
+      ++istr;
+      ++i;
+   }
+
+   is_ssa = str[0] == 'S';
+
+   return sel;
+}
+
+bool Instr::is_last() const
+{
+   return true;
+}
+
+bool Instr::set_dead()
+{
+   if (m_instr_flags.test(always_keep))
+      return false;
+   bool is_dead = propagate_death();
+   m_instr_flags.set(dead);
+   return is_dead;
+}
+
+bool Instr::propagate_death()
+{
+   return true;
+}
+
+bool Instr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   (void)old_src;
+   (void)new_src;
+   return false;
+}
+
+void Instr::add_required_instr(Instr *instr)
+{
+   assert(instr);
+   m_required_instr.push_back(instr);
+   instr->m_dependend_instr.push_back(this);
+}
+
+void Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
+{
+
+   for (auto i = m_required_instr.begin(); i != m_required_instr.end(); ++i) {
+      if (*i == old_instr)
+         *i = new_instr;
+   }
+}
+
+bool Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
+{
+   (void)new_dest;
+   (void)move_instr;
+   return false;
+}
+
+void Instr::set_blockid(int id, int index)
+{
+   m_block_id = id;
+   m_index = index;
+   forward_set_blockid(id, index);
+}
+
+
+void Instr::forward_set_blockid(int id, int index)
+{
+   (void)id;
+   (void)index;
+}
+
+InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
+                                             const RegisterVec4::Swizzle& dest_swizzle):
+   m_dest(dest),
+   m_dest_swizzle(dest_swizzle)
+{
+   for (int i = 0; i < 4; ++i) {
+      if (m_dest_swizzle[i] < 6)
+         m_dest[i]->add_parent(this);
+   }
+}
+
+void InstrWithVectorResult::print_dest(std::ostream& os) const
+{
+   os << (m_dest[0]->is_ssa() ? 'S' : 'R' ) << m_dest.sel();
+   os << ".";
+   for (int i = 0; i < 4; ++i)
+      os << VirtualValue::chanchar[m_dest_swizzle[i]];
+}
+
+bool InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
+                                      const RegisterVec4::Swizzle& dest_swizzle) const
+{
+   for(int i = 0; i < 4; ++i) {
+      if (!m_dest[i]->equal_to(*dest[i])) {
+         return false;
+      }
+      if (m_dest_swizzle[i] != dest_swizzle[i])
+         return false;
+   }
+   return true;
+}
+
+void Block::do_print(std::ostream& os) const
+{
+   for (int j = 0; j < 2 * m_nesting_depth; ++j)
+      os << ' ';
+   os << "BLOCK START\n";
+   for (auto& i : m_instructions) {
+      for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j)
+         os << ' ';
+      os << *i << "\n";
+   }
+   for (int j = 0; j < 2 * m_nesting_depth; ++j)
+      os << ' ';
+   os << "BLOCK END\n";
+}
+
+bool Block::is_equal_to(const Block& lhs) const
+{
+   if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
+      return false;
+
+   if (m_instructions.size() != lhs.m_instructions.size())
+      return false;
+
+   return std::inner_product(m_instructions.begin(), m_instructions.end(), lhs.m_instructions.begin(),
+                             true,
+                             [] (bool l, bool r) { return l && r;},
+   [](PInst l, PInst r) { return l->equal_to(*r);});
+}
+
+inline bool operator != (const Block& lhs, const Block& rhs)
+{
+   return !lhs.is_equal_to(rhs);
+}
+
+void Block::erase(iterator node)
+{
+   m_instructions.erase(node);
+}
+
+void Block::set_type(Type t)
+{
+   m_blocK_type = t;
+   switch (t) {
+   case vtx:
+   case gds:
+   case tex: m_remaining_slots = 8; break; /* TODO: 16 for >= EVERGREEN */
+   default:
+      m_remaining_slots = 0xffff;
+   }
+}
+
+Block::Block(int nesting_depth, int id):
+   m_nesting_depth(nesting_depth),
+   m_id(id),
+   m_next_index(0)
+{
+   assert(!has_instr_flag(force_cf));
+}
+
+void Block::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void Block::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+void Block::push_back(PInst instr)
+{
+   instr->set_blockid(m_id, m_next_index++);
+   if (m_remaining_slots != 0xffff) {
+      uint32_t new_slots = instr->slots();
+      m_remaining_slots -= new_slots;
+   }
+   if (m_lds_group_start)
+      m_lds_group_requirement += instr->slots();
+
+   m_instructions.push_back(instr);
+}
+
+bool Block::try_reserve_kcache(const AluGroup& group)
+{
+   auto kcache_constants = group.get_kconsts();
+   for (auto& kc : kcache_constants)  {
+      auto u = kc->as_uniform();
+      assert(u);
+      if (!try_reserve_kcache(*u))
+         return false;
+   }
+   return true;
+}
+
+bool Block::try_reserve_kcache(const UniformValue& u)
+{
+   const int kcache_banks = 4; // TODO: handle pre-evergreen
+
+   int bank = u.kcache_bank();
+   int sel  = (u.sel() - 512);
+   int line = sel >> 4;
+
+   bool found = false;
+
+   for (int i = 0; i < kcache_banks && !found; ++i) {
+      if (m_kcache[i].mode) {
+         if (m_kcache[i].bank < bank)
+            continue;
+
+         if ((m_kcache[i].bank == bank &&
+              m_kcache[i].addr > line  + 1) ||
+             m_kcache[i].bank > bank) {
+            if (m_kcache[kcache_banks - 1].mode)
+               return false;
+
+            memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
+            m_kcache[i].mode = KCacheLine::lock_1;
+            m_kcache[i].bank = bank;
+            m_kcache[i].addr = line;
+            return true;
+         }
+
+         int d = line - m_kcache[i].addr;
+
+         if (d == -1) {
+            m_kcache[i].addr--;
+            if (m_kcache[i].mode == KCacheLine::lock_2) {
+               /* we are prepending the line to the current set,
+          * discarding the existing second line,
+          * so we'll have to insert line+2 after it */
+               line += 2;
+               continue;
+            } else if (m_kcache[i].mode == KCacheLine::lock_1) {
+               m_kcache[i].mode = KCacheLine::lock_2;
+               return true;
+            } else {
+               /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
+               return false;
+            }
+         } else if (d == 1) {
+            m_kcache[i].mode = KCacheLine::lock_2;
+            return true;
+         } else if (d == 0)
+            return true;
+      } else { /* free kcache set - use it */
+         m_kcache[i].mode = KCacheLine::lock_1;
+         m_kcache[i].bank = bank;
+         m_kcache[i].addr = line;
+         return true;
+      }
+   }
+   return false;
+}
+
+void Block::lds_group_start(AluInstr *alu)
+{
+   assert(!m_lds_group_start);
+   m_lds_group_start = alu;
+   m_lds_group_requirement = 0;
+}
+
+void Block::lds_group_end()
+{
+   assert(m_lds_group_start);
+   m_lds_group_start->set_required_slots(m_lds_group_requirement);
+   m_lds_group_start = 0;
+}
+
+InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig):
+   m_dest(orig.m_dest),
+   m_dest_swizzle(orig.m_dest_swizzle)
+{
+}
+
+class InstrComparer : public ConstInstrVisitor {
+public:
+   InstrComparer() = default;
+   bool result {false};
+
+#define DECLARE_MEMBER(TYPE)         \
+    InstrComparer(const TYPE *instr) \
+    {                                \
+       this_ ## TYPE = instr;        \
+    }                                \
+                                     \
+    void visit(const TYPE& instr)    \
+    {                                \
+       result = false;               \
+       if (!this_ ## TYPE)           \
+         return;                     \
+      result = this_ ## TYPE->is_equal_to(instr); \
+   }                                 \
+                                     \
+   const TYPE *this_ ## TYPE{nullptr};
+
+   DECLARE_MEMBER(AluInstr);
+   DECLARE_MEMBER(AluGroup);
+   DECLARE_MEMBER(TexInstr);
+   DECLARE_MEMBER(ExportInstr);
+   DECLARE_MEMBER(FetchInstr);
+   DECLARE_MEMBER(Block);
+   DECLARE_MEMBER(ControlFlowInstr);
+   DECLARE_MEMBER(IfInstr);
+   DECLARE_MEMBER(WriteScratchInstr);
+   DECLARE_MEMBER(StreamOutInstr);
+   DECLARE_MEMBER(MemRingOutInstr);
+   DECLARE_MEMBER(EmitVertexInstr);
+   DECLARE_MEMBER(GDSInstr);
+   DECLARE_MEMBER(WriteTFInstr);
+   DECLARE_MEMBER(LDSAtomicInstr);
+   DECLARE_MEMBER(LDSReadInstr);
+   DECLARE_MEMBER(RatInstr);
+};
+
+class InstrCompareForward: public ConstInstrVisitor {
+public:
+
+   void visit(const AluInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const AluGroup& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const TexInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const ExportInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const FetchInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const Block& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const ControlFlowInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const IfInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const WriteScratchInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const StreamOutInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const MemRingOutInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const EmitVertexInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const GDSInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const WriteTFInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const LDSAtomicInstr& instr) override {
+      m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const LDSReadInstr& instr) override {
+         m_comparer = InstrComparer(&instr);
+   }
+
+   void visit(const RatInstr& instr) override {
+         m_comparer = InstrComparer(&instr);
+   }
+
+   InstrComparer m_comparer;
+};
+
+
+bool Instr::equal_to(const Instr& lhs) const
+{
+   InstrCompareForward cmp;
+   accept(cmp);
+   lhs.accept(cmp.m_comparer);
+
+   return cmp.m_comparer.result;
+}
+
+
+
+
+} // ns r600
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h
new file mode 100644
index 0000000..c70427e
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.h
@@ -0,0 +1,314 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "sfn_virtualvalues.h"
+#include "sfn_alu_defines.h"
+#include "sfn_defines.h"
+#include <set>
+#include <list>
+#include <iostream>
+
+namespace r600 {
+
+class ConstInstrVisitor;
+
+class InstrVisitor;
+class AluInstr;
+class AluGroup;
+class TexInstr;
+class ExportInstr;
+class FetchInstr;
+class ControlFlowInstr;
+class IfInstr;
+class WriteScratchInstr;
+class StreamOutInstr;
+class MemRingOutInstr;
+class EmitVertexInstr;
+class GDSInstr;
+class WriteTFInstr;
+class LDSAtomicInstr;
+class LDSReadInstr;
+class RatInstr;
+
+
+int int_from_string_with_prefix(const std::string& str, const std::string& prefix);
+int sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa);
+
+class Instr : public Allocate {
+public:
+
+   enum Flags {
+      always_keep,
+      dead,
+      scheduled,
+      vpm,
+      force_cf,
+      ack_rat_return_write,
+      nflags
+      };
+
+   Instr();
+
+   Instr(const Instr& orig) = default;
+
+   virtual ~Instr();
+
+   using Pointer = R600_POINTER_TYPE(Instr);
+
+   void print(std::ostream& os) const;
+   bool equal_to(const Instr& lhs) const;
+
+   virtual void accept(ConstInstrVisitor& visitor) const = 0;
+   virtual void accept(InstrVisitor& visitor) = 0;
+   virtual bool end_group() const { return true;}
+
+   virtual bool is_last() const;
+
+   void set_always_keep() {m_instr_flags.set(always_keep);}
+   bool set_dead();
+   virtual void set_scheduled() { m_instr_flags.set(scheduled); forward_set_scheduled();}
+   void add_use() {++m_use_count;}
+   void dec_use() {assert(m_use_count > 0); --m_use_count;}
+   bool is_dead() const {return m_instr_flags.test(dead);}
+   bool is_scheduled() const {return m_instr_flags.test(scheduled);}
+   bool keep() const {return m_instr_flags.test(always_keep);}
+   bool has_uses() const {return m_use_count > 0;}
+
+   bool has_instr_flag(Flags f) const  {return m_instr_flags.test(f);}
+   void set_instr_flag(Flags f) { m_instr_flags.set(f);}
+
+   virtual bool replace_source(PRegister old_src, PVirtualValue new_src);
+   virtual bool replace_dest(PRegister new_dest, AluInstr *move_instr);
+
+   virtual int nesting_corr() const { return 0;}
+
+   virtual bool end_block() const { return false;}
+   virtual int nesting_offset() const { return 0;}
+
+   void set_blockid(int id, int index);
+   int block_id() const {return m_block_id;}
+   int index() const { return m_index;}
+
+   void add_required_instr(Instr *instr);
+   void replace_required_instr(Instr *old_instr, Instr *new_instr);
+
+   bool ready() const;
+
+   virtual uint32_t slots() const {return 0;};
+
+   using InstrList = std::list<Instr *, Allocator<Instr *>>;
+
+   const InstrList& dependend_instr() { return m_dependend_instr;}
+
+protected:
+
+   const InstrList& required_instr() const {return m_required_instr; }
+
+private:
+   virtual void forward_set_blockid(int id, int index);
+
+   virtual bool do_ready() const = 0;
+
+   virtual void do_print(std::ostream& os) const = 0;
+   virtual bool propagate_death();
+   virtual void forward_set_scheduled() {}
+
+   InstrList m_required_instr;
+   InstrList m_dependend_instr;
+
+   int m_use_count;
+   int m_block_id;
+   int m_index;
+   std::bitset<nflags> m_instr_flags{0};
+
+};
+using PInst = Instr::Pointer;
+
+class Block : public Instr {
+public:
+
+   enum Type {
+      cf,
+      alu,
+      tex,
+      vtx,
+      gds,
+      unknown
+   };
+
+   using Instructions = std::list<Instr *, Allocator<Instr *>>;
+   using Pointer = R600_POINTER_TYPE(Block);
+   using iterator = Instructions::iterator;
+   using reverse_iterator = Instructions::reverse_iterator;
+   using const_iterator = Instructions::const_iterator;
+
+   Block(int nesting_depth, int id);
+   Block(const Block& orig) = delete;
+
+   void push_back(PInst instr);
+   iterator begin() { return m_instructions.begin(); }
+   iterator end() { return m_instructions.end(); }
+   reverse_iterator rbegin() { return m_instructions.rbegin(); }
+   reverse_iterator rend() { return m_instructions.rend(); }
+
+   const_iterator begin() const { return m_instructions.begin();}
+   const_iterator end() const { return m_instructions.end();}
+
+   bool empty() const { return m_instructions.empty();}
+
+   void erase(iterator node);
+
+   bool is_equal_to(const Block& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   int nesting_depth() const { return m_nesting_depth;}
+
+   int id() const {return m_id;}
+
+   auto type() const {return m_blocK_type; }
+   void set_type(Type t);
+   uint32_t remaining_slots() const { return m_remaining_slots;}
+
+   bool try_reserve_kcache(const AluGroup& group);
+
+   auto last_lds_instr() {return m_last_lds_instr;}
+   void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
+
+   void lds_group_start(AluInstr *alu);
+   void lds_group_end();
+   bool lds_group_active() { return m_lds_group_start != nullptr;}
+
+   size_t size() const { return m_instructions.size();}
+
+private:
+   bool try_reserve_kcache(const UniformValue& u);
+
+   bool do_ready() const override {return true;};
+   void do_print(std::ostream& os) const override;
+   Instructions m_instructions;
+   int m_nesting_depth;
+   int m_id;
+   int m_next_index;
+
+   Type m_blocK_type{unknown};
+   uint32_t m_remaining_slots{0xffff};
+
+   std::array<KCacheLine, 4> m_kcache;
+
+   Instr *m_last_lds_instr{nullptr};
+
+   int m_lds_group_requirement{0};
+   AluInstr *m_lds_group_start{nullptr};
+};
+
+class InstrWithVectorResult : public Instr {
+public:
+   InstrWithVectorResult(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle);
+
+   void set_dest_swizzle(const RegisterVec4::Swizzle& swz) {m_dest_swizzle = swz;}
+   int dest_swizzle(int i) const { return m_dest_swizzle[i];}
+   const RegisterVec4::Swizzle&  all_dest_swizzle() const { return m_dest_swizzle;}
+   const RegisterVec4& dst() const {return m_dest;}
+
+protected:
+   InstrWithVectorResult(const InstrWithVectorResult& orig);
+
+   void print_dest(std::ostream& os) const;
+   bool comp_dest(const RegisterVec4& dest, const RegisterVec4::Swizzle& dest_swizzle) const;
+
+private:
+   RegisterVec4 m_dest;
+   RegisterVec4::Swizzle m_dest_swizzle;
+};
+
+inline bool operator == (const Instr& lhs, const Instr& rhs) {
+   return lhs.equal_to(rhs);
+}
+
+inline bool operator != (const Instr& lhs, const Instr& rhs) {
+   return !(lhs == rhs);
+}
+
+inline std::ostream& operator << (std::ostream& os, const Instr& instr)
+{
+   instr.print(os);
+   return os;
+}
+
+template <typename T, typename = std::enable_if_t<std::is_base_of_v<Instr, T>>>
+std::ostream& operator<<(std::ostream& os, const T& instr) {
+  instr.print(os);
+  return os;
+}
+
+class ConstInstrVisitor {
+public:
+   virtual void visit(const AluInstr& instr) = 0;
+   virtual void visit(const AluGroup& instr) = 0;
+   virtual void visit(const TexInstr& instr) = 0;
+   virtual void visit(const ExportInstr& instr) = 0;
+   virtual void visit(const FetchInstr& instr) = 0;
+   virtual void visit(const Block& instr) = 0;
+   virtual void visit(const ControlFlowInstr& instr) = 0;
+   virtual void visit(const IfInstr& instr) = 0;
+   virtual void visit(const WriteScratchInstr& instr) = 0;
+   virtual void visit(const StreamOutInstr& instr) = 0;
+   virtual void visit(const MemRingOutInstr& instr) = 0;
+   virtual void visit(const EmitVertexInstr& instr) = 0;
+   virtual void visit(const GDSInstr& instr) = 0;
+   virtual void visit(const WriteTFInstr& instr) = 0;
+   virtual void visit(const LDSAtomicInstr& instr) = 0;
+   virtual void visit(const LDSReadInstr& instr) = 0;
+   virtual void visit(const RatInstr& instr) = 0;
+};
+
+class InstrVisitor {
+public:
+   virtual void visit(AluInstr  *instr) = 0;
+   virtual void visit(AluGroup *instr) = 0;
+   virtual void visit(TexInstr *instr) = 0;
+   virtual void visit(ExportInstr *instr) = 0;
+   virtual void visit(FetchInstr *instr) = 0;
+   virtual void visit(Block *instr) = 0;
+   virtual void visit(ControlFlowInstr *instr) = 0;
+   virtual void visit(IfInstr *instr) = 0;
+   virtual void visit(WriteScratchInstr *instr) = 0;
+   virtual void visit(StreamOutInstr *instr) = 0;
+   virtual void visit(MemRingOutInstr *instr) = 0;
+   virtual void visit(EmitVertexInstr *instr) = 0;
+   virtual void visit(GDSInstr *instr) = 0;
+   virtual void visit(WriteTFInstr *instr) = 0;
+   virtual void visit(LDSAtomicInstr *instr) = 0;
+   virtual void visit(LDSReadInstr *instr) = 0;
+   virtual void visit(RatInstr *instr) = 0;
+};
+
+
+} // ns r600
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
new file mode 100644
index 0000000..006a34f
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
@@ -0,0 +1,2449 @@
+#include "sfn_instr_alu.h"
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_tex.h"
+#include "sfn_shader.h"
+#include "sfn_debug.h"
+
+#include <algorithm>
+#include <sstream>
+
+
+namespace r600 {
+
+using std::string;
+using std::istream;
+using std::vector;
+
+AluInstr::AluInstr(EAluOp opcode, PRegister dest,
+                   SrcValues src,
+                   const std::set<AluModifiers>& flags, int slots):
+   m_opcode(opcode),
+   m_dest(dest),
+   m_bank_swizzle(alu_vec_unknown),
+   m_cf_type(cf_alu),
+   m_alu_slots(slots)
+{
+   m_src.swap(src);
+
+   if (m_src.size() == 3)
+      m_alu_flags.set(alu_op3);
+
+   for(auto f : flags)
+      m_alu_flags.set(f);
+
+   ASSERT_OR_THROW(m_src.size() == static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
+                   "Unexpected number of source values");
+
+   if (m_alu_flags.test(alu_write))
+      ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
+
+   update_uses();
+
+}
+
+AluInstr::AluInstr(EAluOp opcode):
+   AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
+{
+}
+
+AluInstr::AluInstr(EAluOp opcode, int chan):
+   AluInstr(opcode, nullptr, SrcValues(), {}, 1)
+{
+   m_fallback_chan = chan;
+}
+
+AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
+                   const std::set<AluModifiers>& m_flags):
+   AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
+{
+
+}
+
+AluInstr::AluInstr(EAluOp opcode, PRegister dest,
+                   PVirtualValue src0, PVirtualValue src1,
+                   const std::set<AluModifiers>& m_flags):
+   AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
+{
+
+}
+
+AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
+                   PVirtualValue src2,
+                   const std::set<AluModifiers>& m_flags):
+   AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
+{
+
+}
+
+AluInstr::AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address):
+   m_lds_opcode(op)
+{
+   set_alu_flag(alu_is_lds);
+
+   m_src.push_back(address);
+   if (src0) {
+      m_src.push_back(src0);
+      if (src1)
+         m_src.push_back(src1);
+   }
+   update_uses();
+}
+
+AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
+   m_lds_opcode(op),
+   m_src(src)
+{
+   for(auto f : flags)
+      set_alu_flag(f);
+
+   set_alu_flag(alu_is_lds);
+   update_uses();
+}
+
+void AluInstr::update_uses()
+{
+   for (auto& s : m_src) {
+      auto r = s->as_register();
+      if (r) {
+         r->add_use(this);
+         // move this to add_use
+         if (r->pin() == pin_array) {
+            auto array_elm = static_cast<LocalArrayValue *>(r);
+            auto addr = array_elm->addr();
+            if (addr && addr->as_register())
+               addr->as_register()->add_use(this);
+         }
+      }
+      auto u = s->as_uniform();
+      if (u && u->buf_addr() && u->buf_addr()->as_register())
+         u->buf_addr()->as_register()->add_use(this);
+   }
+
+   if (m_dest && has_alu_flag(alu_write)) {
+      m_dest->add_parent(this);
+
+      if (m_dest->pin() == pin_array) {
+         // move this to add_parent
+         auto array_elm = static_cast<LocalArrayValue *>(m_dest);
+         auto addr = array_elm->addr();
+         if (addr && addr->as_register())
+            addr->as_register()->add_use(this);
+      }
+   }
+}
+
+void AluInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void AluInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
+   {cf_alu_break, "BREAK"},
+   {cf_alu_continue, "CONT"},
+   {cf_alu_else_after, "ELSE_AFTER"},
+   {cf_alu_extended, "EXTENDED"},
+   {cf_alu_pop_after, "POP_AFTER"},
+   {cf_alu_pop2_after, "POP2_AFTER"},
+   {cf_alu_push_before, "PUSH_BEFORE"}
+};
+
+const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
+   {alu_vec_012, "VEC_012"},
+   {alu_vec_021, "VEC_021"},
+   {alu_vec_102, "VEC_102"},
+   {alu_vec_120, "VEC_120"},
+   {alu_vec_201, "VEC_201"},
+   {alu_vec_210, "VEC_210"}
+};
+
+const AluModifiers AluInstr::src_abs_flags[2] =
+{alu_src0_abs, alu_src1_abs};
+const AluModifiers AluInstr::src_neg_flags[3] =
+{alu_src0_neg, alu_src1_neg, alu_src2_neg};
+const AluModifiers AluInstr::src_rel_flags[3] =
+{alu_src0_rel, alu_src1_rel, alu_src2_rel};
+
+struct ValuePrintFlags {
+   ValuePrintFlags(int im, int f):index_mode(im),
+      flags(f)
+   {
+   }
+   int index_mode = 0;
+   int flags = 0;
+   static const int is_rel = 1;
+   static const int has_abs = 2;
+   static const int has_neg = 4;
+   static const int literal_is_float = 8;
+   static const int index_ar = 16;
+   static const int index_loopidx = 32;
+};
+
+void AluInstr::do_print(std::ostream& os) const
+{
+   const char swzchar[] = "xyzw01?_";
+
+   unsigned i = 0;
+
+   os << "ALU ";
+
+   if (has_alu_flag(alu_is_lds)) {
+      os << "LDS " << lds_ops.at(m_lds_opcode).name;
+      os << " __.x : ";
+   } else {
+
+      os << alu_ops.at(m_opcode).name;
+      if (has_alu_flag(alu_dst_clamp))
+         os << " CLAMP";
+
+      if (m_dest) {
+         if (has_alu_flag(alu_write))
+            os << " " << *m_dest;
+         else
+            os << " __" << "." << swzchar[m_dest->chan()];
+         if (!has_alu_flag(alu_write) && m_dest->pin() != pin_none)
+            os << "@" << m_dest->pin();
+         os << " : ";
+      } else {
+         os << "__." << swzchar[dest_chan()] << " : ";
+      }
+   }
+
+   const int n_source_per_slot = has_alu_flag(alu_is_lds) ?
+                                 m_src.size() : alu_ops.at(m_opcode).nsrc;
+
+   for (int s = 0; s < m_alu_slots; ++s) {
+
+      if (s > 0)
+         os << " +";
+
+      for (int k = 0; k < n_source_per_slot; ++k) {
+         int pflags = 0;
+         if (i)
+            os << ' ';
+         if (has_alu_flag(src_neg_flags[k])) pflags |= ValuePrintFlags::has_neg;
+         if (has_alu_flag(src_rel_flags[k])) pflags |= ValuePrintFlags::is_rel;
+         if (i < 2)
+            if (has_alu_flag(src_abs_flags[k])) pflags |= ValuePrintFlags::has_abs;
+
+         if (pflags & ValuePrintFlags::has_neg) os << '-';
+         if (pflags & ValuePrintFlags::has_abs) os << '|';
+         os << *m_src[i];
+         if (pflags & ValuePrintFlags::has_abs) os << '|';
+         ++i;
+      }
+   }
+
+   os << " {";
+   if (has_alu_flag(alu_write)) os << 'W';
+   if (has_alu_flag(alu_last_instr)) os << 'L';
+   if (has_alu_flag(alu_update_exec)) os << 'E';
+   if (has_alu_flag(alu_update_pred)) os << 'P';
+   os << "}";
+
+   auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
+   if (bs_name != bank_swizzle_map.end())
+      os << ' ' <<  bs_name->second;
+
+   auto cf_name = cf_map.find(m_cf_type);
+   if (cf_name != cf_map.end())
+      os <<  ' ' << cf_name->second;
+}
+
+bool AluInstr::can_propagate_src() const
+{
+   /* We can use the source in the next instruction */
+   if (!can_copy_propagate())
+      return false;
+
+   auto src_reg = m_src[0]->as_register();
+   if (!src_reg)
+      return true;
+
+   assert(m_dest);
+
+
+
+   if (!m_dest->is_ssa()) {
+      return false;
+   }
+
+   if (m_dest->pin() == pin_fully)
+      return m_dest->equal_to(*src_reg);
+
+   if (m_dest->pin() == pin_chan)
+      return src_reg->pin() == pin_none ||
+            (src_reg->pin() == pin_chan &&
+             src_reg->chan() == m_dest->chan());
+
+   return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
+}
+
+bool AluInstr::can_propagate_dest() const
+{
+   if (!can_copy_propagate()){
+      return false;
+   }
+
+   auto src_reg = m_src[0]->as_register();
+   if (!src_reg) {
+      return false;
+   }
+
+   assert(m_dest);
+
+   if (src_reg->pin() == pin_fully ||
+       src_reg->pin() == pin_group) {
+      return false;
+   }
+
+   if (!src_reg->is_ssa())
+      return false;
+
+   if (src_reg->pin() == pin_chan)
+      return m_dest->pin() == pin_none ||
+             m_dest->pin() == pin_free ||
+            ((m_dest->pin() == pin_chan ||
+              m_dest->pin() == pin_group) &&
+             src_reg->chan() == m_dest->chan());
+
+   return (src_reg->pin() == pin_none ||
+           src_reg->pin() == pin_free);
+}
+
+bool AluInstr::can_copy_propagate() const
+{
+   if (m_opcode != op1_mov)
+      return false;
+
+   if (has_alu_flag(alu_src0_abs) ||
+       has_alu_flag(alu_src0_neg) ||
+       has_alu_flag(alu_dst_clamp))
+      return false;
+
+   return has_alu_flag(alu_write);
+}
+
+bool AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   bool process = false;
+
+   if (!check_readport_validation(old_src, new_src))
+      return false;
+
+   /* If the old source is an array element, we assume that there
+    * might have been an (untracked) indirect access, so don't replace
+    * this source */
+   if (old_src->pin() == pin_array)
+      return false;
+
+   if (new_src->get_addr()) {
+      for (auto& s : m_src) {
+         auto addr = s->get_addr();
+         /* can't have two differen't indirect addresses in the same instr */
+         if (addr && !addr->equal_to(*new_src->get_addr()))
+            return false;
+      }
+   }
+
+   if (m_dest) {
+      /* We don't allow src and dst with rel and different indirect register addresses */
+      if (m_dest->pin() == pin_array && new_src->pin() == pin_array) {
+         auto dav = static_cast<const LocalArrayValue *>(m_dest)->addr();
+         auto sav = static_cast<const LocalArrayValue *>(new_src)->addr();
+         if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
+            return false;
+      }
+   }
+
+   /* Check the readports */
+   if (m_alu_slots * alu_ops.at(m_opcode).nsrc > 2 || m_parent_group) {
+      AluReadportReservation read_port_check =
+            !m_parent_group ?
+               AluReadportReservation() :
+               m_parent_group->readport_reserer();
+
+      int nsrc = alu_ops.at(m_opcode).nsrc;
+      PVirtualValue src[3];
+
+      for (int s = 0; s < m_alu_slots; ++s) {
+         for (int i = 0; i < nsrc; ++i) {
+            auto old_s = m_src[i + nsrc * s];
+            src[i] = old_s->equal_to(*old_src) ? new_src : old_s;
+         }
+         AluBankSwizzle bs = alu_vec_012;
+         while (bs != alu_vec_unknown) {
+            if (read_port_check.schedule_vec_src(src,  nsrc, bs)) {
+               break;
+            }
+            ++bs;
+         }
+         if (bs == alu_vec_unknown)
+            return false;
+      }
+      if (m_parent_group)
+         m_parent_group->set_readport_reserer(read_port_check);
+   }
+
+   for (unsigned i = 0; i < m_src.size(); ++i) {
+      if (old_src->equal_to(*m_src[i])) {
+         m_src[i] = new_src;
+         process = true;
+      }
+   }
+   if (process) {
+      auto r = new_src->as_register();
+      if (r)
+         r->add_use(this);
+      old_src->del_use(this);
+   }
+   return process;
+}
+
+void AluInstr::set_sources(SrcValues src)
+{
+   for (auto& s : m_src) {
+      auto r = s->as_register();
+      if (r)
+         r->del_use(this);
+   }
+   m_src.swap(src);
+   for (auto& s : m_src) {
+      auto r = s->as_register();
+      if (r)
+         r->add_use(this);
+   }
+}
+
+bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
+{
+   if (m_dest->equal_to(*new_dest))
+      return false;
+
+   if (m_dest->uses().size() > 1)
+      return false;
+
+   if (new_dest->pin() == pin_array)
+      return false;
+
+   /* Currently we bail out when an array write should be moved, because
+    * decalring an array write is currently not well defined. The
+    * Whole "backwards" copy propagation shoul dprobably be replaced by some
+    * forward peep holew optimization */
+   /*
+   if (new_dest->pin() == pin_array) {
+      auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
+      for (auto s: m_src) {
+         if (s->pin() == pin_array) {
+            auto sav = static_cast<const LocalArrayValue *>(s)->addr();
+            if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
+               return false;
+         }
+      }
+   }
+   */
+
+   if (m_dest->pin() == pin_chan &&
+       new_dest->chan() != m_dest->chan())
+      return false;
+
+
+   if (m_dest->pin() == pin_chan) {
+      if (new_dest->pin() == pin_group)
+         new_dest->set_pin(pin_chgr);
+      else
+         new_dest->set_pin(pin_chan);
+   }
+
+   m_dest = new_dest;
+   if (!move_instr->has_alu_flag(alu_last_instr))
+      reset_alu_flag(alu_last_instr);
+
+   if (has_alu_flag(alu_is_cayman_trans)) {
+      /* Copy propagation puts an instruction into the w channel, but we
+       * don't have the slots for a w channel */
+      if (m_dest->chan() == 3 && m_alu_slots < 4) {
+         m_alu_slots = 4;
+         assert(m_src.size() == 3);
+         m_src.push_back(m_src[0]);
+      }
+   }
+
+   return true;
+}
+
+void AluInstr::pin_sources_to_chan()
+{
+   for (auto s: m_src) {
+      auto r = s->as_register();
+      if (r) {
+         if (r->pin() == pin_free)
+            r->set_pin(pin_chan);
+         else if (r->pin() == pin_group)
+               r->set_pin(pin_chgr);
+      }
+   }
+}
+
+bool AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
+{
+   bool success = true;
+   AluReadportReservation rpr_sum;
+
+   if (m_src.size() < 3)
+      return true;
+
+   unsigned nsrc = alu_ops.at(m_opcode).nsrc;
+   assert(nsrc * m_alu_slots == m_src.size());
+
+
+   for (int s = 0; s < m_alu_slots && success; ++s) {
+      for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
+         auto ireg = m_src.begin() + s * nsrc;
+
+         AluReadportReservation rpr = rpr_sum;
+         PVirtualValue s[3];
+
+         for (unsigned i = 0; i < nsrc; ++i, ++ireg)
+            s[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
+
+         if (rpr.schedule_vec_src(s, nsrc, i)) {
+            rpr_sum = rpr;
+            break;
+         } else {
+            success = false;
+         }
+      }
+   }
+   return success;
+}
+
+void AluInstr::add_extra_dependency(PVirtualValue value)
+{
+   auto reg = value->as_register();
+   if (reg)
+      m_extra_dependencies.insert(reg);
+}
+
+
+bool AluInstr::is_equal_to(const AluInstr& lhs) const
+{
+   if (lhs.m_opcode != m_opcode ||
+       lhs.m_bank_swizzle != m_bank_swizzle ||
+       lhs.m_cf_type != m_cf_type ||
+       lhs.m_alu_flags != m_alu_flags) {
+      return false;
+   }
+
+   if (m_dest) {
+      if (!lhs.m_dest) {
+         return false;
+      } else {
+         if (has_alu_flag(alu_write)) {
+            if (!m_dest->equal_to(*lhs.m_dest))
+               return false;
+         } else {
+            if (m_dest->chan() != lhs.m_dest->chan())
+               return false;
+         }
+      }
+   } else {
+      if (lhs.m_dest)
+         return false;
+   }
+
+   if (m_src.size() != lhs.m_src.size())
+      return false;
+
+   for (unsigned i = 0; i < m_src.size(); ++i) {
+      if (!m_src[i]->equal_to(*lhs.m_src[i]))
+         return false;
+   }
+
+   return true;
+}
+
+class ResolveIndirectArrayAddr: public ConstRegisterVisitor {
+public:
+   void visit(const Register& value){(void) value;}
+   void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");}
+   void visit(const LocalArrayValue& value);
+   void visit(const UniformValue& value);
+   void visit(const LiteralConstant& value) {(void)value;}
+   void visit(const InlineConstant& value) {(void)value;}
+
+   PRegister addr{nullptr};
+   bool is_index{false};
+};
+
+void ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
+{
+   auto a = value.addr();
+   if (a)
+      addr = a->as_register();
+}
+
+void ResolveIndirectArrayAddr::visit(const UniformValue& value)
+{
+   auto a = value.buf_addr();
+   if (a) {
+      addr = a->as_register();
+      is_index = true;
+   }
+}
+
+std::pair<PRegister, bool> AluInstr::indirect_addr() const
+{
+   ResolveIndirectArrayAddr visitor;
+
+   if (m_dest) {
+      m_dest->accept(visitor);
+      if (visitor.addr)
+         return {visitor.addr, false};
+   }
+
+   for (auto s: m_src) {
+      s->accept(visitor);
+      if (visitor.addr) {
+         return {visitor.addr, visitor.is_index};
+      }
+   }
+   return {nullptr, false};
+}
+
+AluGroup *AluInstr::split(ValueFactory& vf)
+{
+   if (m_alu_slots == 1)
+      return nullptr;
+
+   sfn_log << SfnLog::instr << "Split " << *this << "\n";
+
+   auto group = new AluGroup();
+
+   m_dest->del_parent(this);
+
+   for (int s = 0; s < m_alu_slots; ++s) {
+
+      PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
+      if (dst->pin() != pin_chgr) {
+         auto pin = pin_chan;
+         if (dst->pin() == pin_group && m_dest->chan() == s)
+            pin = pin_chgr;
+         dst->set_pin(pin);
+      }
+
+      SrcValues src;
+      for (int i = 0; i < alu_ops.at(m_opcode).nsrc; ++i) {
+         auto old_src = m_src[s * alu_ops.at(m_opcode).nsrc + i];
+         // Make it easy for the scheduler and pin the register to the
+         // channel, otherwise scheduler would have to check whether a
+         // channel switch is possible
+         auto r = old_src->as_register();
+         if (r) {
+            if (r->pin() == pin_free || r->pin() == pin_none)
+               r->set_pin(pin_chan);
+            else if (r->pin() == pin_group)
+               r->set_pin(pin_chgr);
+         }
+         src.push_back(old_src);
+      }
+
+      auto instr = new AluInstr(m_opcode, dst, src, {}, 1);
+      instr->set_blockid(block_id(), index());
+
+      if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
+         if (has_alu_flag(alu_src0_neg))
+            instr->set_alu_flag(alu_src0_neg);
+         if (has_alu_flag(alu_src1_neg))
+            instr->set_alu_flag(alu_src1_neg);
+         if (has_alu_flag(alu_src2_neg))
+            instr->set_alu_flag(alu_src2_neg);
+         if (has_alu_flag(alu_src0_abs))
+            instr->set_alu_flag(alu_src0_abs);
+         if (has_alu_flag(alu_src1_abs))
+            instr->set_alu_flag(alu_src1_abs);
+      }
+      if (has_alu_flag(alu_dst_clamp))
+         instr->set_alu_flag(alu_dst_clamp);
+
+      if (s == m_dest->chan())
+         instr->set_alu_flag(alu_write);
+
+      m_dest->add_parent(instr);
+      sfn_log << SfnLog::instr << "   " << *instr << "\n";
+
+      if (!group->add_instruction(instr)) {
+         std::cerr << "Unable to schedule '" << *instr << "' into\n"
+                   << *group << "\n";
+
+         unreachable("Invalid group instruction");
+      }
+   }
+   group->set_blockid(block_id(), index());
+
+   for (auto s : m_src) {
+      auto r = s->as_register();
+      if (r) {
+         r->del_use(this);
+      }
+   }
+
+   return group;
+}
+
+/* Alu instructions that have SSA dest registers increase the  regietsr pressure
+ * Alu instructions that read from SSA registers may decresase the register pressure
+ * hency evaluate a priorityx values based on register pressure change */
+int AluInstr::register_priority() const
+{
+   int priority = 0;
+   if (!has_alu_flag(alu_no_schedule_bias)) {
+
+      if (m_dest && m_dest->is_ssa() && has_alu_flag(alu_write)) {
+         if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr)
+            priority--;
+      }
+
+      for (const auto s : m_src) {
+         auto r = s->as_register();
+         if (r && r->is_ssa()) {
+            int pending = 0;
+            for (auto b : r->uses()) {
+               if (!b->is_scheduled())
+                  ++pending;
+            }
+            if (pending == 1)
+               ++priority;
+         }
+      }
+   }
+   return priority;
+}
+
+bool AluInstr::propagate_death()
+{
+   if (!m_dest)
+      return true;
+
+   if  (m_dest->pin() == pin_group ||
+        m_dest->pin() == pin_chan) {
+      switch (m_opcode) {
+      case op2_interp_x:
+      case op2_interp_xy:
+      case op2_interp_z:
+      case op2_interp_zw:
+         reset_alu_flag(alu_write);
+         return false;
+      default:
+         ;
+      }
+   }
+
+   if  (m_dest->pin() == pin_array)
+      return false;
+
+   /* We assume that nir does a good job in eliminating all ALU results that
+    * are not needed, and we don't let copy propagation doesn't make the
+    * instruction obsolte, so just keep all */
+   if (has_alu_flag(alu_is_cayman_trans))
+      return false;
+
+   for (auto& src: m_src) {
+      auto reg = src->as_register();
+      if (reg)
+         reg->del_use(this);
+   }
+   return true;
+}
+
+bool AluInstr::has_lds_access() const
+{
+   if (has_alu_flag(alu_is_lds))
+      return true;
+
+   for (auto& s : m_src)
+      if (s->as_inline_const() &&
+          (s->as_inline_const()->sel() == ALU_SRC_LDS_OQ_A_POP))
+         return true;
+
+   return false;
+}
+
+struct OpDescr {
+   union {
+      EAluOp alu_opcode;
+      ESDOp lds_opcode;
+   };
+   int nsrc;
+};
+
+static std::map<std::string, OpDescr> s_alu_map_by_name;
+static std::map<std::string, OpDescr> s_lds_map_by_name;
+
+Instr::Pointer AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group)
+{
+   vector<string> tokens;
+
+   while (is.good() && !is.eof()) {
+      string t;
+      is >> t;
+      if (t.length() > 0) {
+         tokens.push_back(t);
+      }
+   }
+
+   std::set<AluModifiers> flags;
+   auto t = tokens.begin();
+
+   bool is_lds = false;
+
+   if (*t == "LDS") {
+      is_lds = true;
+      t++;
+   }
+
+   string opstr = *t++;
+   string deststr = *t++;
+
+   if (deststr == "CLAMP") {
+      flags.insert(alu_dst_clamp);
+      deststr = *t++;
+   }
+
+   assert(*t == ":");
+   OpDescr op_descr = {{op_invalid}, -1};
+
+   if (is_lds) {
+      auto op = s_lds_map_by_name.find(opstr);
+      if (op == s_lds_map_by_name.end()) {
+         for(auto [opcode, opdescr] : lds_ops ) {
+            if (opstr == opdescr.name) {
+               op_descr.lds_opcode = opcode;
+               op_descr.nsrc = opdescr.nsrc;
+               s_alu_map_by_name[opstr] = op_descr;
+               break;
+            }
+         }
+
+         if (op_descr.nsrc == -1) {
+            std::cerr << "'" << opstr << "'";
+            unreachable("Unknown opcode");
+            return nullptr;
+         }
+      } else {
+         op_descr = op->second;
+      }
+   } else {
+      auto op = s_alu_map_by_name.find(opstr);
+      if (op == s_alu_map_by_name.end()) {
+         for(auto [opcode, opdescr] : alu_ops ) {
+            if (opstr == opdescr.name) {
+               op_descr = {{opcode}, opdescr.nsrc};
+               s_alu_map_by_name[opstr] = op_descr;
+               break;
+            }
+         }
+
+         if (op_descr.nsrc == -1) {
+            std::cerr << "'" << opstr << "'";
+            unreachable("Unknown opcode");
+            return nullptr;
+         }
+      } else {
+         op_descr = op->second;
+      }
+   }
+
+   int slots = 0;
+
+   SrcValues sources;
+   do {
+      ++t;
+      for (int i = 0; i < op_descr.nsrc; ++i) {
+         string srcstr = *t++;
+
+         if (srcstr[0] == '-') {
+            if (!slots)
+               flags.insert(AluInstr::src_neg_flags[i]);
+            else
+               assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end());
+            srcstr = srcstr.substr(1);
+         }
+
+         if (srcstr[0] == '|') {
+            assert(srcstr[srcstr.length() - 1] == '|');
+            if (!slots)
+               flags.insert(AluInstr::src_abs_flags[i]);
+            else
+               assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end());
+            srcstr = srcstr.substr(1, srcstr.length() - 2);
+         }
+
+         auto src = value_factory.src_from_string(srcstr);
+         if (!src) {
+            std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
+            assert(src);
+         }
+         sources.push_back(src);
+      }
+      ++slots;
+   } while (t != tokens.end() && *t == "+");
+
+   AluBankSwizzle bank_swizzle = alu_vec_unknown;
+   ECFAluOpCode cf = cf_alu;
+
+   while (t != tokens.end()) {
+
+      switch ((*t)[0]) {
+      case '{': {
+         auto iflag = t->begin() + 1;
+         while (iflag != t->end()) {
+            if (*iflag == '}')
+               break;
+
+            switch (*iflag) {
+            case 'L': flags.insert(alu_last_instr); break;
+            case 'W': flags.insert(alu_write); break;
+            case 'E': flags.insert(alu_update_exec); break;
+            case 'P': flags.insert(alu_update_pred); break;
+            }
+            ++iflag;
+         }
+      }
+      break;
+
+      case 'V': {
+         string bs = *t;
+         if (bs == "VEC_012")
+            bank_swizzle = alu_vec_012;
+         else if (bs == "VEC_021")
+            bank_swizzle = alu_vec_021;
+         else if (bs == "VEC_102")
+            bank_swizzle = alu_vec_102;
+         else if (bs == "VEC_120")
+            bank_swizzle = alu_vec_120;
+         else if (bs == "VEC_201")
+            bank_swizzle = alu_vec_201;
+         else if (bs == "VEC_210")
+            bank_swizzle = alu_vec_210;
+         else {
+            std::cerr << "'" << bs << "': ";
+            unreachable("Unknowe bankswizzle given");
+         }
+      }
+      break;
+
+      default: {
+         string cf_str = *t;
+         if (cf_str == "PUSH_BEFORE")
+            cf = cf_alu_push_before;
+         else if (cf_str == "POP_AFTER")
+            cf = cf_alu_pop_after;
+         else if (cf_str == "POP2_AFTER")
+            cf = cf_alu_pop2_after;
+         else if (cf_str == "EXTENDED")
+            cf = cf_alu_extended;
+         else if (cf_str == "BREAK")
+            cf = cf_alu_break;
+         else if (cf_str == "CONT")
+            cf = cf_alu_continue;
+         else if (cf_str == "ELSE_AFTER")
+            cf = cf_alu_else_after;
+         else {
+            std::cerr << " '" << cf_str << "' ";
+            unreachable("Unknown tocken in ALU instruction");
+         }
+      }
+      }
+      ++t;
+   }
+
+   PRegister dest = nullptr;
+   // construct instruction
+   if (deststr != "(null)")
+      dest = value_factory.dest_from_string(deststr);
+
+   AluInstr *retval = nullptr;
+   if (is_lds)
+      retval = new AluInstr(op_descr.lds_opcode, sources, flags);
+   else
+      retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
+
+   retval->set_bank_swizzle(bank_swizzle);
+   retval->set_cf_type(cf);
+   if (group) {
+      group->add_instruction(retval);
+      retval= nullptr;
+   }
+   return retval;
+}
+
+bool AluInstr::do_ready() const
+{
+   /* Alu instructions are shuffled by the scheduler, so
+    * we have to make sure that required ops are already
+    * scheduled before marking this one ready */
+   for (auto i: required_instr()) {
+      if (!i->is_scheduled())
+         return false;
+   }
+
+   for (auto s : m_src) {
+      auto r = s->as_register();
+      if (r) {
+         if (!r->ready(block_id(), index()))
+            return false;
+      }
+      auto u = s->as_uniform();
+      if (u && u->buf_addr() && u->buf_addr()->as_register()) {
+         if (!u->buf_addr()->as_register()->ready(block_id(), index()))
+            return false;
+      }
+   }
+
+   if (m_dest && !m_dest->is_ssa()) {
+      if (m_dest->pin() == pin_array) {
+         auto av = static_cast<const LocalArrayValue *>(m_dest);
+         auto addr = av->addr();
+         /* For true indiect dest access we have to make sure that all
+       * instructions that write the value before are schedukled */
+         if (addr && (!addr->ready(block_id(), index()) ||
+                      !m_dest->ready(block_id(), index() - 1)))
+            return false;
+      }
+
+      /* If a register is updates, we have to make sure that uses before that
+       * update are scheduled, otherwise we may use the updated value when we
+       * shouldn't */
+      for (auto u : m_dest->uses()) {
+         if (u->block_id() <= block_id() && u->index() < index() &&
+             !u->is_scheduled()) {
+            return false;
+         }
+      }
+   }
+
+   for (auto& r : m_extra_dependencies) {
+      if (!r->ready(block_id(), index()))
+         return false;
+   }
+
+   return true;
+}
+
+void AluInstrVisitor::visit(AluGroup *instr)
+{
+   for (auto& i : *instr) {
+      if (i)
+         i->accept(*this);
+   }
+}
+
+void AluInstrVisitor::visit(Block *instr)
+{
+   for (auto& i : *instr)
+      i->accept(*this);
+}
+
+void AluInstrVisitor::visit(IfInstr *instr)
+{
+   instr->predicate()->accept(*this);
+}
+
+static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
+
+static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                         const AluOpFlags& flags = 0);
+static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan);
+static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
+static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
+static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
+static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
+
+static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                         AluInstr::Op2Options opts = AluInstr::op2_opt_none);
+static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                             AluInstr::Op2Options opts = AluInstr::op2_opt_none);
+static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                  const std::array<int, 3>& src_shuffle = {0,1,2});
+static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
+static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
+
+static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+
+static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
+static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
+static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
+static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
+static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
+static bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);
+
+static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
+static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);
+
+static bool emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
+static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
+
+static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+
+static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+
+static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
+
+static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader);
+
+static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader);
+
+static bool check_64_bit_op_src(nir_src *src, void *state)
+{
+   if (nir_src_bit_size(*src) == 64) {
+      *(bool*)state = true;
+      return false;
+   }
+   return true;
+}
+
+static bool check_64_bit_op_dest(nir_dest *dest, void *state)
+{
+   if (nir_dest_bit_size(*dest) == 64) {
+      *(bool*)state = true;
+      return false;
+   }
+   return true;
+}
+
+bool AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
+{
+   bool is_64bit_op = false;
+   nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
+   if (!is_64bit_op)
+      nir_foreach_dest(&alu->instr, check_64_bit_op_dest, &is_64bit_op);
+
+
+   if (is_64bit_op) {
+      switch (alu->op) {
+      case nir_op_pack_64_2x32:
+      case nir_op_unpack_64_2x32:
+      case nir_op_pack_64_2x32_split:
+      case nir_op_pack_half_2x16_split:
+      case nir_op_unpack_64_2x32_split_x:
+      case nir_op_unpack_64_2x32_split_y: break;
+      case nir_op_mov: return emit_alu_mov_64bit(*alu, shader);
+      case nir_op_fneg: return emit_alu_neg(*alu, shader);
+      case nir_op_ffract: return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
+      case nir_op_feq32: return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
+      case nir_op_fge32: return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
+      case nir_op_flt32: return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
+      case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
+      case nir_op_ffma: return emit_alu_fma_64bit(*alu, op3_fma_64, shader);         
+
+      case nir_op_fadd: return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
+      case nir_op_fmul: return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
+      case nir_op_fmax: return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
+      case nir_op_fmin: return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
+      case nir_op_b2f64: return emit_alu_b2f64(*alu, shader);
+      case nir_op_f2f64: return emit_alu_f2f64(*alu, shader);
+      case nir_op_i2f64: return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
+      case nir_op_u2f64: return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
+      case nir_op_f2f32: return emit_alu_f2f32(*alu, shader);
+      case nir_op_fabs: return emit_alu_abs64(*alu, shader);
+      case nir_op_fsqrt: return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
+      case nir_op_frcp: return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
+      case nir_op_frsq: return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
+      case nir_op_vec2: return emit_alu_vec2_64(*alu, shader);
+      default:
+         return false;
+         ;
+      }
+   }
+
+
+   if (shader.chip_class() == ISA_CC_CAYMAN) {
+      switch (alu->op) {
+      case nir_op_fcos_r600: return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
+      case nir_op_fexp2: return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
+      case nir_op_flog2: return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
+      case nir_op_frcp: return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
+      case nir_op_frsq: return emit_alu_trans_op1_cayman(*alu,  op1_recipsqrt_ieee1, shader);
+      case nir_op_fsqrt: return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
+      case nir_op_fsin_r600: return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
+      case nir_op_i2f32: return emit_alu_op1(*alu, op1_int_to_flt, shader);
+      case nir_op_u2f32: return emit_alu_op1(*alu, op1_uint_to_flt, shader);
+      case nir_op_imul: return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
+      case nir_op_imul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
+      case nir_op_umul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
+      case nir_op_f2u32: return emit_alu_op1(*alu, op1_flt_to_uint, shader);
+      case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
+      default:
+         ;
+      }
+   } else {
+      switch (alu->op) {
+      case nir_op_fcos_r600: return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
+      case nir_op_fexp2: return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
+      case nir_op_flog2: return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
+      case nir_op_frcp: return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
+      case nir_op_frsq: return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
+      case nir_op_fsin_r600: return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
+      case nir_op_fsqrt: return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
+      case nir_op_i2f32: return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
+      case nir_op_u2f32: return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
+      case nir_op_imul: return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
+      case nir_op_imul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
+      case nir_op_umul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
+      case nir_op_f2i32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
+      case nir_op_f2u32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
+      default:
+         ;
+      }
+   }
+
+   switch (alu->op) {
+   case nir_op_b2b1: return emit_alu_op1(*alu, op1_mov, shader);
+   case nir_op_b2b32: return emit_alu_op1(*alu, op1_mov, shader);
+   case nir_op_b2f32: return emit_alu_b2x(*alu, ALU_SRC_1, shader);
+   case nir_op_b2i32: return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);
+
+   case nir_op_bfm: return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
+   case nir_op_bit_count: return emit_alu_op1(*alu, op1_bcnt_int, shader);
+
+   case nir_op_bitfield_reverse: return emit_alu_op1(*alu, op1_bfrev_int, shader);
+   case nir_op_bitfield_select: return emit_alu_op3(*alu, op3_bfi_int, shader);
+
+   case nir_op_b32all_fequal2: return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
+   case nir_op_b32all_fequal3: return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
+   case nir_op_b32all_fequal4: return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
+   case nir_op_b32all_iequal2: return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
+   case nir_op_b32all_iequal3: return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
+   case nir_op_b32all_iequal4: return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
+   case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
+   case nir_op_b32any_fnequal3: return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
+   case nir_op_b32any_fnequal4: return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
+   case nir_op_b32any_inequal2: return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
+   case nir_op_b32any_inequal3: return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
+   case nir_op_b32any_inequal4: return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
+   case nir_op_b32csel: return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
+
+   case nir_op_f2b32: return emit_alu_comb_with_zero(*alu, op2_setne_dx10, shader);
+   case nir_op_fabs: return emit_alu_op1(*alu, op1_mov, shader,  {1 << alu_src0_abs});
+   case nir_op_fadd: return emit_alu_op2(*alu, op2_add, shader);
+   case nir_op_fceil: return emit_alu_op1(*alu, op1_ceil, shader);
+   case nir_op_fcsel: return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
+   case nir_op_fcsel_ge: return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
+   case nir_op_fcsel_gt: return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});
+
+   case nir_op_fdot2: return emit_dot(*alu, 2, shader);
+   case nir_op_fdot3: return emit_dot(*alu, 3, shader);
+   case nir_op_fdot4: return emit_dot(*alu, 4, shader);
+
+   case nir_op_feq32:
+   case nir_op_feq: return emit_alu_op2(*alu, op2_sete_dx10, shader);
+   case nir_op_ffloor: return emit_alu_op1(*alu, op1_floor, shader);
+   case nir_op_ffract: return emit_alu_op1(*alu, op1_fract, shader);
+   case nir_op_fge32: return emit_alu_op2(*alu, op2_setge_dx10, shader);
+   case nir_op_fge: return emit_alu_op2(*alu, op2_setge_dx10, shader);
+   case nir_op_find_lsb: return emit_alu_op1(*alu, op1_ffbl_int, shader);
+
+   case nir_op_flt32: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
+   case nir_op_flt: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
+   case nir_op_fmax: return emit_alu_op2(*alu, op2_max_dx10, shader);
+   case nir_op_fmin: return emit_alu_op2(*alu, op2_min_dx10, shader);
+   case nir_op_fmul: return emit_alu_op2(*alu, op2_mul_ieee, shader);
+   case nir_op_fneg: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_neg});
+   case nir_op_fneu32: return emit_alu_op2(*alu, op2_setne_dx10, shader);
+   case nir_op_fneu: return emit_alu_op2(*alu, op2_setne_dx10, shader);
+
+   case nir_op_fround_even: return emit_alu_op1(*alu, op1_rndne, shader);
+   case nir_op_fsat: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp});
+   case nir_op_fsub: return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
+   case nir_op_ftrunc: return emit_alu_op1(*alu, op1_trunc, shader);
+   case nir_op_i2b1:
+   case nir_op_i2b32: return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader);
+   case nir_op_iadd: return emit_alu_op2_int(*alu, op2_add_int, shader);
+   case nir_op_iand: return emit_alu_op2_int(*alu, op2_and_int, shader);
+   case nir_op_ibfe: return emit_alu_op3(*alu, op3_bfe_int, shader);
+   case nir_op_i32csel_ge: return emit_alu_op3(*alu, op3_cndge_int, shader,  {0, 1, 2});
+   case nir_op_i32csel_gt: return emit_alu_op3(*alu, op3_cndgt_int, shader,  {0, 1, 2});
+   case nir_op_ieq32: return emit_alu_op2_int(*alu, op2_sete_int, shader);
+   case nir_op_ieq: return emit_alu_op2_int(*alu, op2_sete_int, shader);
+   case nir_op_ifind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_int, shader);
+   case nir_op_ige32: return emit_alu_op2_int(*alu, op2_setge_int, shader);
+   case nir_op_ige: return emit_alu_op2_int(*alu, op2_setge_int, shader);
+   case nir_op_ilt32: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
+   case nir_op_ilt: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
+   case nir_op_imax: return emit_alu_op2_int(*alu, op2_max_int, shader);
+   case nir_op_imin: return emit_alu_op2_int(*alu, op2_min_int, shader);
+   case nir_op_ine32: return emit_alu_op2_int(*alu, op2_setne_int, shader);
+   case nir_op_ine: return emit_alu_op2_int(*alu, op2_setne_int, shader);
+   case nir_op_ineg: return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
+   case nir_op_inot: return emit_alu_op1(*alu, op1_not_int, shader);
+   case nir_op_ior: return emit_alu_op2_int(*alu, op2_or_int, shader);
+   case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader);
+   case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader);
+   case nir_op_isub: return emit_alu_op2_int(*alu, op2_sub_int, shader);
+   case nir_op_ixor: return emit_alu_op2_int(*alu, op2_xor_int, shader);
+   case nir_op_pack_64_2x32: return emit_pack_64_2x32(*alu, shader);
+   case nir_op_unpack_64_2x32: return emit_unpack_64_2x32(*alu, shader);
+   case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(*alu, shader);
+   case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(*alu, shader);
+   case nir_op_slt: return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
+   case nir_op_sge: return emit_alu_op2(*alu, op2_setge, shader);
+   case nir_op_ubfe: return emit_alu_op3(*alu, op3_bfe_uint, shader);
+   case nir_op_ufind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_uint, shader);
+   case nir_op_uge32: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
+   case nir_op_uge: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
+   case nir_op_ult32: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
+   case nir_op_ult: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
+   case nir_op_umad24: return emit_alu_op3(*alu, op3_muladd_uint24, shader,  {0, 1, 2});
+   case nir_op_umax: return emit_alu_op2_int(*alu, op2_max_uint, shader);
+   case nir_op_umin: return emit_alu_op2_int(*alu, op2_min_uint, shader);
+   case nir_op_umul24: return emit_alu_op2(*alu, op2_mul_uint24, shader);
+   case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader);
+   case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(*alu, 0, shader);
+   case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(*alu, 1, shader);
+   case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(*alu, shader);
+   case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(*alu, shader);
+
+
+   case nir_op_ffma: return emit_alu_op3(*alu, op3_muladd_ieee, shader);
+   case nir_op_mov: return emit_alu_op1(*alu, op1_mov, shader);
+   case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
+   case nir_op_vec2: return emit_create_vec(*alu, 2, shader);
+   case nir_op_vec3: return emit_create_vec(*alu, 3, shader);
+   case nir_op_vec4: return emit_create_vec(*alu, 4, shader);
+
+   case nir_op_fddx:
+   case nir_op_fddx_coarse: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader);
+   case nir_op_fddx_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader);
+   case nir_op_fddy:
+   case nir_op_fddy_coarse: return emit_tex_fdd(*alu,TexInstr::get_gradient_v, false, shader);
+   case nir_op_fddy_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_v,  true, shader);
+   case nir_op_cube_r600: return emit_alu_cube(*alu, shader);
+   default:
+      fprintf(stderr, "Unknown instruction '");
+      nir_print_instr(&alu->instr, stderr);
+      fprintf(stderr, "'\n");
+      assert(0);
+   return false;
+   }
+}
+
+static Pin pin_for_components(const nir_alu_instr& alu)
+{
+   return (alu.dest.dest.is_ssa &&
+           (nir_dest_num_components(alu.dest.dest) == 1)) ? pin_free : pin_none;
+
+}
+
+static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan)
+{
+   auto& value_factory = shader.value_factory();
+
+   auto group = new AluGroup();
+
+   AluInstr *ir = nullptr;
+
+   int swz[2] = {0,1};
+   if (switch_chan) {
+      swz[0] = 1;
+      swz[1] = 0;
+   }
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      for (unsigned c = 0; c < 2 ; ++c) {
+         ir = new AluInstr(opcode,
+                           value_factory.dest(alu.dest, 2 * i + c, pin_chan),
+                           value_factory.src64(alu.src[0], i, swz[c]),
+                           {alu_write});
+         group->add_instruction(ir);
+      }
+      if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   AluInstr *ir = nullptr;
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      for (unsigned c = 0; c < 2 ; ++c) {
+         ir = new AluInstr(op1_mov,
+                           value_factory.dest(alu.dest, 2 * i + c, pin_free),
+                           value_factory.src64(alu.src[0], i, c),
+                           {alu_write});
+         shader.emit_instruction(ir);
+      }
+      if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   auto group = new AluGroup();
+
+   AluInstr *ir = nullptr;
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      for (unsigned c = 0; c < 2 ; ++c) {
+         ir = new AluInstr(op1_mov,
+                           value_factory.dest(alu.dest, 2 * i + c, pin_chan),
+                           value_factory.src64(alu.src[0], i, c),
+                           {alu_write});
+         group->add_instruction(ir);
+      }
+      ir->set_alu_flag(alu_src0_neg);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   assert(nir_dest_num_components(alu.dest.dest) == 1);
+
+   shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_chan),
+                                        value_factory.src64(alu.src[0], 0, 0),
+                                        AluInstr::write));
+
+   auto ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, 1, pin_chan),
+                          value_factory.src64(alu.src[0], 0, 1),
+                          AluInstr::last_write);
+   ir->set_alu_flag(alu_src0_abs);
+   shader.emit_instruction(ir);
+   return true;
+}
+
+static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_src)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+   int order[2] =  {0, 1};
+   if (switch_src) {
+      order[0] = 1;
+      order[1] = 0;
+   }
+
+   int num_emit0  = opcode == op2_mul_64 ? 3 : 1;
+
+   assert(num_emit0 == 1 || nir_dest_num_components(alu.dest.dest) == 1);
+
+
+   for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
+      int i = 0;
+      for (; i < num_emit0; ++i) {
+         auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
+                             value_factory.dummy_dest(i);
+
+         ir = new AluInstr(opcode,
+                           dest,
+                           value_factory.src64(alu.src[order[0]], k, 1),
+                           value_factory.src64(alu.src[order[1]], k, 1),
+                           i < 2 ? AluInstr::write : AluInstr::empty);
+
+         if (alu.src[0].abs) ir->set_alu_flag(switch_src ? alu_src1_abs : alu_src0_abs);
+         if (alu.src[1].abs) ir->set_alu_flag(switch_src ? alu_src0_abs : alu_src1_abs);
+         if (alu.src[0].negate) ir->set_alu_flag(switch_src ? alu_src1_neg : alu_src0_neg);
+         if (alu.src[1].negate) ir->set_alu_flag(switch_src ? alu_src0_neg : alu_src1_neg);
+         if (alu.dest.saturate && i == 0) {
+            ir->set_alu_flag(alu_dst_clamp);
+         }
+
+         group->add_instruction(ir);
+      }
+
+      auto dest = i == 1 ? value_factory.dest(alu.dest, i, pin_chan) :
+                           value_factory.dummy_dest(i);
+
+      ir = new AluInstr(opcode,
+                        dest,
+                        value_factory.src64(alu.src[order[0]], k, 0),
+                        value_factory.src64(alu.src[order[1]], k, 0),
+                        i == 1 ? AluInstr::write : AluInstr::empty);
+      group->add_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode,
+                                       Shader& shader, bool switch_order)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   int order[2] =  {0, 1};
+   if (switch_order) {
+      order[0] = 1;
+      order[1] = 0;
+   }
+
+   AluInstr::SrcValues src(4);
+
+   for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
+      auto dest = value_factory.dest(alu.dest, 2 * k, pin_chan);
+      src[0] = value_factory.src64(alu.src[order[0]], k, 1);
+      src[1] = value_factory.src64(alu.src[order[1]], k, 1);
+      src[2] = value_factory.src64(alu.src[order[0]], k, 0);
+      src[3] = value_factory.src64(alu.src[order[1]], k, 0);
+
+      ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
+
+      if (alu.src[0].abs) ir->set_alu_flag(switch_order ? alu_src1_abs : alu_src0_abs);
+      if (alu.src[1].abs) ir->set_alu_flag(switch_order ? alu_src0_abs : alu_src1_abs);
+      if (alu.src[0].negate) ir->set_alu_flag(switch_order ? alu_src1_neg : alu_src0_neg);
+      if (alu.src[1].negate) ir->set_alu_flag(switch_order ? alu_src0_neg : alu_src1_neg);
+      ir->set_alu_flag(alu_64bit_op);
+
+      shader.emit_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   return true;
+}
+
+static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 3; ++i) {
+      ir = new AluInstr(opcode,
+                        i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
+                                value_factory.dummy_dest(i),
+                        value_factory.src64(alu.src[0], 0, 1),
+                        value_factory.src64(alu.src[0], 0, 0),
+                        i < 2 ? AluInstr::write : AluInstr::empty);
+
+      if (alu.src[0].abs || opcode == op1_sqrt_64) ir->set_alu_flag(alu_src1_abs);
+      if (alu.src[0].negate) ir->set_alu_flag(alu_src1_neg);
+
+      group->add_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+
+}
+
+static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 4 ; ++i) {
+
+      int chan = i < 3 ? 1 : 0;
+      auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
+                          value_factory.dummy_dest(i);
+
+      ir = new AluInstr(opcode,
+                        dest,
+                        value_factory.src64(alu.src[0], 0, chan),
+                        value_factory.src64(alu.src[1], 0, chan),
+                        value_factory.src64(alu.src[2], 0, chan),
+                        i < 2 ? AluInstr::write : AluInstr::empty);
+
+      if (i < 3) {
+         if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+         if (alu.src[1].negate) ir->set_alu_flag(alu_src1_neg);
+         if (alu.src[2].negate) ir->set_alu_flag(alu_src2_neg);
+      }
+
+      group->add_instruction(ir);
+
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      ir = new AluInstr(op2_and_int,
+                           value_factory.dest(alu.dest, 2 * i, pin_group),
+                           value_factory.src(alu.src[0], i),
+                           value_factory.zero(),
+                           {alu_write});
+       group->add_instruction(ir);
+
+       ir = new AluInstr(op2_and_int,
+                         value_factory.dest(alu.dest, 2 * i  + 1, pin_group),
+                         value_factory.src(alu.src[0], i),
+                         value_factory.literal(0x3ff00000),
+                         {alu_write});
+        group->add_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
+{
+   /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+
+   assert(nir_dest_num_components(alu.dest.dest) == 1);
+
+   auto tmpx = value_factory.temp_register();
+   shader.emit_instruction(new AluInstr(op2_and_int, tmpx,
+                                        value_factory.src(alu.src[0], 0),
+                                        value_factory.literal(0xffffff00),
+                                        AluInstr::write));
+   auto tmpy = value_factory.temp_register();
+   shader.emit_instruction(new AluInstr(op2_and_int, tmpy,
+                                        value_factory.src(alu.src[0], 0),
+                                        value_factory.literal(0xff),
+                                        AluInstr::last_write));
+
+
+   auto tmpx2 = value_factory.temp_register();
+   auto tmpy2 = value_factory.temp_register();
+   shader.emit_instruction(new AluInstr(op, tmpx2, tmpx,
+                                        AluInstr::last_write));
+   shader.emit_instruction(new AluInstr(op, tmpy2, tmpy,
+                                        AluInstr::last_write));
+
+   auto tmpx3 = value_factory.temp_register(0);
+   auto tmpy3 = value_factory.temp_register(1);
+   auto tmpz3 = value_factory.temp_register(2);
+   auto tmpw3 = value_factory.temp_register(3);
+
+
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     tmpx3,
+                     tmpx2, AluInstr::write);
+   group->add_instruction(ir);
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     tmpy3,
+                     value_factory.zero(), AluInstr::write);
+   group->add_instruction(ir);
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     tmpz3,
+                     tmpy2, AluInstr::write);
+   group->add_instruction(ir);
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     tmpw3,
+                     value_factory.zero(), AluInstr::last_write);
+   group->add_instruction(ir);
+   shader.emit_instruction(group);
+
+   group = new AluGroup();
+
+   ir = new AluInstr(op2_add_64,
+                     value_factory.dest(alu.dest, 0, pin_chan),
+                     tmpy3, tmpw3, AluInstr::write);
+   group->add_instruction(ir);
+   ir = new AluInstr(op2_add_64,
+                     value_factory.dest(alu.dest, 1, pin_chan),
+                     tmpx3, tmpz3, AluInstr::write);
+   group->add_instruction(ir);
+   shader.emit_instruction(group);
+
+   return true;
+}
+
+static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+
+   assert(nir_dest_num_components(alu.dest.dest) == 1);
+
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     value_factory.dest(alu.dest, 0, pin_chan),
+                     value_factory.src(alu.src[0], 0), AluInstr::write);
+   group->add_instruction(ir);
+   ir = new AluInstr(op1_flt32_to_flt64,
+                     value_factory.dest(alu.dest, 1, pin_chan),
+                     value_factory.zero(), AluInstr::last_write);
+   group->add_instruction(ir);
+   shader.emit_instruction(group);
+   return true;
+}
+
+static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto group = new AluGroup();
+   AluInstr *ir = nullptr;
+
+   ir = new AluInstr(op1v_flt64_to_flt32,
+                     value_factory.dest(alu.dest, 0, pin_chan),
+                     value_factory.src64(alu.src[0], 0, 1), {alu_write});
+   group->add_instruction(ir);
+   ir = new AluInstr(op1v_flt64_to_flt32,
+                     value_factory.dummy_dest(1),
+                     value_factory.src64(alu.src[0], 0, 0), AluInstr::last);
+   group->add_instruction(ir);
+   shader.emit_instruction(group);
+   return true;
+
+}
+
+static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   auto pin = pin_for_components(alu);
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(op2_and_int,
+                           value_factory.dest(alu.dest, i, pin),
+                           value_factory.src(alu.src[0], i),
+                           value_factory.inline_const(mask, 0),
+                           {alu_write});
+         if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+         if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, const AluOpFlags& flags)
+{
+   auto& value_factory = shader.value_factory();
+
+   AluInstr *ir = nullptr;
+   auto pin = pin_for_components(alu);
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode, value_factory.dest(alu.dest, i, pin),
+                           value_factory.src(alu.src[0], i), {alu_write});
+
+         if (flags.test(alu_src0_abs) || alu.src[0].abs)
+            ir->set_alu_flag(alu_src0_abs);
+
+         if (alu.src[0].negate ^ flags.test(alu_src0_neg))
+            ir->set_alu_flag(alu_src0_neg);
+
+         if (flags.test(alu_dst_clamp) || alu.dest.saturate)
+            ir->set_alu_flag(alu_dst_clamp);
+
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                  AluInstr::Op2Options opts)
+{
+   auto& value_factory = shader.value_factory();
+   const nir_alu_src *src0 = &alu.src[0];
+   const nir_alu_src *src1 = &alu.src[1];
+
+   int idx0 = 0;
+   int idx1 = 1;
+   if (opts & AluInstr::op2_opt_reverse) {
+      std::swap(src0, src1);
+      std::swap(idx0, idx1);
+   }
+
+   bool src1_negate = (opts & AluInstr::op2_opt_neg_src1) ^ src1->negate;
+
+   auto pin = pin_for_components(alu);
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode,
+                           value_factory.dest(alu.dest.dest, i, pin),
+                           value_factory.src(*src0, i),
+                           value_factory.src(*src1, i), {alu_write});
+
+         if (src0->negate) ir->set_alu_flag(alu_src0_neg);
+         if (src0->abs) ir->set_alu_flag(alu_src0_abs);
+         if (src1_negate) ir->set_alu_flag(alu_src1_neg);
+         if (src1->abs) ir->set_alu_flag(alu_src1_abs);
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                             AluInstr::Op2Options opts)
+{
+   assert(!alu.src[0].abs);
+   assert(!alu.src[0].negate);
+   assert(!alu.src[1].abs);
+   assert(!alu.src[1].negate);
+
+   return emit_alu_op2(alu, opcode, shader, opts);
+}
+
+static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
+                  const std::array<int, 3>& src_shuffle)
+{
+   auto& value_factory = shader.value_factory();
+   const nir_alu_src *src[3];
+   src[0] = &alu.src[src_shuffle[0]];
+   src[1] = &alu.src[src_shuffle[1]];
+   src[2] = &alu.src[src_shuffle[2]];
+
+   auto pin = pin_for_components(alu);
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
+                           value_factory.src(*src[0], i),
+               value_factory.src(*src[1], i),
+               value_factory.src(*src[2], i),
+         {alu_write});
+
+         if (src[0]->negate) ir->set_alu_flag(alu_src0_neg);
+         if (src[1]->negate) ir->set_alu_flag(alu_src1_neg);
+         if (src[2]->negate) ir->set_alu_flag(alu_src2_neg);
+
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         ir->set_alu_flag(alu_write);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   AluInstr *ir = nullptr;
+   auto& value_factory = shader.value_factory();
+
+   PRegister tmp[2];
+   tmp[0] = value_factory.temp_register();
+   tmp[1] = value_factory.temp_register();
+
+   for (unsigned i = 0; i < 2 ; ++i) {
+      ir = new AluInstr(opcode, tmp[i],
+                        value_factory.src(alu.src[0], i),
+                        value_factory.src(alu.src[1], i), {alu_write});
+      if (alu.src[0].abs)
+         ir->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate)
+         ir->set_alu_flag(alu_src0_neg);
+
+      if (alu.src[1].abs)
+         ir->set_alu_flag(alu_src1_abs);
+      if (alu.src[1].negate)
+         ir->set_alu_flag(alu_src1_neg);
+
+      shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+
+   opcode = (opcode == op2_setne_dx10) ? op2_or_int: op2_and_int;
+   ir = new AluInstr(opcode,
+                     value_factory.dest(alu.dest, 0, pin_free),
+                     tmp[0], tmp[1], AluInstr::last_write);
+   shader.emit_instruction(ir);
+   return true;
+}
+
+static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
+{
+   /* This should probabyl be lowered in nir */
+   auto& value_factory = shader.value_factory();
+
+   AluInstr *ir = nullptr;
+   RegisterVec4 v = value_factory.temp_vec4(pin_group);
+   AluInstr::SrcValues s;
+
+   for (int i = 0; i < nc ; ++i) {
+      s.push_back(v[i]);
+   }
+
+   for (int i = nc; i < 4 ; ++i)
+      s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
+
+   for (int i = 0; i < nc ; ++i) {
+      ir = new AluInstr(op, v[i],
+                        value_factory.src(alu.src[0], i),
+                        value_factory.src(alu.src[1], i), {alu_write});
+
+      if (alu.src[0].abs)
+         ir->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate)
+         ir->set_alu_flag(alu_src0_neg);
+
+      if (alu.src[1].abs)
+         ir->set_alu_flag(alu_src1_abs);
+      if (alu.src[1].negate)
+         ir->set_alu_flag(alu_src1_neg);
+
+      shader.emit_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   auto max_val = value_factory.temp_register();
+
+   ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
+
+   if (all)
+      ir->set_alu_flag(alu_src0_neg);
+
+   shader.emit_instruction(ir);
+
+   if (all)
+      op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
+   else
+      op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
+
+   ir = new AluInstr(op,
+                     value_factory.dest(alu.dest, 0, pin_free),
+                     max_val,
+                     value_factory.inline_const(ALU_SRC_1, 0),
+                     AluInstr::last_write);
+   if (all)
+      ir->set_alu_flag(alu_src1_neg);
+   shader.emit_instruction(ir);
+
+   return true;
+}
+
+static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
+{
+   assert(!alu.src[0].abs);
+   assert(!alu.src[0].negate);
+   assert(!alu.src[1].abs);
+   assert(!alu.src[1].negate);
+
+   /* This should probabyl be lowered in nir */
+   auto& value_factory = shader.value_factory();
+
+   AluInstr *ir = nullptr;
+   PRegister v[6];
+
+   auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
+
+   for (int i = 0; i < nc + nc/2; ++i)
+      v[i] = value_factory.temp_register();
+
+   EAluOp combine = all ? op2_and_int : op2_or_int;
+
+   for (int i = 0; i < nc ; ++i) {
+      ir = new AluInstr(op, v[i], value_factory.src(alu.src[0], i),
+            value_factory.src(alu.src[1], i), AluInstr::write);
+      shader.emit_instruction(ir);
+   }
+   if (ir)
+         ir->set_alu_flag(alu_last_instr);
+
+   if (nc ==2) {
+      ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
+      shader.emit_instruction(ir);
+      return true;
+   }
+
+   if (nc == 3) {
+      ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
+      shader.emit_instruction(ir);
+      ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
+      shader.emit_instruction(ir);
+      return true;
+   }
+
+   if (nc == 4) {
+      ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
+      shader.emit_instruction(ir);
+      ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
+      shader.emit_instruction(ir);
+      ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
+      shader.emit_instruction(ir);
+      return true;
+   }
+
+   return false;
+}
+
+static bool emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   const nir_alu_src& src0 = alu.src[0];
+   const nir_alu_src& src1 = alu.src[1];
+
+   auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
+
+   AluInstr::SrcValues srcs(8);
+
+   for (int i = 0; i < n ; ++i) {
+      srcs[2 * i    ] = value_factory.src(src0, i);
+      srcs[2 * i + 1] = value_factory.src(src1, i);
+   }
+
+   for (int i = n; i < 4 ; ++i) {
+      srcs[2 * i    ] = value_factory.zero();
+      srcs[2 * i + 1] = value_factory.zero();
+   }
+
+   AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs,  AluInstr::last_write, 4);
+
+   if (src0.negate) ir->set_alu_flag(alu_src0_neg);
+   if (src0.abs) ir->set_alu_flag(alu_src0_abs);
+   if (src1.negate) ir->set_alu_flag(alu_src1_neg);
+   if (src1.abs) ir->set_alu_flag(alu_src1_abs);
+
+   if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+
+   shader.emit_instruction(ir);
+   return true;
+}
+
+static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+
+   for(unsigned i = 0; i < nc; ++i) {
+      if (instr.dest.write_mask & (1 << i)){
+         auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
+         auto dst = value_factory.dest(instr.dest.dest, i, pin_chan);
+         ir = new AluInstr(op1_mov, dst, src, {alu_write});
+
+         if (instr.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         if (instr.src[i].negate) ir->set_alu_flag(alu_src0_neg);
+         if (instr.src[i].abs) ir->set_alu_flag(alu_src0_abs);
+
+         shader.emit_instruction(ir);
+      }
+   }
+
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none;
+
+   for (int i = 0; i < 4 ; ++i) {
+      if (alu.dest.write_mask & (1 << i)) {
+         ir = new AluInstr(opcode,
+                           value_factory.dest(alu.dest, i, pin),
+                           value_factory.src(alu.src[0], i),
+                           value_factory.zero(),
+                           AluInstr::write);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   auto pin = pin_for_components(alu);
+   for (int i = 0; i < 4 ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode,
+                           value_factory.dest(alu.dest, i, pin),
+                           value_factory.zero(),
+                           value_factory.src(alu.src[0], i),
+                           AluInstr::write);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   return true;
+}
+
+static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2; ++i) {
+      ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
+                        value_factory.src(alu.src[i], 0), AluInstr::write);
+     shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2; ++i) {
+      ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
+                        value_factory.src(alu.src[0], i), AluInstr::write);
+     shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+
+static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2; ++i) {
+      ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
+                        value_factory.src64(alu.src[0], 0, i), AluInstr::write);
+     shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2; ++i) {
+      ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_chan),
+                        value_factory.src64(alu.src[0], 0, i), AluInstr::write);
+     shader.emit_instruction(ir);
+   }
+   for (unsigned i = 0; i < 2; ++i) {
+      ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i + 2, pin_chan),
+                        value_factory.src64(alu.src[1], 1, i), AluInstr::write);
+     shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   auto x = value_factory.temp_register();
+   auto y = value_factory.temp_register();
+   auto yy = value_factory.temp_register();
+
+   shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, x,
+                                        value_factory.src(alu.src[0], 0), AluInstr::last_write));
+
+   shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, y,
+                                        value_factory.src(alu.src[1], 0), AluInstr::last_write));
+
+   shader.emit_instruction(new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
+
+   shader.emit_instruction(new AluInstr(op2_or_int,
+                                        value_factory.dest(alu.dest, 0, pin_free),
+                                        x, yy, AluInstr::last_write));
+   return true;
+}
+
+static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_free),
+                                        value_factory.src64(alu.src[0], 0, comp), AluInstr::last_write));
+   return true;
+}
+
+static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.dest, 0, pin_free),
+                                        value_factory.src(alu.src[0], 0), AluInstr::last_write));
+   return true;
+}
+static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   auto tmp = value_factory.temp_register();
+   shader.emit_instruction(new AluInstr(op2_lshr_int, tmp,
+                                        value_factory.src(alu.src[0], 0),
+                                        value_factory.literal(16),
+                                        AluInstr::last_write));
+
+   shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
+                                        value_factory.dest(alu.dest, 0, pin_free),
+                                        tmp, AluInstr::last_write));
+   return true;
+}
+
+
+
+static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   const nir_alu_src& src0 = alu.src[0];
+
+   AluInstr *ir = nullptr;
+   auto pin = pin_for_components(alu);
+
+   for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode,
+                           value_factory.dest(alu.dest.dest, i, pin),
+                           value_factory.src(src0, i),
+                           AluInstr::last_write);
+         if (src0.negate) ir->set_alu_flag(alu_src0_neg);
+         if (src0.abs) ir->set_alu_flag(alu_src0_abs);
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         ir->set_alu_flag(alu_is_trans);
+         shader.emit_instruction(ir);
+      }
+   }
+
+   return true;
+}
+
+static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+
+   PRegister reg[4];
+
+   int num_comp = nir_dest_num_components(alu.dest.dest);
+
+   for (int i = 0; i < num_comp; ++i) {
+      reg[i] = value_factory.temp_register();
+      ir = new AluInstr(op1_trunc, reg[i], value_factory.src(alu.src[0], i), AluInstr::last_write);
+      if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+      shader.emit_instruction(ir);
+   }
+
+   auto pin = pin_for_components(alu);
+   for (int i = 0; i < num_comp; ++i) {
+      ir = new AluInstr(opcode,
+                        value_factory.dest(alu.dest, i, pin),
+                        reg[i], AluInstr::write);
+      if (opcode == op1_flt_to_uint) {
+         ir->set_alu_flag(alu_is_trans);
+         ir->set_alu_flag(alu_last_instr);
+      }
+      shader.emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   const nir_alu_src& src0 = alu.src[0];
+
+   auto pin = pin_for_components(alu);
+
+   /* todo: Actually we need only three channels, but then we have
+    * to make sure that we don't hava w dest */
+   for (unsigned j = 0; j < 4; ++j) {
+      if (alu.dest.write_mask & (1 << j)) {
+         AluInstr::SrcValues srcs(4);
+         PRegister dest = value_factory.dest(alu.dest.dest, j, pin);
+
+         for (unsigned i = 0; i < 4; ++i)
+            srcs[i] = value_factory.src(src0, j);
+
+         auto ir = new AluInstr(opcode, dest,  srcs,  AluInstr::last_write, 4);
+
+         if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
+         if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+
+         ir->set_alu_flag(alu_is_cayman_trans);
+
+
+         shader.emit_instruction(ir);
+      }
+   }
+   return true;
+}
+
+static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   const nir_alu_src& src0 = alu.src[0];
+   const nir_alu_src& src1 = alu.src[1];
+
+   AluInstr *ir = nullptr;
+
+   auto pin = pin_for_components(alu);
+   for (int i = 0; i < 4 ; ++i) {
+      if (alu.dest.write_mask & (1 << i)){
+         ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
+                           value_factory.src(src0, i),
+                           value_factory.src(src1, i), AluInstr::last_write);
+         if (src0.negate) ir->set_alu_flag(alu_src0_neg);
+         if (src0.abs) ir->set_alu_flag(alu_src0_abs);
+         if (src1.negate) ir->set_alu_flag(alu_src1_neg);
+         if (src1.abs) ir->set_alu_flag(alu_src1_abs);
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         ir->set_alu_flag(alu_is_trans);
+         shader.emit_instruction(ir);
+      }
+   }
+   return true;
+}
+
+static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   const nir_alu_src& src0 = alu.src[0];
+   const nir_alu_src& src1 = alu.src[1];
+
+   unsigned last_slot = 4;
+
+   for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
+      if (alu.dest.write_mask & (1 << k)) {
+         AluInstr::SrcValues srcs(2 * last_slot);
+         PRegister dest = value_factory.dest(alu.dest.dest, k, pin_free);
+
+         for (unsigned i = 0; i < last_slot ; ++i) {
+            srcs[2 * i    ] = value_factory.src(src0, k);
+            srcs[2 * i + 1] = value_factory.src(src1, k);
+         }
+
+         auto ir = new AluInstr(opcode,
+                                dest, srcs, AluInstr::last_write, last_slot);
+
+         if (src0.negate) ir->set_alu_flag(alu_src0_neg);
+         if (src0.abs) ir->set_alu_flag(alu_src0_abs);
+         if (src1.negate) ir->set_alu_flag(alu_src1_neg);
+         if (src1.abs) ir->set_alu_flag(alu_src1_abs);
+         if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
+         ir->set_alu_flag(alu_is_cayman_trans);
+         shader.emit_instruction(ir);
+      }
+   }
+   return true;
+}
+
+
+static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+
+   int ncomp = nir_dest_num_components(alu.dest.dest);
+   RegisterVec4::Swizzle src_swz = {7,7,7,7};
+   for (auto i = 0; i < ncomp; ++i)
+      src_swz[i] = alu.src[0].swizzle[i];
+
+   auto src = value_factory.src_vec4(alu.src[0].src, pin_group, src_swz);
+
+   auto tmp = value_factory.temp_vec4(pin_group);
+   AluInstr *mv = nullptr;
+   for (int i = 0; i < ncomp; ++i) {
+      mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
+      if (alu.src[0].abs)
+         mv->set_alu_flag(alu_src0_abs);
+      if (alu.src[0].negate)
+         mv->set_alu_flag(alu_src0_neg);
+      shader.emit_instruction(mv);
+   }
+   if (mv)
+      mv->set_alu_flag(alu_last_instr);
+
+   auto dst = value_factory.dest_vec4(alu.dest.dest, pin_group);
+   RegisterVec4::Swizzle dst_swz = {7,7,7,7};
+   for (auto i = 0; i < ncomp; ++i) {
+      if (alu.dest.write_mask & (1 << i))
+         dst_swz[i] = i;
+   }
+
+   auto tex = new TexInstr(opcode, dst, dst_swz, tmp, 0, R600_MAX_CONST_BUFFERS);
+
+   if (fine)
+      tex->set_tex_flag(TexInstr::grad_fine);
+
+   shader.emit_instruction(tex);
+
+   return true;
+}
+
+static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
+{
+   auto& value_factory = shader.value_factory();
+   AluInstr *ir = nullptr;
+
+   const uint16_t src0_chan[4] = {2, 2, 0, 1};
+   const uint16_t src1_chan[4] = {1, 0, 2, 2};
+
+   auto group = new AluGroup();
+
+   for (int i = 0; i < 4; ++i)  {
+
+
+      ir = new AluInstr(op2_cube, value_factory.dest(alu.dest.dest, i, pin_chan),
+                        value_factory.src(alu.src[0], src0_chan[i]),
+                        value_factory.src(alu.src[0], src1_chan[i]),
+                        AluInstr::write);
+      group->add_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   shader.emit_instruction(group);
+   return true;
+}
+
+const std::set<AluModifiers> AluInstr::empty;
+const std::set<AluModifiers> AluInstr::write({alu_write});
+const std::set<AluModifiers> AluInstr::last({alu_last_instr});
+const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h
new file mode 100644
index 0000000..ffe3118
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h
@@ -0,0 +1,193 @@
+#ifndef INSTRALU_H
+#define INSTRALU_H
+
+#include "sfn_instr.h"
+
+#include <unordered_set>
+
+struct nir_alu_instr;
+
+namespace r600 {
+
+class Shader;
+class ValueFactory;
+
+class AluInstr : public Instr {
+public:
+
+   using SrcValues = std::vector<PVirtualValue, Allocator<PVirtualValue>>;
+
+   enum Op2Options {
+      op2_opt_none = 0,
+      op2_opt_reverse = 1,
+      op2_opt_neg_src1 = 1 << 1,
+      op2_opt_abs_src0 = 1 << 2
+   };
+
+   static constexpr const AluBankSwizzle bs[6] = {
+      alu_vec_012,
+      alu_vec_021,
+      alu_vec_120,
+      alu_vec_102,
+      alu_vec_201,
+      alu_vec_210
+   };
+
+   static const AluModifiers src_abs_flags[2];
+   static const AluModifiers src_neg_flags[3];
+   static const AluModifiers src_rel_flags[3];
+
+   AluInstr(EAluOp opcode);
+   AluInstr(EAluOp opcode, int chan);
+   AluInstr(EAluOp opcode, PRegister dest,
+            SrcValues src0,
+            const std::set<AluModifiers>& flags, int alu_slot);
+
+   AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(EAluOp opcode, PRegister dest,
+            PVirtualValue src0, PVirtualValue src1,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
+            PVirtualValue src2,
+            const std::set<AluModifiers>& flags);
+
+   AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address);
+   AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   auto opcode() const {assert(!has_alu_flag(alu_is_lds)); return m_opcode;}
+   auto lds_opcode() const {assert(has_alu_flag(alu_is_lds)); return m_lds_opcode;}
+
+   bool can_propagate_src() const;
+   bool can_propagate_dest() const;
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+   bool replace_dest(PRegister new_dest, AluInstr *move_instr) override;
+
+   void set_op(EAluOp op) {m_opcode = op;}
+
+   PRegister dest() const {return m_dest;}
+   unsigned n_sources() const {return m_src.size();}
+
+   int dest_chan() const {return m_dest ? m_dest->chan() : m_fallback_chan;}
+
+   PVirtualValue psrc(unsigned i) {return i < m_src.size() ? m_src[i] : nullptr;}
+   VirtualValue& src(unsigned i) {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+   const VirtualValue& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+
+   void set_sources(SrcValues src);
+   const SrcValues& sources() const {return m_src;}
+   void pin_sources_to_chan();
+
+   int register_priority() const;
+
+   void reset_alu_flag(AluModifiers flag) {m_alu_flags.reset(flag);}
+   void set_alu_flag(AluModifiers flag) {m_alu_flags.set(flag);}
+   bool has_alu_flag(AluModifiers f) const {return m_alu_flags.test(f);}
+
+   ECFAluOpCode cf_type() const {return m_cf_type;}
+   void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
+   void set_bank_swizzle(AluBankSwizzle swz) {m_bank_swizzle = swz;}
+   AluBankSwizzle bank_swizzle() const {return m_bank_swizzle;}
+
+   void set_index_offset(unsigned offs) {m_idx_offset = offs;}
+   auto  index_offset() const {return m_idx_offset;}
+
+   bool is_equal_to(const AluInstr& lhs) const;
+
+   bool has_lds_access() const;
+
+   static const std::map<ECFAluOpCode, std::string> cf_map;
+   static const std::map<AluBankSwizzle, std::string> bank_swizzle_map;
+   static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory, AluGroup *);
+   static bool from_nir(nir_alu_instr *alu, Shader& shader);
+
+   int alu_slots() const {return m_alu_slots;}
+
+   AluGroup *split(ValueFactory &vf);
+
+   bool end_group() const override { return m_alu_flags.test(alu_last_instr);}
+
+   static const std::set<AluModifiers> empty;
+   static const std::set<AluModifiers> write;
+   static const std::set<AluModifiers> last;
+   static const std::set<AluModifiers> last_write;
+
+   std::pair<PRegister, bool> indirect_addr() const;
+
+   void add_extra_dependency(PVirtualValue reg);
+
+   void set_required_slots(int nslots) { m_required_slots = nslots;}
+   unsigned  required_slots() const { return m_required_slots;}
+
+   void add_priority(int priority) { m_priority += priority;}
+   int priority() const { return m_priority;}
+   void inc_priority() { ++m_priority;}
+
+   void set_parent_group(AluGroup *group) { m_parent_group = group;}
+
+private:
+   friend class AluGroup;
+
+   void update_uses();
+
+   bool do_ready() const override;
+
+   bool can_copy_propagate() const;
+
+   bool check_readport_validation(PRegister old_src, PVirtualValue new_src) const;
+
+   void set_alu_flags(const AluOpFlags& flags) { m_alu_flags = flags; }
+   bool propagate_death() override;
+
+   void do_print(std::ostream& os) const override;
+
+   union {
+      EAluOp m_opcode;
+      ESDOp m_lds_opcode;
+   };
+
+   PRegister m_dest{nullptr};
+   SrcValues m_src;
+
+   AluOpFlags m_alu_flags;
+   AluBankSwizzle m_bank_swizzle{alu_vec_unknown};
+   ECFAluOpCode m_cf_type{cf_alu};
+   int m_alu_slots{1};
+   int m_fallback_chan{0};
+   unsigned m_idx_offset{0};
+   unsigned m_required_slots{0};
+   int m_priority{0};
+   std::set<PRegister, std::less<PRegister>, Allocator<PRegister>> m_extra_dependencies;
+   AluGroup *m_parent_group{nullptr};
+};
+
+class AluInstrVisitor : public InstrVisitor {
+public:
+   void visit(AluGroup *instr) override;
+   void visit(Block *instr) override;
+   void visit(IfInstr *instr) override;
+
+   void visit(TexInstr *instr) override {(void)instr;}
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override {(void)instr;}
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+};
+
+
+}
+#endif // INSTRALU_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
new file mode 100644
index 0000000..c41a0f8
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp
@@ -0,0 +1,361 @@
+#include "sfn_instr_alugroup.h"
+#include "sfn_debug.h"
+#include <algorithm>
+
+namespace r600 {
+
+AluGroup::AluGroup()
+{
+   std::fill(m_slots.begin(), m_slots.end(), nullptr);
+}
+
+bool AluGroup::add_instruction(AluInstr *instr)
+{
+   /* we can only schedule one op that accesses LDS or
+     the LDS read queue */
+   if (m_has_lds_op && instr->has_lds_access())
+      return false;
+
+   if (instr->has_alu_flag(alu_is_trans) && add_trans_instructions(instr))
+      return true;
+
+   if (add_vec_instructions(instr)) {
+      instr->set_parent_group(this);
+      return true;
+   }
+
+   auto opinfo = alu_ops.find(instr->opcode());
+   assert(opinfo != alu_ops.end());
+
+   if (s_max_slots > 4 &&
+       opinfo->second.can_channel(AluOp::t) &&
+       add_trans_instructions(instr)) {
+      instr->set_parent_group(this);
+      return true;
+   }
+
+   return false;
+}
+
+bool AluGroup::add_trans_instructions(AluInstr *instr)
+{
+   if (m_slots[4] || s_max_slots < 5)
+      return false;
+
+   if (!update_indirect_access(instr))
+      return false;
+
+   /* LDS instructions have to be scheduled in X */
+   if (instr->has_alu_flag(alu_is_lds))
+      return false;
+
+   auto opinfo = alu_ops.find(instr->opcode());
+   assert(opinfo != alu_ops.end());
+
+   if (!opinfo->second.can_channel(AluOp::t))
+      return false;
+
+
+   /* if we schedule a non-trans instr into the trans slot, we have to make
+    * sure that the corresponding vector slot is already occupied, otherwise
+    * the hardware will schedule it as vector op and the bank-swizzle as
+    * checked here (and in r600_asm.c) will not catch conflicts.
+    */
+   if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
+      if (instr->dest() && instr->dest()->pin() == pin_free) {
+         int used_slot = 3;
+         while (!m_slots[used_slot] && used_slot >= 0)
+            --used_slot;
+
+         // if we schedule a non-trans instr into the trans slot,
+         // there should always be some slot that is already used
+         assert(used_slot >= 0);
+         instr->dest()->set_chan(used_slot);
+      }
+   }
+
+   for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
+      AluReadportReservation readports_evaluator = m_readports_evaluator;
+      if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
+         m_readports_evaluator = readports_evaluator;
+         m_slots[4] = instr;
+         instr->pin_sources_to_chan();
+         sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
+
+         /* We added a vector op in the trans channel, so we have to
+          * make sure the corresponding vector channel is used */
+         if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
+            m_slots[instr->dest_chan()] =
+                  new AluInstr(op0_nop, instr->dest_chan());
+         return true;
+      }
+   }
+   return false;
+}
+
+int AluGroup::free_slots() const
+{
+   int free_mask = 0;
+   for(int i = 0; i < s_max_slots; ++i) {
+      if (!m_slots[i])
+         free_mask |= 1 << i;
+   }
+   return free_mask;
+}
+
+class AluAllowSlotSwitch : public AluInstrVisitor {
+public:
+   using AluInstrVisitor::visit;
+
+   void visit(AluInstr *alu) {
+      yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
+   }
+
+   bool yes{false};
+
+};
+
+bool AluGroup::add_vec_instructions(AluInstr *instr)
+{
+   if (!update_indirect_access(instr))
+      return false;
+
+   int param_src = -1;
+   for (auto& s : instr->sources()) {
+      auto is = s->as_inline_const();
+      if (is)
+         param_src = is->sel() - ALU_SRC_PARAM_BASE;
+   }
+
+   if (param_src >= 0) {
+      if (m_param_used < 0)
+         m_param_used = param_src;
+      else if (m_param_used != param_src)
+         return false;
+   }
+
+   if (m_has_lds_op && instr->has_lds_access())
+      return false;
+
+   int preferred_chan = instr->dest_chan();
+   if (!m_slots[preferred_chan]) {
+      if (instr->bank_swizzle() != alu_vec_unknown) {
+         if (try_readport(instr, instr->bank_swizzle()))
+             return true;
+      } else {
+         for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
+            if (try_readport(instr, i))
+               return true;
+         }
+      }
+   } else {
+
+      auto dest = instr->dest();
+      if (dest && dest->pin() == pin_free) {
+
+         for (auto u : dest->uses()) {
+            AluAllowSlotSwitch swich_allowed;
+            u->accept(swich_allowed);
+            if (!swich_allowed.yes)
+               return false;
+         }
+
+         int free_chan = 0;
+         while (m_slots[free_chan] && free_chan < 4)
+            free_chan++;
+
+         if (!m_slots[free_chan] && free_chan < 4) {
+            sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
+            dest->set_chan(free_chan);
+            if (instr->bank_swizzle() != alu_vec_unknown) {
+               if (try_readport(instr, instr->bank_swizzle()))
+                  return true;
+            } else {
+               for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
+                  if (try_readport(instr, i))
+                     return true;
+               }
+            }
+         }
+      }
+   }
+   return false;
+}
+
+bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
+{
+   int preferred_chan = instr->dest_chan();
+   AluReadportReservation readports_evaluator = m_readports_evaluator;
+   if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) {
+      m_readports_evaluator = readports_evaluator;
+      m_slots[preferred_chan] = instr;
+      m_has_lds_op |= instr->has_lds_access();
+      sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
+      auto dest = instr->dest();
+      if (dest && dest->pin() == pin_free)
+         dest->set_pin(pin_chan);
+      instr->pin_sources_to_chan();
+      return true;
+   }
+   return false;
+}
+
+bool AluGroup::update_indirect_access(AluInstr *instr)
+{
+   auto indirect_addr = instr->indirect_addr();
+
+   if (indirect_addr.first) {
+      if (!m_addr_used) {
+         m_addr_used = indirect_addr.first;
+         m_addr_is_index = indirect_addr.second;
+      } else if (!indirect_addr.first->equal_to(*m_addr_used)) {
+         return false;
+      }
+   }
+
+   return true;
+}
+
+void AluGroup::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void AluGroup::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+void AluGroup::set_scheduled()
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i])
+         m_slots[i]->set_scheduled();
+   }
+}
+
+void AluGroup::fix_last_flag()
+{
+   bool last_seen = false;
+   for (int i = s_max_slots - 1; i >= 0; --i) {
+      if (m_slots[i]) {
+         if (!last_seen) {
+            m_slots[i]->set_alu_flag(alu_last_instr);
+            last_seen = true;
+         } else {
+            m_slots[i]->reset_alu_flag(alu_last_instr);
+         }
+      }
+   }
+}
+
+bool AluGroup::is_equal_to(const AluGroup& other) const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (!other.m_slots[i]) {
+         if (!m_slots[i])
+            continue;
+         else
+            return false;
+      }
+
+      if (m_slots[i]) {
+         if (!other.m_slots[i])
+            return false;
+         else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
+            return false;
+      }
+   }
+   return true;
+}
+
+bool AluGroup::has_lds_group_end() const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
+         return true;
+   }
+   return false;
+}
+
+bool AluGroup::do_ready() const
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i] && !m_slots[i]->ready())
+         return false;
+   }
+   return true;
+}
+
+void AluGroup::forward_set_blockid(int id, int index)
+{
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i]) {
+         m_slots[i]->set_blockid(id, index);
+      }
+   }
+}
+
+uint32_t AluGroup::slots() const
+{
+   uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i])
+         ++result;
+   }
+   if (m_addr_used) {
+      ++result;
+      if (m_addr_is_index)
+         ++result;
+   }
+
+   return result;
+}
+
+void AluGroup::do_print(std::ostream& os) const
+{
+   const char slotname[] = "xyzwt";
+
+   os << "ALU_GROUP_BEGIN\n";
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i]) {
+         for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
+            os << ' ';
+         os << slotname[i] << ": ";
+         m_slots[i]->print(os);
+         os << "\n";
+      }
+   }
+   for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
+      os << ' ';
+   os << "ALU_GROUP_END";
+}
+
+AluInstr::SrcValues AluGroup::get_kconsts() const
+{
+   AluInstr::SrcValues result;
+
+   for (int i = 0; i < s_max_slots; ++i) {
+      if (m_slots[i]) {
+         for (auto s : m_slots[i]->sources())
+            if (s->as_uniform())
+               result.push_back(s);
+      }
+   }
+   return result;
+}
+
+void AluGroup::set_chipclass(r600_chip_class chip_class)
+{
+   switch (chip_class) {
+   case ISA_CC_CAYMAN:
+      s_max_slots = 4;
+   break;
+   default:
+      s_max_slots = 5;
+   }
+}
+
+int AluGroup::s_max_slots = 5;
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
new file mode 100644
index 0000000..6b56a9f
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h
@@ -0,0 +1,89 @@
+#ifndef ALUGROUP_H
+#define ALUGROUP_H
+
+#include "sfn_instr_alu.h"
+#include "sfn_alu_readport_validation.h"
+
+namespace r600 {
+
+class AluGroup : public Instr
+{
+public:
+   using Slots = std::array<AluInstr *, 5>;
+
+   AluGroup();
+
+   using iterator = Slots::iterator;
+   using const_iterator = Slots::const_iterator;
+
+   bool add_instruction(AluInstr *instr);
+   bool add_trans_instructions(AluInstr *instr);
+   bool add_vec_instructions(AluInstr *instr);
+
+   bool is_equal_to(const AluGroup& other) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   auto begin() {return m_slots.begin(); }
+   auto end() {return m_slots.begin() + s_max_slots; }
+   auto begin() const {return m_slots.begin(); }
+   auto end() const {return m_slots.begin() + s_max_slots; }
+
+   bool end_group() const override { return true; }
+
+   void set_scheduled() override;
+
+   void set_nesting_depth(int depth) {m_nesting_depth = depth;}
+
+   void fix_last_flag();
+
+   static void set_chipclass(r600_chip_class chip_class);
+
+   int free_slots() const;
+
+   auto addr() const {return std::make_pair(m_addr_used, m_addr_is_index);}
+
+   uint32_t slots() const override;
+
+   AluInstr::SrcValues get_kconsts() const;
+
+   bool has_lds_group_start() const { return m_slots[0] ?
+            m_slots[0]->has_alu_flag(alu_lds_group_start) : false;}
+
+   bool has_lds_group_end() const;
+
+   const auto& readport_reserer() const { return m_readports_evaluator; }
+   void set_readport_reserer(const AluReadportReservation& rr) {
+       m_readports_evaluator = rr;
+   };
+
+   static bool has_t() { return s_max_slots == 5;}
+
+private:
+   void forward_set_blockid(int id, int index) override;
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   bool update_indirect_access(AluInstr *instr);
+   bool try_readport(AluInstr *instr, AluBankSwizzle cycle);
+
+   Slots m_slots;
+
+   AluReadportReservation m_readports_evaluator;
+
+   static int s_max_slots;
+
+   PRegister m_addr_used{nullptr};
+
+   int m_param_used{-1};
+
+   int m_nesting_depth{0};
+   bool m_has_lds_op{false};
+   bool m_addr_is_index{false};
+};
+
+
+}
+
+#endif // ALUGROUP_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
new file mode 100644
index 0000000..e12b3a6
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp
@@ -0,0 +1,176 @@
+#include "sfn_instr_controlflow.h"
+
+#include <sstream>
+
+namespace r600 {
+
+ControlFlowInstr::ControlFlowInstr(CFType type):
+   m_type(type)
+{
+}
+
+bool ControlFlowInstr::do_ready() const
+{
+   /* Have to rework this, but the CF should always */
+   return true;
+}
+
+
+bool ControlFlowInstr::is_equal_to(const ControlFlowInstr& rhs) const
+{
+   return m_type == rhs.m_type;
+}
+
+void ControlFlowInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void ControlFlowInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+void ControlFlowInstr::do_print(std::ostream& os) const
+{
+   switch (m_type) {
+   case cf_else: os << "ELSE"; break;
+   case cf_endif: os << "ENDIF";break;
+   case cf_loop_begin: os << "LOOP_BEGIN"; break;
+   case cf_loop_end: os << "LOOP_END"; break;
+   case cf_loop_break: os << "BREAK"; break;
+   case cf_loop_continue: os << "CONTINUE"; break;
+   case cf_wait_ack: os << "WAIT_ACK"; break;
+   default:
+      unreachable("Unknown CF type");
+   }
+}
+
+Instr::Pointer ControlFlowInstr::from_string(std::string type_str)
+{
+   if (type_str == "ELSE")
+      return new ControlFlowInstr(cf_else);
+   else if (type_str == "ENDIF")
+      return new ControlFlowInstr(cf_endif);
+   else if (type_str == "LOOP_BEGIN")
+      return new ControlFlowInstr(cf_loop_begin);
+   else if (type_str == "LOOP_END")
+      return new ControlFlowInstr(cf_loop_end);
+   else if (type_str == "BREAK")
+      return new ControlFlowInstr(cf_loop_break);
+   else if (type_str == "CONTINUE")
+      return new ControlFlowInstr(cf_loop_continue);
+   else if (type_str == "WAIT_ACK")
+      return new ControlFlowInstr(cf_wait_ack);
+   else
+      return nullptr;
+}
+
+int ControlFlowInstr::nesting_corr() const
+{
+   switch (m_type) {
+   case cf_else:
+   case cf_endif:
+   case cf_loop_end: return -1;
+   default:
+      return 0;
+   }
+}
+
+int ControlFlowInstr::nesting_offset() const
+{
+   switch (m_type) {
+   case cf_endif:
+   case cf_loop_end: return -1;
+   case cf_loop_begin: return 1;
+   default:
+      return 0;
+   }
+}
+
+IfInstr::IfInstr(AluInstr *pred):
+   m_predicate(pred)
+{
+   assert(pred);
+}
+
+IfInstr::IfInstr(const IfInstr& orig)
+{
+   m_predicate = new AluInstr(*orig.m_predicate);
+}
+
+bool IfInstr::is_equal_to(const IfInstr& rhs) const
+{
+   return m_predicate->equal_to(*rhs.m_predicate);
+}
+
+void IfInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void IfInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool IfInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   return m_predicate->replace_source(old_src, new_src);
+}
+
+bool IfInstr::do_ready() const
+{
+   return m_predicate->ready();
+}
+
+void IfInstr::forward_set_scheduled()
+{
+   m_predicate->set_scheduled();
+}
+
+void IfInstr::forward_set_blockid(int id, int index)
+{
+   m_predicate->set_blockid(id, index);
+}
+
+void IfInstr::do_print(std::ostream& os) const
+{
+   os << "IF (( " << *m_predicate << " ))";
+}
+
+void IfInstr::set_predicate(AluInstr *new_predicate)
+{
+   m_predicate = new_predicate;
+   m_predicate->set_blockid(block_id(), index());
+}
+
+Instr::Pointer IfInstr::from_string(std::istream &is, ValueFactory& value_factory)
+{
+   std::string pred_start;
+   is >> pred_start;
+   if (pred_start != "((")
+      return nullptr;
+   char buf[2048];
+
+   is.get(buf, 2048, ')');
+   std::string pred_end;
+   is >> pred_end;
+
+   if (pred_end != "))") {
+      return nullptr;
+   }
+
+   std::istringstream bufstr(buf);
+
+   std::string instr_type;
+   bufstr >> instr_type;
+
+   if (instr_type != "ALU")
+      return nullptr;
+
+   auto pred = AluInstr::from_string(bufstr, value_factory, nullptr);
+   return new IfInstr(static_cast<AluInstr*>(pred));
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
new file mode 100644
index 0000000..9c7273c
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h
@@ -0,0 +1,81 @@
+#ifndef CONTROLFLOWINSTR_H
+#define CONTROLFLOWINSTR_H
+
+#include "sfn_instr_alu.h"
+
+namespace r600 {
+
+class ControlFlowInstr : public Instr
+{
+public:
+   enum CFType {
+      cf_else,
+      cf_endif,
+      cf_loop_begin,
+      cf_loop_end,
+      cf_loop_break,
+      cf_loop_continue,
+      cf_stream_write,
+      cf_wait_ack
+   };
+
+   ControlFlowInstr(CFType type);
+
+   ControlFlowInstr(const ControlFlowInstr& orig) = default;
+
+   bool is_equal_to(const ControlFlowInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   CFType cf_type() const { return m_type;}
+
+   int nesting_corr() const override;
+
+   static Instr::Pointer from_string(std::string type_str);
+
+   bool end_block() const override { return true;}
+
+   int nesting_offset() const override;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   CFType m_type;
+};
+
+class IfInstr : public Instr {
+public:
+
+   IfInstr(AluInstr *pred);
+   IfInstr(const IfInstr& orig);
+
+   bool is_equal_to(const IfInstr& lhs) const;
+
+   void set_predicate(AluInstr *new_predicate);
+
+   AluInstr *predicate() const { return m_predicate; }
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+   static Instr::Pointer from_string(std::istream &is, ValueFactory& value_factory);
+
+   bool end_block() const override { return true;}
+   int nesting_offset() const override { return 1;}
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+   void forward_set_blockid(int id, int index) override;
+   void forward_set_scheduled() override;
+
+   AluInstr *m_predicate;
+};
+
+}
+
+#endif // CONTROLFLOWINSTR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
new file mode 100644
index 0000000..5894b39
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp
@@ -0,0 +1,524 @@
+#include "sfn_instr_export.h"
+#include "sfn_valuefactory.h"
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+   const char *swz = "xyzw";
+   for (int i = 0; i < 4; ++i) {
+      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+   }
+   return buf;
+}
+
+WriteOutInstr::WriteOutInstr(const RegisterVec4& value):
+   m_value(value)
+{
+   m_value.add_use(this);
+   set_always_keep();
+}
+
+void WriteOutInstr::override_chan(int i, int chan)
+{
+   m_value.set_value(i,
+                     new Register(m_value[i]->sel(), chan,
+                                  m_value[i]->pin()));
+}
+
+ExportInstr::ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value):
+   WriteOutInstr(value),
+   m_type(type),
+   m_loc(loc),
+   m_is_last(false)
+{
+}
+
+void ExportInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void ExportInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+
+bool ExportInstr::is_equal_to(const ExportInstr& lhs) const
+{
+   return
+
+         (m_type == lhs.m_type &&
+           m_loc == lhs.m_loc &&
+           value() == lhs.value() &&
+           m_is_last == lhs.m_is_last);
+}
+
+ExportInstr::ExportType ExportInstr::type_from_string(const std::string& s)
+{
+   (void)s;
+   return param;
+}
+
+void ExportInstr::do_print(std::ostream& os) const
+{
+   os << "EXPORT";
+   if (m_is_last)
+      os << "_DONE";
+
+   switch (m_type) {
+   case param: os << " PARAM "; break;
+   case pos: os << " POS "; break;
+   case pixel: os << " PIXEL "; break;
+   }
+   os << m_loc << " ";
+   value().print(os);
+}
+
+bool ExportInstr::do_ready() const
+{
+   return value().ready(block_id(), index());
+}
+
+Instr::Pointer ExportInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   return from_string_impl(is, vf);
+}
+
+Instr::Pointer ExportInstr::last_from_string(std::istream& is, ValueFactory &vf)
+{
+   auto result = from_string_impl(is, vf);
+   result->set_is_last_export(true);
+   return result;
+}
+
+ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactory &vf)
+{
+   string typestr;
+   int pos;
+   string value_str;
+
+   is >> typestr >> pos >> value_str;
+
+   ExportInstr::ExportType type;
+
+   if (typestr == "PARAM")
+      type = ExportInstr::param;
+   else if (typestr == "POS")
+      type = ExportInstr::pos;
+   else if (typestr == "PIXEL")
+      type = ExportInstr::pixel;
+   else
+      unreachable("Unknown export type");
+
+   RegisterVec4 value = vf.src_vec4_from_string(value_str);
+
+   return new ExportInstr( type, pos, value);
+}
+
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                                     int align, int align_offset, int writemask, int array_size):
+   WriteOutInstr(value),
+   m_address(addr),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(array_size - 1)
+{
+   addr->add_use(this);
+}
+
+WriteScratchInstr::WriteScratchInstr(const RegisterVec4& value, int loc,
+                                     int align, int align_offset,int writemask):
+   WriteOutInstr(value),
+   m_loc(loc),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask)
+{
+
+}
+
+void WriteScratchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void WriteScratchInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool WriteScratchInstr::is_equal_to(const WriteScratchInstr& lhs) const
+{
+   if (m_address) {
+      if (!lhs.m_address)
+         return false;
+      if (! m_address->equal_to(*lhs.m_address))
+         return false;
+   } else if (lhs.m_address)
+      return false;
+
+   return  m_loc == lhs.m_loc &&
+         m_align == lhs.m_align &&
+         m_align_offset == lhs.m_align_offset &&
+         m_writemask == lhs.m_writemask &&
+         m_array_size == lhs.m_array_size &&
+         value().sel() == lhs.value().sel();
+}
+
+bool WriteScratchInstr::do_ready() const
+{
+   return value().ready(block_id(), index()) &&
+         (!m_address || m_address->ready(block_id(), index()));
+}
+
+void WriteScratchInstr::do_print(std::ostream& os) const
+{
+   char buf[6];
+
+   os << "WRITE_SCRATCH ";
+   if (m_address)
+      os << "@" << *m_address << "[" << m_array_size + 1<<"]";
+   else
+      os << m_loc;
+
+   os << (value()[0]->is_ssa() ? " S" : " R")
+      << value().sel() << "." << writemask_to_swizzle(m_writemask, buf)
+      << " " << "AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+auto WriteScratchInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string loc_str;
+   string value_str;
+   string align_str;
+   string align_offset_str;
+   int offset;
+
+   int array_size = 0;
+   PVirtualValue addr_reg = nullptr;
+
+   is >> loc_str >>  value_str >> align_str >> align_offset_str;
+
+   std::istringstream loc_ss(loc_str);
+
+   auto align = int_from_string_with_prefix(align_str, "AL:");
+   auto align_offset = int_from_string_with_prefix(align_offset_str, "ALO:");
+   auto value = vf.src_vec4_from_string(value_str);
+
+   int writemask = 0;
+   for (int i = 0; i < 4; ++i) {
+      if (value[i]->chan() == i)
+         writemask |= 1 << i;
+   }
+
+   if (loc_str[0] == '@') {
+
+      string addr_str;
+      char c;
+      loc_ss >> c;
+      loc_ss >> c;
+
+      while (!loc_ss.eof() && c != '[') {
+         addr_str.append(1, c);
+         loc_ss >> c;
+      }
+      addr_reg = vf.src_from_string(addr_str);
+      assert(addr_reg && addr_reg->as_register());
+
+      loc_ss >> array_size;
+      loc_ss >> c;
+      assert(c == ']');
+      return new WriteScratchInstr(value, addr_reg->as_register(), align, align_offset, writemask, array_size);
+   } else {
+      loc_ss >> offset;
+      return new WriteScratchInstr(value, offset, align, align_offset, writemask);
+   }
+}
+
+StreamOutInstr::StreamOutInstr(const RegisterVec4& value, int num_components,
+                                         int array_base, int comp_mask, int out_buffer,
+                                         int stream):
+   WriteOutInstr(value),
+   m_element_size(num_components == 3 ? 3 : num_components - 1),
+   m_array_base(array_base),
+   m_writemask(comp_mask),
+   m_output_buffer(out_buffer),
+   m_stream(stream)
+{
+}
+
+unsigned StreamOutInstr::op() const
+{
+   int op = 0;
+   switch (m_output_buffer) {
+   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+   }
+   return 4 * m_stream + op;
+}
+
+bool StreamOutInstr::is_equal_to(const StreamOutInstr& oth) const
+{
+
+   return value() == oth.value() &&
+         m_element_size == oth.m_element_size &&
+         m_burst_count == oth.m_burst_count &&
+         m_array_base == oth.m_array_base &&
+         m_array_size == oth.m_array_size &&
+         m_writemask == oth.m_writemask &&
+         m_output_buffer == oth.m_output_buffer &&
+         m_stream == oth.m_stream;
+}
+
+void StreamOutInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE STREAM(" << m_stream << ") "  << value()
+      << " ES:" << m_element_size
+      << " BC:" << m_burst_count
+      << " BUF:" << m_output_buffer
+      << " ARRAY:" <<  m_array_base;
+   if (m_array_size != 0xfff)
+      os << "+" << m_array_size;
+}
+
+bool StreamOutInstr::do_ready() const
+{
+   return value().ready(block_id(), index());
+}
+
+void StreamOutInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void StreamOutInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+
+MemRingOutInstr::MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                                           const RegisterVec4& value,
+                                           unsigned base_addr, unsigned ncomp,
+                                           PRegister index):
+   WriteOutInstr(value),
+   m_ring_op(ring),
+   m_type(type),
+   m_base_address(base_addr),
+   m_num_comp(ncomp),
+   m_export_index(index)
+{
+   assert(m_ring_op  == cf_mem_ring || m_ring_op  == cf_mem_ring1||
+          m_ring_op  == cf_mem_ring2 || m_ring_op  == cf_mem_ring3);
+   assert(m_num_comp <= 4);
+
+   if (m_export_index)
+      m_export_index->add_use(this);
+}
+
+unsigned MemRingOutInstr::ncomp() const
+{
+   switch (m_num_comp) {
+   case 1: return 0;
+   case 2: return 1;
+   case 3:
+   case 4: return 3;
+   default:
+      assert(0);
+   }
+   return 3;
+}
+
+bool MemRingOutInstr::is_equal_to(const MemRingOutInstr& oth) const
+{
+
+   bool equal = value() == oth.value() &&
+                m_ring_op == oth.m_ring_op &&
+                m_type == oth.m_type &&
+                m_num_comp == oth.m_num_comp &&
+                m_base_address == oth.m_base_address;
+
+   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+      equal &= (*m_export_index == *oth.m_export_index);
+   return equal;
+
+}
+
+static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
+void MemRingOutInstr::do_print(std::ostream& os) const
+{
+
+   os << "MEM_RING " << (m_ring_op == cf_mem_ring ? 0 : m_ring_op - cf_mem_ring1 + 1);
+   os << " " << write_type_str[m_type] << " " << m_base_address;
+   os << " " << value();
+   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+      os << " @" << *m_export_index;
+   os << " ES:" << m_num_comp;
+}
+
+void MemRingOutInstr::patch_ring(int stream, PRegister index)
+{
+   const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
+
+   assert(stream < 4);
+   m_ring_op = ring_op[stream];
+   m_export_index = index;
+}
+
+bool MemRingOutInstr::do_ready() const
+{
+   if (m_export_index && !m_export_index->ready(block_id(), index()))
+      return false;
+
+   return value().ready(block_id(), index());
+}
+
+void MemRingOutInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void MemRingOutInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+static const std::map<string,  MemRingOutInstr::EMemWriteType> type_lookop =
+{
+   {"WRITE", MemRingOutInstr::mem_write},
+   {"WRITE_IDX", MemRingOutInstr::mem_write_ind},
+   {"WRITE_ACK", MemRingOutInstr::mem_write_ack},
+   {"WRITE_IDX_ACK", MemRingOutInstr::mem_write_ind_ack}
+};
+
+auto MemRingOutInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string type_str;
+
+   int ring;
+
+   int base_address;
+   string value_str;
+
+   is >> ring >> type_str >> base_address >> value_str;
+   assert(ring < 4);
+
+   auto itype = type_lookop.find(type_str);
+   assert(itype != type_lookop.end());
+
+   auto type = itype->second;
+
+   PVirtualValue index{nullptr};
+   if (type == mem_write_ind || type == mem_write_ind_ack) {
+      char c;
+      string index_str;
+      is >> c >> index_str;
+      assert('@' == c );
+      index = vf.src_from_string(index_str);
+   }
+
+   string elm_size_str;
+   is >> elm_size_str;
+
+   int num_comp = int_from_string_with_prefix(elm_size_str, "ES:");
+
+   auto value = vf.src_vec4_from_string(value_str);
+
+   ECFOpCode opcodes[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
+   assert(ring < 4);
+
+   return new MemRingOutInstr(opcodes[ring], type, value, base_address, num_comp, index->as_register());
+}
+
+EmitVertexInstr::EmitVertexInstr(int stream, bool cut):
+   m_stream(stream),
+   m_cut(cut)
+{
+
+}
+
+
+bool EmitVertexInstr::is_equal_to(const EmitVertexInstr& oth) const
+{
+   return oth.m_stream == m_stream &&
+         oth.m_cut == m_cut;
+}
+
+void EmitVertexInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void EmitVertexInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool EmitVertexInstr::do_ready() const
+{
+   return true;
+}
+
+void EmitVertexInstr::do_print(std::ostream& os) const
+{
+   os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
+}
+
+auto EmitVertexInstr::from_string(std::istream& is, bool cut) -> Pointer
+{
+   char c;
+   is >> c;
+   assert(c == '@');
+
+   int stream;
+   is >> stream;
+
+   return new EmitVertexInstr(stream, cut);
+}
+
+void WriteTFInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void WriteTFInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool WriteTFInstr::is_equal_to(const WriteTFInstr& rhs) const
+{
+   return value() == rhs.value();
+}
+
+auto WriteTFInstr::from_string(std::istream& is, ValueFactory &vf) -> Pointer
+{
+   string value_str;
+   is >> value_str;
+
+   auto value = vf.src_vec4_from_string(value_str);
+
+   return new WriteTFInstr(value);
+}
+
+bool WriteTFInstr::do_ready() const
+{
+   return value().ready(block_id(), index());
+}
+
+void WriteTFInstr::do_print(std::ostream& os) const
+{
+   os << "WRITE_TF " << value();
+}
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.h b/src/gallium/drivers/r600/sfn/sfn_instr_export.h
new file mode 100644
index 0000000..ba20b8f
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.h
@@ -0,0 +1,213 @@
+#ifndef INSTR_EXPORT_H
+#define INSTR_EXPORT_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+
+class WriteOutInstr: public Instr {
+public:
+   WriteOutInstr(const RegisterVec4& value);
+   WriteOutInstr(const WriteOutInstr& orig) = delete;
+
+   void override_chan(int i, int chan);
+
+   const RegisterVec4& value() const {return m_value;};
+   RegisterVec4& value() {return m_value;};
+private:
+
+   RegisterVec4 m_value;
+};
+
+class ExportInstr: public WriteOutInstr {
+public:
+   enum ExportType {
+      pixel,
+      pos,
+      param
+   };
+
+   using Pointer = R600_POINTER_TYPE(ExportInstr);
+
+   ExportInstr(ExportType type, unsigned loc, const RegisterVec4& value);
+   ExportInstr(const ExportInstr& orig) = delete;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const ExportInstr& lhs) const;
+
+
+   static ExportType type_from_string(const std::string& s);
+
+   ExportType export_type() const {return m_type;}
+
+   unsigned location() const {return m_loc;}
+
+   void set_is_last_export(bool value) {m_is_last = value;}
+   bool is_last_export()  const {return m_is_last;}
+
+   static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
+   static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
+
+private:
+   static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ExportType m_type;
+   unsigned m_loc;
+   bool m_is_last;
+};
+
+class WriteScratchInstr : public WriteOutInstr {
+public:
+   WriteScratchInstr(const RegisterVec4& value, PRegister addr,
+                     int align, int align_offset, int writemask, int array_size);
+   WriteScratchInstr(const RegisterVec4& value, int addr,  int align, int align_offset,
+                     int writemask);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const WriteScratchInstr& lhs) const;
+
+   unsigned location() const { return m_loc;};
+   int write_mask() const { return m_writemask;}
+   auto address() const { return m_address;}
+   bool indirect() const { return !!m_address;}
+   int array_size() const { return m_array_size;}
+
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   unsigned m_loc{0};
+   PRegister m_address {nullptr};
+   unsigned m_align;
+   unsigned m_align_offset;
+   unsigned m_writemask;
+   int m_array_size{0};
+};
+
+class StreamOutInstr: public WriteOutInstr {
+public:
+   StreamOutInstr(const RegisterVec4& value, int num_components,
+                       int array_base, int comp_mask, int out_buffer,
+                       int stream);
+   int element_size() const { return m_element_size;}
+   int burst_count() const { return m_burst_count;}
+   int array_base() const { return m_array_base;}
+   int array_size() const { return m_array_size;}
+   int comp_mask() const { return m_writemask;}
+   unsigned op() const;
+
+   bool is_equal_to(const StreamOutInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   int m_element_size{0};
+   int m_burst_count{1};
+   int m_array_base{0};
+   int m_array_size{0xfff};
+   int m_writemask{0};
+   int m_output_buffer{0};
+   int m_stream{0};
+};
+
+class MemRingOutInstr: public WriteOutInstr {
+public:
+
+   enum EMemWriteType {
+      mem_write = 0,
+      mem_write_ind = 1,
+      mem_write_ack = 2,
+      mem_write_ind_ack = 3,
+   };
+
+   MemRingOutInstr(ECFOpCode ring, EMemWriteType type,
+                        const RegisterVec4& value, unsigned base_addr,
+                        unsigned ncomp, PRegister m_index);
+
+   unsigned op() const{return m_ring_op;}
+   unsigned ncomp() const;
+   unsigned addr() const {return m_base_address;}
+   EMemWriteType type() const {return m_type;}
+   unsigned index_reg() const {assert(m_export_index->sel() >= 0); return m_export_index->sel();}
+   unsigned array_base() const {return m_base_address; }
+   PVirtualValue export_index()  const {return m_export_index;}
+
+   void patch_ring(int stream, PRegister index);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const MemRingOutInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_ring_op;
+   EMemWriteType m_type;
+   unsigned m_base_address;
+   unsigned m_num_comp;
+   PRegister m_export_index;
+};
+
+class EmitVertexInstr : public Instr {
+public:
+   EmitVertexInstr(int stream, bool cut);
+   ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
+   int stream() const { return m_stream;}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const EmitVertexInstr& lhs) const;
+
+   static auto from_string(std::istream& is, bool cut) -> Pointer;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   int m_stream;
+   bool m_cut;
+};
+
+class WriteTFInstr : public WriteOutInstr {
+public:
+   using WriteOutInstr::WriteOutInstr;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const WriteTFInstr& rhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory &vf) -> Pointer;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+};
+
+
+}
+
+#endif // INSTR_EXPORT_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
new file mode 100644
index 0000000..e68c7b7
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.cpp
@@ -0,0 +1,659 @@
+#include "sfn_instr_fetch.h"
+#include "sfn_valuefactory.h"
+#include "sfn_defines.h"
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+using std::istringstream;
+
+FetchInstr::FetchInstr(EVFetchInstr opcode,
+                       const RegisterVec4& dst,
+                       const RegisterVec4::Swizzle& dest_swizzle,
+                       PRegister src,
+                       uint32_t src_offset,
+                       EVFetchType fetch_type,
+                       EVTXDataFormat data_format,
+                       EVFetchNumFormat num_format,
+                       EVFetchEndianSwap endian_swap,
+                       uint32_t resource_id,
+                       PRegister resource_offset):
+   InstrWithVectorResult(dst, dest_swizzle),
+   m_opcode(opcode),
+   m_src(src),
+   m_src_offset(src_offset),
+   m_fetch_type(fetch_type),
+   m_data_format(data_format),
+   m_num_format(num_format),
+   m_endian_swap(endian_swap),
+   m_resource_id(resource_id),
+   m_resource_offset(resource_offset),
+   m_mega_fetch_count(0),
+   m_array_base(0),
+   m_array_size(0),
+   m_elm_size(0)
+{
+   switch (m_opcode) {
+   case vc_fetch :
+      m_opname ="VFETCH";
+   break;
+   case vc_semantic :
+      m_opname = "FETCH_SEMANTIC";
+   break;
+   case vc_get_buf_resinfo :
+      set_print_skip(mfc);
+      set_print_skip(fmt);
+      set_print_skip(ftype);
+      m_opname = "GET_BUF_RESINFO";
+   break;
+   case vc_read_scratch :
+      m_opname = "READ_SCRATCH";
+   break;
+   default:
+      unreachable("Unknwon fetch instruction");
+   }
+
+   if (m_src)
+      m_src->add_use(this);
+
+   if (m_resource_offset && m_resource_offset->as_register())
+      m_resource_offset->as_register()->add_use(this);
+}
+
+void FetchInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void FetchInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool FetchInstr::is_equal_to(const FetchInstr& rhs) const
+{
+   if (m_src) {
+      if (rhs.m_src) {
+         if (!m_src->equal_to(*rhs.m_src))
+            return false;
+      } else
+         return false;
+   } else if (rhs.m_src)
+      return false;
+
+   if (!comp_dest(rhs.dst(), rhs.all_dest_swizzle()))
+      return false;
+
+   if (m_tex_flags != rhs.m_tex_flags)
+      return false;
+
+   if (m_resource_offset && rhs.m_resource_offset) {
+      if (!m_resource_offset->equal_to(*rhs.m_resource_offset))
+         return false;
+   } else if (!(!!m_resource_offset == !!rhs.m_resource_offset))
+      return false;
+
+   return m_opcode == rhs.m_opcode &&
+         m_src_offset == rhs.m_src_offset &&
+         m_fetch_type == rhs.m_fetch_type &&
+         m_data_format == rhs.m_data_format &&
+         m_num_format == rhs.m_num_format &&
+         m_endian_swap == rhs.m_endian_swap &&
+         m_resource_id == rhs.m_resource_id &&
+         m_mega_fetch_count == rhs.m_mega_fetch_count &&
+         m_array_base == rhs.m_array_base &&
+         m_array_size == rhs.m_array_size &&
+         m_elm_size == rhs.m_elm_size;
+}
+
+bool FetchInstr::propagate_death()
+{
+   auto reg = m_src->as_register();
+   if (reg)
+      reg->del_use(this);
+   return true;
+}
+
+bool FetchInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   bool success = false;
+   auto new_reg = new_src->as_register();
+   if (new_reg) {
+      if (old_src->equal_to(*m_src)) {
+         m_src->del_use(this);
+         m_src = new_reg;
+         new_reg->add_use(this);
+         success = true;
+      }
+      if (m_resource_offset && old_src->equal_to(*m_resource_offset)) {
+         m_resource_offset->del_use(this);
+         m_resource_offset = new_reg;
+         new_reg->add_use(this);
+         success = true;
+      }
+   }
+   return success;
+}
+
+bool FetchInstr::do_ready() const
+{
+   for (auto i: required_instr()) {
+      if (!i->is_scheduled())
+         return false;
+   }
+
+   bool result = m_src && m_src->ready(block_id(), index());
+   if (m_resource_offset) {
+      auto r = m_resource_offset->as_register();
+      if (r)
+         result &= r->ready(block_id(), index());
+   }
+   return result;
+}
+
+void FetchInstr::do_print(std::ostream& os) const
+{
+   os << m_opname << ' ';
+
+   print_dest(os);
+
+   os << " :";
+
+   if (m_opcode != vc_get_buf_resinfo) {
+
+      if (m_src && m_src->chan() < 7) {
+         os << " " << *m_src;
+         if (m_src_offset)
+            os << " + " << m_src_offset << "b";
+      }
+   }
+
+   if (m_opcode != vc_read_scratch)
+      os << " RID:" << m_resource_id;
+
+   if (m_resource_offset) {
+      os << " + ";
+      m_resource_offset->print(os);
+   }
+
+   if (!m_skip_print.test(ftype)) {
+      switch (m_fetch_type) {
+      case vertex_data : os << " VERTEX"; break;
+      case instance_data : os << " INSTANCE_DATA"; break;
+      case no_index_offset : os << " NO_IDX_OFFSET"; break;
+      default:
+         unreachable("Unknwon fetch instruction type");
+      }
+   }
+
+   if (!m_skip_print.test(fmt)) {
+      os << " FMT(";
+      auto fmt = s_data_format_map.find(m_data_format);
+      if (fmt != s_data_format_map.end())
+         os << fmt->second << ",";
+      else
+         unreachable("unknwon data format");
+
+      if (m_tex_flags.test(format_comp_signed))
+         os << "S";
+      else
+         os << "U";
+
+      switch (m_num_format) {
+      case vtx_nf_norm : os << "NORM"; break;
+      case vtx_nf_int : os << "INT"; break;
+      case vtx_nf_scaled: os << "SCALED"; break;
+      default:
+         unreachable("Unknwon number format");
+      }
+
+      os << ")";
+   }
+
+   if (m_array_base) {
+      if (m_opcode != vc_read_scratch)
+         os << " BASE:" << m_array_base;
+      else
+         os << " L[0x" << std::uppercase << std::hex << m_array_base << std::dec << "]";
+   }
+
+   if (m_array_size)
+      os << " SIZE:" << m_array_size + 1;
+
+   if (m_tex_flags.test(is_mega_fetch) && !m_skip_print.test(mfc))
+      os << " MFC:" << m_mega_fetch_count;
+
+   if (m_elm_size)
+      os << " ES:" << m_elm_size;
+
+   if (m_tex_flags.test(fetch_whole_quad)) os << " WQ";
+   if (m_tex_flags.test(use_const_field)) os << " UCF";
+   if (m_tex_flags.test(srf_mode)) os << " SRF";
+   if (m_tex_flags.test(buf_no_stride)) os << " BNS";
+   if (m_tex_flags.test(alt_const)) os << " AC";
+   if (m_tex_flags.test(use_tc)) os << " TC";
+   if (m_tex_flags.test(vpm)) os << " VPM";
+   if (m_tex_flags.test(uncached) && m_opcode != vc_read_scratch) os << " UNCACHED";
+   if (m_tex_flags.test(indexed) && m_opcode != vc_read_scratch) os << " INDEXED";
+}
+
+Instr::Pointer FetchInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   return from_string_impl(is, vc_fetch, vf);
+}
+
+Instr::Pointer FetchInstr::from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest_reg = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string srcstr;
+   is >> srcstr;
+
+   std::cerr << "Get source "  << srcstr << "\n";
+
+   auto src_reg = vf.src_from_string(srcstr)->as_register();
+   assert(src_reg);
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int src_offset_val = 0;
+
+   if (next == "+") {
+      is >> src_offset_val;
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   string fetch_type_str;
+   is >> fetch_type_str;
+
+   EVFetchType fetch_type = vertex_data;
+   if (fetch_type_str == "VERTEX") {
+      fetch_type = vertex_data;
+   } else {
+      assert("Fetch type not yet implemented");
+   }
+
+   string format_str;
+   is >> format_str;
+
+   assert(!strncmp(format_str.c_str(), "FMT(", 4));
+   string data_format;
+   string num_format_str;
+
+   istringstream fmt_stream(format_str.substr(4));
+   bool is_num_fmr = false;
+   assert(!fmt_stream.eof());
+
+   do {
+      char c;
+      fmt_stream >> c;
+
+      if (c == ',')  {
+         is_num_fmr = true;
+         continue;
+      }
+
+      if (!is_num_fmr)
+         data_format.append(1, c);
+      else
+         num_format_str.append(1, c);
+   } while (!fmt_stream.eof());
+
+   EVTXDataFormat fmt = fmt_invalid;
+
+   for (auto& [f, name] :  s_data_format_map) {
+      if (data_format == name) {
+         fmt = f;
+         break;
+      }
+   }
+
+   assert(fmt != fmt_invalid);
+
+   bool fmt_signed = num_format_str[0] == 'S';
+   assert(fmt_signed || num_format_str[0] == 'U');
+
+   size_t num_format_end = num_format_str.find(')');
+   num_format_str = num_format_str.substr(1, num_format_end - 1) ;
+
+   EVFetchNumFormat num_fmt;
+   if (num_format_str == "NORM")
+      num_fmt = vtx_nf_norm;
+   else if (num_format_str == "INT")
+      num_fmt = vtx_nf_int;
+   else if (num_format_str == "SCALED")
+      num_fmt = vtx_nf_scaled;
+   else {
+      std::cerr << "Number format: '" << num_format_str << "' : ";
+      unreachable("Unknown number format");
+   }
+
+   auto fetch = new FetchInstr(opcode, dest_reg, dst_swz,
+                               src_reg, src_offset_val, fetch_type, fmt, num_fmt,
+                               vtx_es_none, res_id, nullptr);
+   if (fmt_signed)
+      fetch->set_fetch_flag(format_comp_signed);
+
+   while (!is.eof() && is.good()) {
+      std::string next_token;
+      is >> next_token;
+
+      if (next_token.empty())
+         break;
+
+      if (next_token.find(':') != string::npos) {
+         fetch->set_param_from_string(next_token);
+      } else {
+         fetch->set_flag_from_string(next_token);
+      }
+   }
+
+   return fetch;
+}
+
+void FetchInstr::set_param_from_string(const std::string& token)
+{
+   if (token.substr(0,4) == "MFC:")
+      set_mfc(int_from_string_with_prefix(token, "MFC:"));
+   else if (token.substr(0,5) == "ARRB:")
+      set_array_base(int_from_string_with_prefix(token, "ARRB:"));
+   else if (token.substr(0,5) == "ARRS:")
+      set_array_size(int_from_string_with_prefix(token, "ARRS:"));
+   else if (token.substr(0,3) == "ES:")
+      set_element_size(int_from_string_with_prefix(token, "ES:"));
+   else {
+      std::cerr << "Token '" << token << "': ";
+      unreachable("Unknown token in fetch param list");
+   }
+}
+
+void FetchInstr::set_flag_from_string(const std::string& token)
+{
+   auto flag = s_flag_map.find(token.c_str());
+   if (flag != s_flag_map.end())
+      set_fetch_flag(flag->second);
+   else {
+      std::cerr << "Token: " << token << " : ";
+      unreachable("Unknown token in fetch flag list");
+   }
+}
+
+
+const std::map<const char *, FetchInstr::EFlags> FetchInstr::s_flag_map = {
+   {"WQ", fetch_whole_quad},
+   {"UCF", use_const_field},
+   {"SRF", srf_mode},
+   {"BNS", buf_no_stride},
+   {"AC", alt_const},
+   {"TC", use_tc},
+   {"VPM", vpm},
+   {"UNCACHED", uncached},
+   {"INDEXED", indexed}
+};
+
+const std::map<EVTXDataFormat, const char *> FetchInstr::s_data_format_map = {
+   {fmt_invalid, "INVALID"},
+   {fmt_8, "8"},
+   {fmt_4_4, "4_4"},
+   {fmt_3_3_2, "3_3_2"},
+   {fmt_reserved_4, "RESERVED_4"},
+   {fmt_16, "16"},
+   {fmt_16_float, "16F"},
+   {fmt_8_8, "8_8"},
+   {fmt_5_6_5, "5_6_5"},
+   {fmt_6_5_5, "6_5_5"},
+   {fmt_1_5_5_5, "1_5_5_5"},
+   {fmt_4_4_4_4, "4_4_4_4"},
+   {fmt_5_5_5_1, "5_5_5_1"},
+   {fmt_32, "32"},
+   {fmt_32_float, "32F"},
+   {fmt_16_16,  "16_16"},
+   {fmt_16_16_float, "16_16F"},
+   {fmt_8_24, "8_24"},
+   {fmt_8_24_float, "8_24F"},
+   {fmt_24_8, "24_8"},
+   {fmt_24_8_float, "24_8F"},
+   {fmt_10_11_11, "10_11_11"},
+   {fmt_10_11_11_float, "10_11_11F"},
+   {fmt_11_11_10, "11_11_10"},
+   {fmt_10_11_11_float, "11_11_10F"},
+   {fmt_2_10_10_10, "2_10_10_10"},
+   {fmt_8_8_8_8, "8_8_8_8"},
+   {fmt_10_10_10_2, "10_10_10_2"},
+   {fmt_x24_8_32_float, "X24_8_32F"},
+   {fmt_32_32, "32_32"},
+   {fmt_32_32_float, "32_32F"},
+   {fmt_16_16_16_16, "16_16_16_16"},
+   {fmt_16_16_16_16_float, "16_16_16_16F"},
+   {fmt_reserved_33, "RESERVED_33"},
+   {fmt_32_32_32_32, "32_32_32_32"},
+   {fmt_32_32_32_32_float, "32_32_32_32F"},
+   {fmt_reserved_36, "RESERVED_36"},
+   {fmt_1, "1"},
+   {fmt_1_reversed, "1_REVERSED"},
+   {fmt_gb_gr, "GB_GR"},
+   {fmt_bg_rg, "BG_RG"},
+   {fmt_32_as_8, "32_AS_8"},
+   {fmt_32_as_8_8, "32_AS_8_8"},
+   {fmt_5_9_9_9_sharedexp, "5_9_9_9_SHAREDEXP"},
+   {fmt_8_8_8, "8_8_8"},
+   {fmt_16_16_16, "16_16_16"},
+   {fmt_16_16_16_float, "16_16_16F"},
+   {fmt_32_32_32, "32_32_32"},
+   {fmt_32_32_32_float, "32_32_32F"},
+   {fmt_bc1, "BC1"},
+   {fmt_bc2, "BC2"},
+   {fmt_bc3, "BC3"},
+   {fmt_bc4, "BC4"},
+   {fmt_bc5, "BC5"},
+   {fmt_apc0, "APC0"},
+   {fmt_apc1, "APC1"},
+   {fmt_apc2, "APC2"},
+   {fmt_apc3, "APC3"},
+   {fmt_apc4, "APC4"},
+   {fmt_apc5, "APC5"},
+   {fmt_apc6, "APC6"},
+   {fmt_apc7, "APC7"},
+   {fmt_ctx1, "CTX1"},
+   {fmt_reserved_63, "RESERVED_63"}
+};
+
+
+QueryBufferSizeInstr::QueryBufferSizeInstr(const RegisterVec4& dst,
+                                           const RegisterVec4::Swizzle& dst_swz,
+                                           uint32_t resid):
+   FetchInstr(vc_get_buf_resinfo,
+              dst, dst_swz,
+              new Register( 0, 7, pin_fully),
+              0,
+              no_index_offset,
+              fmt_32_32_32_32,
+              vtx_nf_norm,
+              vtx_es_none,
+              resid,
+              nullptr)
+{
+   set_fetch_flag(format_comp_signed);
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+Instr::Pointer QueryBufferSizeInstr::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr, res_id_str;
+   is >> deststr;
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   is >> res_id_str;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   return new QueryBufferSizeInstr( dst, dst_swz, res_id);
+}
+
+LoadFromBuffer::LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swizzle,
+                               PRegister addr, uint32_t addr_offset,
+                               uint32_t resid, PRegister res_offset, EVTXDataFormat data_format):
+   FetchInstr(vc_fetch, dst,  dst_swizzle, addr, addr_offset, no_index_offset,
+              data_format, vtx_nf_scaled, vtx_es_none, resid, res_offset)
+{
+   set_fetch_flag(format_comp_signed);
+   set_mfc(16);
+   override_opname("LOAD_BUF");
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+}
+
+Instr::Pointer LoadFromBuffer::from_string(std::istream& is, ValueFactory& vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dst = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_reg = vf.src_from_string(addrstr)->as_register();
+
+   string res_id_str;
+   string next;
+   is >> next;
+
+   int addr_offset_val = 0;
+
+   if (next == "+") {
+      is >> addr_offset_val;
+      is >> help;
+      assert(help == 'b');
+      is >> res_id_str;
+   } else {
+      res_id_str = next;
+   }
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+
+   next.clear();
+   is >> next;
+   PRegister res_offset = nullptr;
+   if (next == "+") {
+      string res_offset_str;
+      is >> res_offset_str;
+      res_offset = vf.src_from_string(res_offset_str)->as_register();
+   }
+
+   auto fetch = new LoadFromBuffer( dst, dst_swz,
+                                    addr_reg, addr_offset_val,
+                                    res_id, res_offset, fmt_32_32_32_32_float);
+   is >> next;
+   if (next == "SRF")
+      fetch->set_fetch_flag(srf_mode);
+
+   return fetch;
+}
+
+class AddrResolver: public RegisterVisitor {
+public:
+   AddrResolver(LoadFromScratch *lfs) : m_lfs(lfs) {}
+
+   void visit(Register& value) {
+      m_lfs->set_fetch_flag(FetchInstr::indexed);
+      m_lfs->set_src(&value);
+      value.add_use(m_lfs);
+   }
+   void visit(LocalArray& value) {assert(0);(void)value;}
+   void visit(LocalArrayValue& value) {assert(0);(void)value;}
+   void visit(UniformValue& value) {assert(0);(void)value;}
+   void visit(LiteralConstant& value) {
+      m_lfs->set_array_base(value.value());
+      m_lfs->set_src(new Register( 0, 7, pin_none));
+   }
+   void visit(InlineConstant& value) {assert(0);(void)value;}
+
+   LoadFromScratch *m_lfs;
+};
+
+
+
+LoadFromScratch::LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& dst_swz, PVirtualValue addr, uint32_t scratch_size):
+   FetchInstr(vc_read_scratch,
+              dst, dst_swz,
+              nullptr,
+              0,
+              no_index_offset,
+              fmt_32_32_32_32,
+              vtx_nf_int,
+              vtx_es_none,
+              0,
+              nullptr)
+{
+   set_fetch_flag(uncached);
+   set_fetch_flag(wait_ack);
+
+   assert(scratch_size >= 1);
+   set_array_size(scratch_size - 1);
+   set_array_base(0);
+   AddrResolver ar(this);
+   addr->accept(ar);
+
+   set_print_skip(mfc);
+   set_print_skip(fmt);
+   set_print_skip(ftype);
+   set_element_size(3);
+}
+
+Instr::Pointer LoadFromScratch::from_string(std::istream& is, ValueFactory &vf)
+{
+   std::string deststr;
+   is >> deststr;
+
+   RegisterVec4::Swizzle dst_swz;
+   auto dest = vf.dest_vec4_from_string(deststr, dst_swz, pin_group);
+
+   char help;
+   is >> help;
+   assert(help == ':');
+
+   string addrstr;
+   is >> addrstr;
+   auto addr_reg = vf.src_from_string(addrstr);
+
+   string offsetstr;
+   is >> offsetstr;
+   int size = int_from_string_with_prefix(offsetstr, "SIZE:");
+   assert(size >= 1);
+
+   return new LoadFromScratch( dest, dst_swz, addr_reg, size);
+}
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
new file mode 100644
index 0000000..3fd0f34
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_fetch.h
@@ -0,0 +1,152 @@
+#ifndef INSTR_FETCH_H
+#define INSTR_FETCH_H
+
+#include "sfn_instr.h"
+
+namespace r600 {
+
+class ValueFactory;
+
+class FetchInstr : public InstrWithVectorResult {
+public:
+
+   enum EFlags {
+      fetch_whole_quad,
+      use_const_field,
+      format_comp_signed,
+      srf_mode,
+      buf_no_stride,
+      alt_const,
+      use_tc,
+      vpm,
+      is_mega_fetch,
+      uncached,
+      indexed,
+      wait_ack,
+      unknown
+   };
+
+   enum EPrintSkip {
+      fmt,
+      ftype,
+      mfc,
+      count
+   };
+
+   FetchInstr(EVFetchInstr opcode,
+              const RegisterVec4& dst,
+              const RegisterVec4::Swizzle& dest_swizzle,
+              PRegister src,
+              uint32_t src_offset,
+              EVFetchType fetch_type,
+              EVTXDataFormat data_format,
+              EVFetchNumFormat num_format,
+              EVFetchEndianSwap endian_swap,
+              uint32_t resource_id,
+              PRegister resource_offset);
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   void set_src(PRegister src) { m_src = src; }
+   const auto& src() const {assert(m_src); return *m_src;}
+   uint32_t src_offset() const {return m_src_offset;}
+
+   uint32_t resource_id() const {return m_resource_id;}
+   auto resource_offset() const {return m_resource_offset;}
+
+   EVFetchType fetch_type() const {return m_fetch_type;}
+   EVTXDataFormat data_format() const {return m_data_format;}
+   void  set_num_format(EVFetchNumFormat nf) {m_num_format = nf;}
+   EVFetchNumFormat num_format() const {return m_num_format;}
+   EVFetchEndianSwap endian_swap() const {return m_endian_swap;}
+
+   uint32_t mega_fetch_count() const {return m_mega_fetch_count;}
+   uint32_t array_base() const {return m_array_base;}
+   uint32_t array_size() const {return m_array_size;}
+   uint32_t elm_size() const {return m_elm_size;}
+
+   void reset_fetch_flag(EFlags flag) {m_tex_flags.reset(flag);}
+   void set_fetch_flag(EFlags flag) {m_tex_flags.set(flag);}
+   bool has_fetch_flag(EFlags flag) const { return m_tex_flags.test(flag);}
+
+   EVFetchInstr opcode() const {return m_opcode;}
+
+   bool is_equal_to(const FetchInstr& rhs) const;
+
+   static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
+
+   void set_mfc(int mfc) {m_tex_flags.set(is_mega_fetch); m_mega_fetch_count = mfc;}
+   void set_array_base(int arrb) {m_array_base = arrb;}
+   void set_array_size(int arrs) {m_array_size = arrs;}
+
+   void set_element_size(int size) { m_elm_size = size;}
+   void set_print_skip(EPrintSkip skip) {m_skip_print.set(skip);}
+   uint32_t slots() const override {return 1;};
+
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+protected:
+   static Instr::Pointer from_string_impl(std::istream& is, EVFetchInstr opcode, ValueFactory &vf);
+
+   void override_opname(const char *opname) { m_opname = opname;}
+
+private:
+   bool do_ready() const override;
+
+   void do_print(std::ostream& os) const override;
+
+   void set_param_from_string(const std::string& next_token);
+   void set_flag_from_string(const std::string& next_token);
+
+   static const std::map<EVTXDataFormat, const char *> s_data_format_map;
+   static const std::map<const char *, EFlags> s_flag_map;
+
+   bool propagate_death() override;
+
+   EVFetchInstr m_opcode;
+
+   PRegister m_src;
+   uint32_t m_src_offset;
+
+   EVFetchType m_fetch_type;
+   EVTXDataFormat m_data_format;
+   EVFetchNumFormat m_num_format;
+   EVFetchEndianSwap m_endian_swap;
+
+   uint32_t m_resource_id;
+   PRegister m_resource_offset;
+
+   std::bitset<EFlags::unknown> m_tex_flags;
+   std::bitset<EPrintSkip::count> m_skip_print;
+
+   uint32_t m_mega_fetch_count;
+   uint32_t m_array_base;
+   uint32_t m_array_size;
+   uint32_t m_elm_size;
+
+   std::string m_opname;
+};
+
+class QueryBufferSizeInstr : public FetchInstr {
+public:
+   QueryBufferSizeInstr(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, uint32_t resid);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+class LoadFromBuffer : public FetchInstr {
+public:
+   LoadFromBuffer(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle,
+                  PRegister addr, uint32_t addr_offset,
+                  uint32_t resid, PRegister res_offset, EVTXDataFormat data_format);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+class LoadFromScratch : public FetchInstr {
+public:
+   LoadFromScratch(const RegisterVec4& dst, const RegisterVec4::Swizzle& swizzle, PVirtualValue addr, uint32_t offset);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& vf);
+};
+
+}
+#endif // INSTR_FETCH_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
new file mode 100644
index 0000000..caa48a9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.cpp
@@ -0,0 +1,411 @@
+#include "sfn_instr_lds.h"
+#include "sfn_instr_alu.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+using std::istream;
+
+LDSReadInstr::LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
+                           AluInstr::SrcValues& address):
+   m_address(address),
+   m_dest_value(value)
+{
+   assert(m_address.size() == m_dest_value.size());
+
+   for (auto& v: value)
+      v->add_parent(this);
+
+   for (auto& s: m_address)
+      if (s->as_register())
+         s->as_register()->add_use(this);
+}
+
+void LDSReadInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void LDSReadInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool LDSReadInstr::remove_unused_components()
+{
+   uint8_t inactive_mask = 0;
+   for (size_t i = 0; i < m_dest_value.size(); ++i) {
+      if (m_dest_value[i]->uses().empty())
+         inactive_mask |= 1 << i;
+   }
+
+   if (!inactive_mask)
+      return false;
+
+   auto new_addr = AluInstr::SrcValues();
+   auto new_dest = std::vector<PRegister, Allocator<PRegister>>();
+
+   for (size_t i = 0; i < m_dest_value.size(); ++i) {
+      if ((1 << i) & inactive_mask) {
+         if (m_address[i]->as_register())
+            m_address[i]->as_register()->del_use(this);
+         m_dest_value[i]->del_parent(this);
+      } else {
+         new_dest.push_back(m_dest_value[i]);
+         new_addr.push_back(m_address[i]);
+      }
+   }
+
+   m_dest_value.swap(new_dest);
+   m_address.swap(new_addr);
+
+   return m_address.size() != new_addr.size();
+}
+
+class SetLDSAddrProperty : public AluInstrVisitor {
+   using AluInstrVisitor::visit;
+   void visit(AluInstr *instr) override {
+      instr->set_alu_flag(alu_lds_address);
+   }
+};
+
+AluInstr *LDSReadInstr::split(std::vector<AluInstr*>& out_block, AluInstr *last_lds_instr)
+{
+   AluInstr* first_instr = nullptr;
+   SetLDSAddrProperty prop;
+   for (auto& addr: m_address) {
+      auto reg = addr->as_register();
+      if (reg) {
+         reg->del_use(this);
+         if (reg->parents().size() == 1) {
+            for (auto& p: reg->parents()) {
+               p->accept(prop);
+            }
+         }
+      }
+
+      auto instr = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr);
+      instr->set_blockid(block_id(), index());
+
+      if (last_lds_instr)
+         instr->add_required_instr(last_lds_instr);
+      out_block.push_back(instr);
+      last_lds_instr = instr;
+      if (!first_instr) {
+         first_instr = instr;
+         first_instr->set_alu_flag(alu_lds_group_start);
+      } else {
+         /* In order to make it possible that the scheduler
+          * keeps the loads of a group close together, we
+          * require that the addresses are all already available
+          * when the first read instruction is emitted.
+          * Otherwise it might happen that the loads and reads from the
+          * queue are split across ALU cf clauses, and this is not allowed */
+         first_instr->add_extra_dependency(addr);
+      }
+   }
+
+   for (auto& dest: m_dest_value) {
+      dest->del_parent(this);
+      auto instr = new AluInstr(op1_mov, dest,
+                                new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
+                                AluInstr::last_write);
+      instr->add_required_instr(last_lds_instr);
+      instr->set_blockid(block_id(), index());
+      out_block.push_back(instr);
+      last_lds_instr = instr;
+   }
+   if (last_lds_instr)
+      last_lds_instr->set_alu_flag(alu_lds_group_end);
+
+   return last_lds_instr;
+}
+
+bool LDSReadInstr::do_ready() const
+{
+   unreachable("This instruction is not handled by the schduler");
+   return false;
+}
+
+void LDSReadInstr::do_print(std::ostream& os) const
+{
+   os << "LDS_READ ";
+
+   os << "[ ";
+   for (auto d: m_dest_value) {
+      os << *d << " ";
+   }
+   os << "] : [ ";
+   for (auto a: m_address) {
+      os << *a << " ";
+   }
+   os << "]";
+}
+
+bool LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const
+{
+   if (m_address.size() != rhs.m_address.size())
+      return false;
+
+   for (unsigned i = 0; i < num_values(); ++i) {
+      if (!m_address[i]->equal_to(*rhs.m_address[i]))
+         return false;
+      if (!m_dest_value[i]->equal_to(*rhs.m_dest_value[i]))
+         return false;
+   }
+   return true;
+}
+
+auto LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
+{
+   /* LDS_READ [ d1, d2, d3 ... ] : a1 a2 a3 ... */
+
+   std::string temp_str;
+
+   is >> temp_str;
+   assert(temp_str == "[");
+
+   std::vector<PRegister, Allocator<PRegister> > dests;
+   AluInstr::SrcValues srcs;
+
+   is >> temp_str;
+   while (temp_str != "]") {
+      auto dst = value_factory.dest_from_string(temp_str);
+      assert(dst);
+      dests.push_back(dst);
+      is >> temp_str;
+   }
+
+   is >> temp_str;
+   assert(temp_str == ":");
+   is >> temp_str;
+   assert(temp_str == "[");
+
+   is >> temp_str;
+   while (temp_str != "]") {
+      auto src = value_factory.src_from_string(temp_str);
+      assert(src);
+      srcs.push_back(src);
+      is >> temp_str;
+   };
+   assert(srcs.size() == dests.size() && !dests.empty());
+
+   return new LDSReadInstr(dests, srcs);
+}
+
+LDSAtomicInstr::LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address,
+                               const SrcValues& srcs):
+   m_opcode(op),
+   m_address(address),
+   m_dest(dest),
+   m_srcs(srcs)
+{
+   if (m_dest)
+      m_dest->add_parent(this);
+
+   if (m_address->as_register())
+      m_address->as_register()->add_use(this);
+
+   for (auto& s: m_srcs) {
+      if (s->as_register())
+         s->as_register()->add_use(this);
+   }
+}
+
+
+void LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void LDSAtomicInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+AluInstr *LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
+{
+   AluInstr::SrcValues srcs = {m_address};
+
+   for(auto& s : m_srcs)
+      srcs.push_back(s);
+
+   for(auto& s :srcs) {
+      if (s->as_register())
+         s->as_register()->del_use(this);
+   }
+
+   SetLDSAddrProperty prop;
+   auto reg = srcs[0]->as_register();
+   if (reg) {
+      reg->del_use(this);
+      if (reg->parents().size() == 1) {
+         for (auto& p: reg->parents()) {
+            p->accept(prop);
+         }
+      }
+   }
+
+   auto op_instr = new AluInstr(m_opcode, srcs, {});
+   op_instr->set_blockid(block_id(), index());
+
+   if (last_lds_instr) {
+      op_instr->add_required_instr(last_lds_instr);
+   }
+
+   out_block.push_back(op_instr);
+   if (m_dest) {
+      op_instr->set_alu_flag(alu_lds_group_start);
+      m_dest->del_parent(this);
+      auto read_instr = new AluInstr(op1_mov, m_dest,
+                                     new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
+                                     AluInstr::last_write);
+      read_instr->add_required_instr(op_instr);
+      read_instr->set_blockid(block_id(), index());
+      read_instr->set_alu_flag(alu_lds_group_end);
+      out_block.push_back(read_instr);
+      last_lds_instr = read_instr;
+   }
+   return last_lds_instr;
+}
+
+bool LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
+{
+   bool process = false;
+
+
+   if (new_src->as_uniform() && m_srcs.size() > 2) {
+      int nconst = 0;
+      for (auto& s : m_srcs) {
+         if (s->as_uniform() && !s->equal_to(*old_src))
+            ++nconst;
+      }
+      /* Conservative check: with two kcache values can always live,
+       * tree might be a problem, don't care for now, just reject
+       */
+      if (nconst > 2)
+         return false;
+   }
+
+   /* If the old source is an array element, we assume that there
+    * might have been an (untracked) indirect access, so don't replace
+    * this source */
+   if (old_src->pin() == pin_array)
+      return false;
+
+   if (new_src->get_addr()) {
+      for (auto& s : m_srcs) {
+         auto addr = s->get_addr();
+         /* can't have two differen't indirect addresses in the same instr */
+         if (addr && !addr->equal_to(*new_src->get_addr()))
+            return false;
+      }
+   }
+
+   for (unsigned i = 0; i < m_srcs.size(); ++i) {
+      if (old_src->equal_to(*m_srcs[i])) {
+         m_srcs[i] = new_src;
+         process = true;
+      }
+   }
+
+   if (process) {
+      auto r = new_src->as_register();
+      if (r)
+         r->add_use(this);
+      old_src->del_use(this);
+   }
+   return process;
+}
+
+bool LDSAtomicInstr::do_ready() const
+{
+   unreachable("This instruction is not handled by the schduler");
+   return false;
+}
+
+void LDSAtomicInstr::do_print(std::ostream& os) const
+{
+   auto ii = lds_ops.find(m_opcode);
+   assert(ii != lds_ops.end());
+
+   os << "LDS " << ii->second.name << " ";
+   if (m_dest)
+      os << *m_dest;
+   else
+      os << "__.x";
+
+   os << " [ " << *m_address << " ] : " << *m_srcs[0];
+   if (m_srcs.size() > 1)
+      os << " " << *m_srcs[1];
+}
+
+bool LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const
+{
+   if (m_srcs.size() != rhs.m_srcs.size())
+      return false;
+
+   for (unsigned i = 0; i < m_srcs.size(); ++i) {
+      if (!m_srcs[i]->equal_to(*rhs.m_srcs[i]))
+         return false;
+   }
+
+   return m_opcode == rhs.m_opcode &&
+         sfn_value_equal(m_address, rhs.m_address) &&
+         sfn_value_equal(m_dest, rhs.m_dest);
+}
+
+
+auto LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
+{
+   /* LDS WRITE2 __.x [ R1.x ] : R2.y R3.z */
+   /* LDS WRITE __.x [ R1.x ] : R2.y  */
+   /* LDS ATOMIC_ADD_RET [ R5.y ] : R2.y  */
+
+   std::string temp_str;
+
+   is >> temp_str;
+
+   ESDOp opcode = DS_OP_INVALID;
+   int nsrc = 0;
+
+   for (auto& [op, opinfo] : lds_ops) {
+      if (temp_str == opinfo.name) {
+         opcode = op;
+         nsrc = opinfo.nsrc;
+         break;
+      }
+   }
+
+   assert(opcode != DS_OP_INVALID);
+
+   is >> temp_str;
+
+   PRegister dest = nullptr;
+   if (temp_str[0] != '_')
+      dest = value_factory.dest_from_string(temp_str);
+
+   is >> temp_str;
+   assert(temp_str == "[");
+   is >> temp_str;
+   auto addr = value_factory.src_from_string(temp_str);
+
+   is >> temp_str;
+   assert(temp_str == "]");
+
+   is >> temp_str;
+   assert(temp_str == ":");
+
+   AluInstr::SrcValues srcs;
+   for (int i = 0; i < nsrc - 1; ++i) {
+      is >> temp_str;
+      auto src = value_factory.src_from_string(temp_str);
+      assert(src);
+      srcs.push_back(src);
+   }
+
+   return new LDSAtomicInstr(opcode, dest, addr, srcs);
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_lds.h b/src/gallium/drivers/r600/sfn/sfn_instr_lds.h
new file mode 100644
index 0000000..8b1c163
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_lds.h
@@ -0,0 +1,80 @@
+#ifndef LDSINSTR_H
+#define LDSINSTR_H
+
+#include "sfn_instr_alu.h"
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+class LDSReadInstr : public Instr {
+public:
+   LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
+                AluInstr::SrcValues& address);
+
+   unsigned num_values() const { return m_dest_value.size();}
+   auto address(unsigned i) const { return m_address[i];}
+   auto dest(unsigned i) const { return m_dest_value[i];}
+
+   auto address(unsigned i){ return m_address[i];}
+   auto dest(unsigned i)  { return m_dest_value[i];}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
+   bool is_equal_to(const LDSReadInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+
+   bool remove_unused_components();
+
+private:
+
+   bool do_ready() const override;
+
+   void do_print(std::ostream& os) const override;
+
+   AluInstr::SrcValues m_address;
+   std::vector<PRegister, Allocator<PRegister>> m_dest_value;
+};
+
+class LDSAtomicInstr : public Instr {
+public:
+   using SrcValues = AluInstr::SrcValues;
+
+   LDSAtomicInstr(ESDOp op, PRegister dest, PVirtualValue address, const SrcValues& src);
+
+   auto address() const { return m_address;}
+   auto dest() const { return m_dest;}
+   auto src0() const { return m_srcs[0];}
+   auto src1() const { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
+
+   PVirtualValue address() { return m_address;}
+   PRegister dest()  { return m_dest;}
+   PVirtualValue src0() { return m_srcs[0];}
+   PVirtualValue src1() { return m_srcs.size() > 1 ? m_srcs[1] : nullptr;}
+
+   unsigned op() const {return m_opcode;}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   AluInstr *split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr);
+   bool is_equal_to(const LDSAtomicInstr& lhs) const;
+
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+   bool replace_source(PRegister old_src, PVirtualValue new_src) override;
+
+private:
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_opcode;
+   PVirtualValue m_address{nullptr};
+   PRegister m_dest{nullptr};
+   SrcValues m_srcs;
+};
+
+}
+
+#endif // LDSINSTR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
new file mode 100644
index 0000000..ba46d4a
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
@@ -0,0 +1,844 @@
+#include "sfn_instr_mem.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_tex.h"
+#include "sfn_shader.h"
+
+namespace r600 {
+
+
+GDSInstr::GDSInstr(ESDOp op, Register *dest,
+                   const RegisterVec4& src, int uav_base,
+                   PRegister uav_id):
+   m_op(op),
+   m_dest(dest),
+   m_src(src),
+   m_uav_base(uav_base),
+   m_uav_id(uav_id)
+{
+   set_always_keep();
+
+   m_src.add_use(this);
+   m_dest->add_parent(this);
+
+   if (m_uav_id)
+      m_uav_id->add_use(this);
+}
+
+bool GDSInstr::is_equal_to(const GDSInstr& rhs) const
+{
+#define NE(X) (X != rhs. X)
+
+   if (NE(m_op) ||
+       NE(m_src) ||
+       NE(m_uav_base))
+      return false;
+
+   sfn_value_equal(m_dest, rhs.m_dest);
+
+   return sfn_value_equal(m_uav_id, rhs.m_uav_id);
+}
+
+void GDSInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void GDSInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool GDSInstr::do_ready() const
+{
+   return m_src.ready(block_id(), index()) &&
+         (!m_uav_id || m_uav_id->ready(block_id(), index()));
+}
+
+void GDSInstr::do_print(std::ostream& os) const
+{
+   os << "GDS " << lds_ops.at(m_op).name
+      << *m_dest;
+   os << " " << m_src;
+   os << " BASE:" << m_uav_base;
+
+   if (m_uav_id)
+      os << " UAV:" << *m_uav_id;
+}
+
+bool GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_atomic_counter_add:
+   case nir_intrinsic_atomic_counter_and:
+   case nir_intrinsic_atomic_counter_exchange:
+   case nir_intrinsic_atomic_counter_max:
+   case nir_intrinsic_atomic_counter_min:
+   case nir_intrinsic_atomic_counter_or:
+   case nir_intrinsic_atomic_counter_xor:
+   case nir_intrinsic_atomic_counter_comp_swap:
+      return emit_atomic_op2(intr, shader);
+   case nir_intrinsic_atomic_counter_read:
+   case nir_intrinsic_atomic_counter_post_dec:
+      return emit_atomic_read(intr, shader);
+   case nir_intrinsic_atomic_counter_inc:
+      return emit_atomic_inc(intr, shader);
+   case nir_intrinsic_atomic_counter_pre_dec:
+      return emit_atomic_pre_dec(intr, shader);
+   default:
+      return false;
+   }
+}
+
+static ESDOp get_opcode(const nir_intrinsic_op opcode)
+{
+   switch (opcode) {
+   case nir_intrinsic_atomic_counter_add:
+      return DS_OP_ADD_RET;
+   case nir_intrinsic_atomic_counter_and:
+      return DS_OP_AND_RET;
+   case nir_intrinsic_atomic_counter_exchange:
+      return DS_OP_XCHG_RET;
+   case nir_intrinsic_atomic_counter_inc:
+      return DS_OP_INC_RET;
+   case nir_intrinsic_atomic_counter_max:
+      return DS_OP_MAX_UINT_RET;
+   case nir_intrinsic_atomic_counter_min:
+      return DS_OP_MIN_UINT_RET;
+   case nir_intrinsic_atomic_counter_or:
+      return DS_OP_OR_RET;
+   case nir_intrinsic_atomic_counter_read:
+      return DS_OP_READ_RET;
+   case nir_intrinsic_atomic_counter_xor:
+      return DS_OP_XOR_RET;
+   case nir_intrinsic_atomic_counter_post_dec:
+      return DS_OP_DEC_RET;
+   case nir_intrinsic_atomic_counter_comp_swap:
+      return DS_OP_CMP_XCHG_RET;
+   case nir_intrinsic_atomic_counter_pre_dec:
+   default:
+      return DS_OP_INVALID;
+   }
+}
+
+static ESDOp get_opcode_wo(const nir_intrinsic_op opcode)
+{
+   switch (opcode) {
+   case nir_intrinsic_atomic_counter_add:
+      return DS_OP_ADD;
+   case nir_intrinsic_atomic_counter_and:
+      return DS_OP_AND;
+   case nir_intrinsic_atomic_counter_inc:
+      return DS_OP_INC;
+   case nir_intrinsic_atomic_counter_max:
+      return DS_OP_MAX_UINT;
+   case nir_intrinsic_atomic_counter_min:
+      return DS_OP_MIN_UINT;
+   case nir_intrinsic_atomic_counter_or:
+      return DS_OP_OR;
+   case nir_intrinsic_atomic_counter_xor:
+      return DS_OP_XOR;
+   case nir_intrinsic_atomic_counter_post_dec:
+      return DS_OP_DEC;
+   case nir_intrinsic_atomic_counter_comp_swap:
+      return DS_OP_CMP_XCHG_RET;
+   case nir_intrinsic_atomic_counter_exchange:
+      return DS_OP_XCHG_RET;
+   case nir_intrinsic_atomic_counter_pre_dec:
+   default:
+      return DS_OP_INVALID;
+   }
+}
+
+
+bool GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
+
+   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
+                            get_opcode_wo(instr->intrinsic);
+
+   if (DS_OP_INVALID == op)
+      return false;
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += nir_intrinsic_base(instr);
+
+   auto dest = vf.dest(instr->dest, 0, pin_free);
+
+   PRegister src_as_register = nullptr;
+   auto src_val = vf.src(instr->src[1], 0);
+   if (!src_val->as_register()) {
+      auto temp_src_val = vf.temp_register();
+      shader.emit_instruction(new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
+      src_as_register = temp_src_val;
+   } else
+      src_as_register = src_val->as_register();
+
+   if (uav_id != nullptr)
+      shader.set_flag(Shader::sh_indirect_atomic);
+
+   GDSInstr *ir = nullptr;
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
+      ir = new GDSInstr(op, dest, src, offset, uav_id);
+
+   } else {
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
+      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
+   }
+   shader.emit_instruction(ir);
+   return true;
+}
+
+bool GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   auto dest = vf.dest(instr->dest, 0, pin_free);
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      RegisterVec4 src = RegisterVec4(0, true, {7,7,7,7});
+      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
+   } else {
+      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
+   }
+
+   shader.emit_instruction(ir);
+   return true;
+}
+
+bool GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
+      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
+                             dest, src, offset, uav_id);
+   } else {
+      auto dest = vf.dest(instr->dest, 0, pin_free);
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
+      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD,
+                        dest, tmp, 0, nullptr);
+   }
+   shader.emit_instruction(ir);
+   return true;
+}
+
+bool GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0); {}
+   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
+
+   auto *tmp_dest = vf.temp_register();
+
+   GDSInstr *ir = nullptr;
+
+   if (shader.chip_class() < ISA_CC_CAYMAN) {
+      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
+      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, src, offset, uav_id);
+   } else {
+      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
+      if (uav_id)
+         shader.emit_instruction(new AluInstr(op3_muladd_uint24, tmp[0], uav_id, vf.literal(4), vf.literal(4 * offset),
+                                 AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
+
+      shader.emit_instruction(new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
+      ir = new GDSInstr(DS_OP_SUB_RET, tmp_dest, tmp, 0, nullptr);
+   }
+
+   shader.emit_instruction(ir);
+   shader.emit_instruction(new AluInstr(op2_sub_int,  vf.dest(instr->dest, 0, pin_free),
+                                        tmp_dest, vf.one_i(), AluInstr::last_write));
+   return true;
+}
+
+
+RatInstr::RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
+                   const RegisterVec4& data, const RegisterVec4& index,
+                   int rat_id, PRegister rat_id_offset,
+                   int burst_count, int comp_mask, int element_size):
+   m_cf_opcode(cf_opcode),
+   m_rat_op(rat_op),
+   m_data(data),
+   m_index(index),
+   m_rat_id_offset(rat_id_offset),
+   m_rat_id(rat_id),
+   m_burst_count(burst_count),
+   m_comp_mask(comp_mask),
+   m_element_size(element_size)
+{
+   set_always_keep();
+
+   m_data.add_use(this);
+   m_index.add_use(this);
+   if (m_rat_id_offset)
+      m_rat_id_offset->add_use(this);
+}
+
+
+void RatInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void RatInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+bool RatInstr::is_equal_to(const RatInstr& lhs) const
+{
+   (void)lhs;
+   assert(0);
+   return false;
+}
+
+bool RatInstr::do_ready() const
+{  
+   if (m_rat_op != STORE_TYPED) {
+      for (auto i: required_instr()) {
+         if (!i->is_scheduled()) {
+            return false;
+         }
+      }
+   }
+
+   return m_data.ready(block_id(), index()) &&
+         m_index.ready(block_id(), index());
+}
+
+void RatInstr::do_print(std::ostream& os) const
+{
+   os << "MEM_RAT RAT " << m_rat_id;
+   if (m_rat_id_offset)
+      os << "+" << *m_rat_id_offset;
+   os << " @" << m_index;
+   os << " OP:" << m_rat_op << " " << m_data;
+   os << " BC:" << m_burst_count
+      << " MASK:" << m_comp_mask
+      << " ES:" << m_element_size;
+   if (m_need_ack)
+      os << " ACK";
+}
+
+static RatInstr::ERatOp
+get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format)
+{
+	switch (opcode) {
+   case nir_intrinsic_image_load:
+      return RatInstr::NOP_RTN;
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_image_atomic_add:
+      return RatInstr::ADD_RTN;
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_image_atomic_and:
+      return RatInstr::AND_RTN;
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_image_atomic_or:
+      return RatInstr::OR_RTN;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_image_atomic_imin:
+      return RatInstr::MIN_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_image_atomic_imax:
+      return RatInstr::MAX_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_image_atomic_umin:
+      return RatInstr::MIN_UINT_RTN;
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_image_atomic_umax:
+      return RatInstr::MAX_UINT_RTN;
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_image_atomic_xor:
+      return RatInstr::XOR_RTN;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_comp_swap:
+      if (util_format_is_float(format))
+         return RatInstr::CMPXCHG_FLT_RTN;
+      else
+         return RatInstr::CMPXCHG_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
+      return RatInstr::XCHG_RTN;
+   default:
+      unreachable("Unsupported WO RAT instruction");
+   }
+}
+
+static RatInstr::ERatOp
+get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format)
+{
+	switch (opcode) {
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_image_atomic_add:
+      return RatInstr::ADD;
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_image_atomic_and:
+      return RatInstr::AND;
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_image_atomic_or:
+      return RatInstr::OR;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_image_atomic_imin:
+      return RatInstr::MIN_INT;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_image_atomic_imax:
+      return RatInstr::MAX_INT;
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_image_atomic_umin:
+      return RatInstr::MIN_UINT;
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_image_atomic_umax:
+      return RatInstr::MAX_UINT;
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_image_atomic_xor:
+      return RatInstr::XOR;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_comp_swap:
+      if (util_format_is_float(format))
+         return RatInstr::CMPXCHG_FLT;
+      else
+         return RatInstr::CMPXCHG_INT;
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
+      return RatInstr::XCHG_RTN;
+   default:
+      unreachable("Unsupported WO RAT instruction");
+   }
+}
+
+bool RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_ssbo:
+      return emit_ssbo_load(intr, shader);
+   case nir_intrinsic_store_ssbo:
+      return emit_ssbo_store(intr, shader);
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_exchange:
+      return emit_ssbo_atomic_op(intr, shader);
+   case nir_intrinsic_image_store:
+      return emit_image_store(intr, shader);
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_imax:
+      return emit_image_load_or_atomic(intr, shader);
+   case nir_intrinsic_image_size:
+      return emit_image_size(intr, shader);
+   case nir_intrinsic_get_ssbo_size:
+      return emit_ssbo_size(intr, shader);
+   default:
+      return false;
+   }
+}
+
+bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto &vf = shader.value_factory();
+   auto dest = vf.dest_vec4(intr->dest, pin_group);
+
+   /** src0 not used, should be some offset */
+   auto addr = vf.src(intr->src[1], 0);
+   auto addr_temp = vf.temp_register();
+
+   /** Should be lowered in nir */
+   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_temp, addr, vf.literal(2),
+                                        {alu_write, alu_last_instr}));
+
+   const EVTXDataFormat formats[4] = {
+      fmt_32,
+      fmt_32_32,
+      fmt_32_32_32,
+      fmt_32_32_32_32
+   };
+
+   RegisterVec4::Swizzle dest_swz[4] = {
+      {0,7,7,7},
+      {0,1,7,7},
+      {0,1,2,7},
+      {0,1,2,3}
+   };
+
+   int comp_idx = nir_dest_num_components(intr->dest) - 1;
+
+   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0); {}
+
+   auto res_id =  R600_IMAGE_REAL_RESOURCE_OFFSET + offset +
+                  shader.ssbo_image_offset();
+
+   auto ir = new LoadFromBuffer(dest, dest_swz[comp_idx], addr_temp,  0,
+                                res_id, res_offset, formats[comp_idx]);
+   ir->set_fetch_flag(FetchInstr::use_tc);
+   ir->set_num_format(vtx_nf_int);
+
+   shader.emit_instruction(ir);
+   return true;
+}
+
+bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
+{
+
+   /* Forche the scheduler to not move the preparation too far away, by starting
+    * a new block (TODO: better priority handling in the scheduler)*/
+   if (nir_src_num_components(instr->src[0]) > 2)
+      shader.start_new_block(0);
+
+   auto &vf = shader.value_factory();
+   auto orig_addr = vf.src(instr->src[2], 0);
+
+   auto addr_base = vf.temp_register();
+
+   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);
+
+   shader.emit_instruction(new AluInstr(op2_lshr_int, addr_base, orig_addr,
+                           vf.literal(2), AluInstr::write));
+
+   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
+      auto addr_vec = vf.temp_vec4(pin_group, {0,1,2,7});
+      if (i == 0) {
+         shader.emit_instruction(new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
+      } else {
+         shader.emit_instruction(new AluInstr(op2_add_int, addr_vec[0], addr_base,
+                                 vf.literal(i),
+                                 AluInstr::last_write));
+      }
+      auto value = vf.src(instr->src[0], i);
+      PRegister v = vf.temp_register(0);
+      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
+      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
+      auto store = new RatInstr(cf_mem_rat, RatInstr::STORE_TYPED,
+                                value_vec, addr_vec, offset + shader.ssbo_image_offset(),
+                                rat_id, 1, 1, 0);
+      shader.emit_instruction(store);
+   }
+
+   return true;
+}
+
+bool RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0); {}
+
+   bool read_result = !intr->dest.is_ssa || !list_is_empty(&intr->dest.ssa.uses);
+   auto opcode = read_result ? get_rat_opcode(intr->intrinsic, PIPE_FORMAT_R32_UINT) :
+                               get_rat_opcode_wo(intr->intrinsic, PIPE_FORMAT_R32_UINT);
+
+   auto coord_orig =  vf.src(intr->src[1], 0);
+   auto coord = vf.temp_register(0);
+
+   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
+
+   shader.emit_instruction(new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));
+
+   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
+
+
+   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
+                                           vf.src(intr->src[2], 0), {alu_last_instr, alu_write}));
+   } else {
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
+   }
+
+
+   RegisterVec4 out_vec(coord, coord, coord, coord, pin_group);
+
+   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, out_vec, imageid + shader.ssbo_image_offset(),
+                              image_offset, 1, 0xf, 0);
+   shader.emit_instruction(atomic);
+
+   atomic->set_ack(); 
+   if (read_result) {
+      atomic->set_instr_flag(ack_rat_return_write);
+      auto dest = vf.dest_vec4(intr->dest, pin_group);
+
+      auto fetch = new FetchInstr(vc_fetch,
+                                  dest, {0, 1, 2, 3},
+                                  shader.rat_return_address(),
+                                  0,
+                                  no_index_offset,
+                                  fmt_32,
+                                  vtx_nf_int,
+                                  vtx_es_none,
+                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+                                  image_offset);
+      fetch->set_mfc(15);
+      fetch->set_fetch_flag(FetchInstr::srf_mode);
+      fetch->set_fetch_flag(FetchInstr::use_tc);
+      fetch->set_fetch_flag(FetchInstr::vpm);
+      fetch->set_fetch_flag(FetchInstr::wait_ack);
+      fetch->add_required_instr(atomic);
+      shader.chain_ssbo_read(fetch);
+      shader.emit_instruction(fetch);
+   }
+
+   return true;
+
+}
+
+bool RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto dest = vf.dest_vec4(intr->dest, pin_group);
+
+   auto const_offset = nir_src_as_const_value(intr->src[0]);
+   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
+   if (const_offset)
+      res_id += const_offset[0].u32;
+   else
+      assert(0 && "dynamic buffer offset not supported in buffer_size");
+
+   shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3},res_id));
+   return true;
+}
+
+bool RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
+
+
+   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
+   auto coord =  vf.temp_vec4(pin_group);
+
+   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
+   auto value =  vf.temp_vec4(pin_group);
+
+   RegisterVec4::Swizzle swizzle = {0,1,2,3};
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin))
+      swizzle  = {0,2,1,3};
+
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
+   }
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
+   }
+
+   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
+   auto store = new RatInstr(op, RatInstr::STORE_TYPED, value, coord, imageid,
+                             image_offset, 1, 0xf, 0);
+
+   if (nir_intrinsic_has_access(intrin) & ACCESS_COHERENT)
+      store->set_ack();
+   shader.emit_instruction(store);
+   return true;
+}
+
+bool RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0); {}
+
+   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
+   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
+                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
+
+   auto coord_orig =  vf.src_vec4(intrin->src[1], pin_chan);
+   auto coord = vf.temp_vec4(pin_group);
+
+   auto data_vec4 = vf.temp_vec4(pin_group, {0,1,2,3});
+
+   RegisterVec4::Swizzle swizzle = {0,1,2,3};
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin))
+      swizzle  = {0,2,1,3};
+
+   for (int i = 0; i < 4; ++i) {
+      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
+      shader.emit_instruction(new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
+   }
+
+   shader.emit_instruction(new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
+
+   if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
+                                          vf.src(intrin->src[3], 0), AluInstr::last_write));
+   } else {
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[0],
+                                          vf.src(intrin->src[3], 0), AluInstr::write));
+      shader.emit_instruction(new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
+   }
+
+   auto atomic = new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid,
+                              image_offset, 1, 0xf, 0);
+   shader.emit_instruction(atomic);
+
+   atomic->set_ack();
+   if (read_result) {
+      atomic->set_instr_flag(ack_rat_return_write);
+      auto dest = vf.dest_vec4(intrin->dest, pin_group);
+
+      pipe_format format = nir_intrinsic_format(intrin);
+      unsigned fmt = fmt_32;
+      unsigned num_format = 0;
+      unsigned format_comp = 0;
+      unsigned endian = 0;
+      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
+
+      auto fetch = new FetchInstr(vc_fetch,
+                                  dest, {0, 1, 2, 3},
+                                  shader.rat_return_address(),
+                                  0,
+                                  no_index_offset,
+                                  (EVTXDataFormat)fmt,
+                                  (EVFetchNumFormat)num_format,
+                                  (EVFetchEndianSwap)endian,
+                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+                                  image_offset);
+      fetch->set_mfc(3);
+      fetch->set_fetch_flag(FetchInstr::srf_mode);
+      fetch->set_fetch_flag(FetchInstr::use_tc);
+      fetch->set_fetch_flag(FetchInstr::vpm);
+      fetch->set_fetch_flag(FetchInstr::wait_ack);
+      if (format_comp)
+         fetch->set_fetch_flag(FetchInstr::format_comp_signed);
+
+      shader.chain_ssbo_read(fetch);
+      shader.emit_instruction(fetch);
+   }
+
+   return true;
+}
+
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+bool RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto src = RegisterVec4(0, true, {4,4,4,4});
+
+   assert(nir_src_as_uint(intrin->src[1]) == 0);
+
+   auto const_offset = nir_src_as_const_value(intrin->src[0]);
+   PRegister dyn_offset = nullptr;
+
+   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
+   if (const_offset)
+      res_id += const_offset[0].u32;
+   else
+      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));
+
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
+      auto dest = vf.dest_vec4(intrin->dest, pin_group);
+      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,1,2,3}, res_id));
+      return true;
+   } else {
+
+      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
+          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
+         /* Need to load the layers from a const buffer */
+
+         auto dest = vf.dest_vec4(intrin->dest, pin_group);
+         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,7,3},
+                                              src, 0/* ?? */, res_id, dyn_offset));
+
+         shader.set_flag(Shader::sh_txs_cube_array_comp);
+
+         if (const_offset) {
+            unsigned lookup_resid = const_offset[0].u32;
+            shader.emit_instruction(new AluInstr(op1_mov, dest[2],
+                                    vf.uniform(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL,  lookup_resid % 4,
+                                               R600_BUFFER_INFO_CONST_BUFFER),
+                                    AluInstr::last_write));
+         } else {
+            /* If the adressing is indirect we have to get the z-value by using a binary search */
+            auto addr = vf.temp_register();
+            auto comp1 = vf.temp_register();
+            auto comp2 = vf.temp_register();
+            auto low_bit = vf.temp_register();
+            auto high_bit = vf.temp_register();
+
+            auto trgt = vf.temp_vec4(pin_group);
+
+            shader.emit_instruction(new AluInstr(op2_lshr_int, addr, vf.src(intrin->src[0], 0),
+                             vf.literal(2), AluInstr::write));
+            shader.emit_instruction(new AluInstr(op2_and_int, low_bit, vf.src(intrin->src[0], 0),
+                             vf.one_i(), AluInstr::write));
+            shader.emit_instruction(new AluInstr(op2_and_int, high_bit, vf.src(intrin->src[0], 0),
+                             vf.literal(2), AluInstr::last_write));
+
+            shader.emit_instruction(new LoadFromBuffer(trgt, {0, 1, 2, 3}, addr, R600_SHADER_BUFFER_INFO_SEL,
+                                                  R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float));
+
+            // this may be wrong
+            shader.emit_instruction(new AluInstr(op3_cnde_int, comp1, high_bit, trgt[0], trgt[2],
+                                                AluInstr::write));
+            shader.emit_instruction(new AluInstr(op3_cnde_int, comp2, high_bit, trgt[1], trgt[3],
+                                                 AluInstr::last_write));
+            shader.emit_instruction(new AluInstr(op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
+         }
+      } else {
+         auto dest = vf.dest_vec4(intrin->dest, pin_group);
+         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo, dest, {0,1,2,3},
+                                              src, 0/* ?? */, res_id, dyn_offset));
+
+      }
+   }
+   return true;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.h b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h
new file mode 100644
index 0000000..c224572
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h
@@ -0,0 +1,177 @@
+#ifndef GDSINSTR_H
+#define GDSINSTR_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+class Shader;
+
+class GDSInstr : public Instr {
+public:
+
+   GDSInstr(ESDOp op, Register *dest,
+            const RegisterVec4& src, int uav_base,
+            PRegister uav_id);
+
+   bool is_equal_to(const GDSInstr& lhs) const;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool do_ready() const override;
+
+   auto opcode() const {return m_op;}
+   auto src() const { return m_src;}
+
+   const auto& dest() const { return m_dest;}
+   auto& dest() { return m_dest;}
+
+   auto uav_id() const {return m_uav_id;}
+   auto uav_base() const {return m_uav_base;}
+
+   static auto from_string(std::istream& is, ValueFactory& value_factory) -> Pointer;
+
+   static bool emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader);
+   uint32_t slots() const override {return 1;};
+
+private:
+
+   static bool emit_atomic_read(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_op2(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_inc(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_atomic_pre_dec(nir_intrinsic_instr *intr, Shader& shader);
+
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_op{DS_OP_INVALID};
+   Register *m_dest;
+
+   RegisterVec4 m_src;
+
+   int m_uav_base{0};
+   PRegister m_uav_id{nullptr};
+   std::bitset<8> m_tex_flags;
+};
+
+
+class RatInstr : public Instr {
+
+public:
+   enum ERatOp {
+      NOP,
+      STORE_TYPED,
+      STORE_RAW,
+      STORE_RAW_FDENORM,
+      CMPXCHG_INT,
+      CMPXCHG_FLT,
+      CMPXCHG_FDENORM,
+      ADD,
+      SUB,
+      RSUB,
+      MIN_INT,
+      MIN_UINT,
+      MAX_INT,
+      MAX_UINT,
+      AND,
+      OR,
+      XOR,
+      MSKOR,
+      INC_UINT,
+      DEC_UINT,
+      NOP_RTN = 32,
+      XCHG_RTN = 34,
+      XCHG_FDENORM_RTN,
+      CMPXCHG_INT_RTN,
+      CMPXCHG_FLT_RTN,
+      CMPXCHG_FDENORM_RTN,
+      ADD_RTN,
+      SUB_RTN,
+      RSUB_RTN,
+      MIN_INT_RTN,
+      MIN_UINT_RTN,
+      MAX_INT_RTN,
+      MAX_UINT_RTN,
+      AND_RTN,
+      OR_RTN,
+      XOR_RTN,
+      MSKOR_RTN,
+      UINT_RTN,
+      UNSUPPORTED
+   };
+
+   RatInstr(ECFOpCode cf_opcode, ERatOp rat_op,
+            const RegisterVec4& data, const RegisterVec4& index,
+            int rat_id, PRegister rat_id_offset,
+            int burst_count, int comp_mask, int element_size);
+
+   auto rat_id_offset() const { return m_rat_id_offset;}
+   int  rat_id() const { return m_rat_id;}
+
+   ERatOp rat_op() const {return m_rat_op;}
+
+   const auto& value() const { return m_data;}
+   auto& value() { return m_data;}
+
+   const auto& addr() const { return m_index;}
+   auto& addr() { return m_index;}
+
+   int data_gpr() const {return m_data.sel();}
+   int index_gpr() const {return m_index.sel();}
+   int elm_size() const {return m_element_size;}
+
+   int comp_mask() const {return m_comp_mask;}
+
+   bool need_ack() const {return m_need_ack;}
+   int burst_count() const {return m_burst_count;}
+
+   int data_swz(int chan) const {return m_data[chan]->chan();}
+
+   ECFOpCode cf_opcode() const { return m_cf_opcode;}
+
+   void set_ack() {m_need_ack = true; set_mark(); }
+   void set_mark() {m_need_mark = true; }
+   bool mark() {return m_need_mark;}
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   bool is_equal_to(const RatInstr& lhs) const;
+
+   static bool emit(nir_intrinsic_instr *intr, Shader& shader);
+
+private:
+
+   static bool emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader);
+
+   static bool emit_image_store(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_image_load_or_atomic(nir_intrinsic_instr *intr, Shader& shader);
+   static bool emit_image_size(nir_intrinsic_instr *intr, Shader& shader);
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_cf_opcode;
+   ERatOp m_rat_op;
+
+   RegisterVec4 m_data;
+   RegisterVec4 m_index;
+   PRegister m_rat_id_offset{nullptr};
+
+   int m_rat_id{0};
+   int m_burst_count{0};
+   int m_comp_mask{15};
+   int m_element_size{3};
+   bool m_need_ack{false};
+   bool m_need_mark{false};
+
+};
+
+
+}
+
+#endif // GDSINSTR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
new file mode 100644
index 0000000..a13d2a9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
@@ -0,0 +1,1011 @@
+#include "sfn_instr_tex.h"
+#include "sfn_instr_alu.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+using std::string;
+
+TexInstr::TexInstr(Opcode op, const RegisterVec4& dest,
+                   const RegisterVec4::Swizzle& dest_swizzle,
+                   const RegisterVec4& src, unsigned sid, unsigned rid,
+                   PVirtualValue sampler_offs):
+   InstrWithVectorResult(dest, dest_swizzle),
+   m_opcode(op),
+   m_src(src),
+   m_sampler_offset(sampler_offs),
+   m_inst_mode(0),
+   m_sampler_id(sid),
+   m_resource_id(rid)
+{
+   memset(m_offset, 0, sizeof(m_offset));
+   m_src.add_use(this);
+
+   if (m_sampler_offset && m_sampler_offset->as_register())
+      m_sampler_offset->as_register()->add_use(this);
+}
+
+void TexInstr::accept(ConstInstrVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+void TexInstr::accept(InstrVisitor& visitor)
+{
+   visitor.visit(this);
+}
+
+void TexInstr::set_offset(unsigned index, int32_t val)
+{
+   assert(index < 3);
+   m_offset[index] = val;
+}
+
+int TexInstr::get_offset(unsigned index) const
+{
+   assert(index < 3);
+   return m_offset[index] << 1;
+}
+
+void TexInstr::set_gather_comp(int cmp)
+{
+   m_inst_mode = cmp;
+}
+
+bool TexInstr::is_equal_to(const TexInstr& lhs) const
+{
+   if (m_opcode != lhs.m_opcode)
+      return false;
+
+   if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
+      return false;
+
+   if (m_src != lhs.m_src)
+      return false;
+
+   if (m_sampler_offset && lhs.m_sampler_offset) {
+      if (!m_sampler_offset->equal_to(*lhs.m_sampler_offset))
+         return false;
+   } else if ((m_sampler_offset && !lhs.m_sampler_offset) ||
+              (!m_sampler_offset && lhs.m_sampler_offset))
+      return false;
+
+   if (m_tex_flags != lhs.m_tex_flags)
+      return false;
+
+   for(int i = 0; i < 3; ++i) {
+      if (m_offset[i] != lhs.m_offset[i])
+         return false;
+   }
+   return m_inst_mode == lhs.m_inst_mode &&
+         m_sampler_id == lhs.m_sampler_id &&
+         m_resource_id == lhs.m_resource_id;
+}
+
+bool TexInstr::propagate_death()
+{
+   m_src.del_use(this);
+   return true;
+}
+
+bool TexInstr::do_ready() const
+{
+   for (auto p : m_prepare_instr)
+      if (!p->ready())
+         return false;
+
+   for (auto p :required_instr())
+      if (!p->is_scheduled() && !p->is_dead()) {
+         return false;
+      }
+
+   if (m_sampler_offset && m_sampler_offset->as_register() &&
+       !m_sampler_offset->as_register()->ready(block_id(), index()))
+       return false;
+   return m_src.ready(block_id(), index());
+}
+
+void TexInstr::do_print(std::ostream& os) const
+{
+
+   os << "TEX " << opname(m_opcode) << " ";
+   print_dest(os);
+
+   os << " : ";
+   m_src.print(os);
+
+   os << " RID:" << m_resource_id
+      << " SID:" << m_sampler_id;
+
+   if (m_sampler_offset)
+      os << " SO:" << *m_sampler_offset;
+
+   if (m_offset[0])
+      os << " OX:" << m_offset[0];
+   if (m_offset[1])
+      os << " OY:" << m_offset[1];
+   if (m_offset[2])
+      os << " OZ:" << m_offset[2];
+
+   if (m_inst_mode || is_gather(m_opcode))
+      os << " MODE:" << m_inst_mode;
+
+   os << " ";
+   os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
+   os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
+   os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
+   os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
+}
+
+const char *TexInstr::opname(Opcode op)
+{
+   switch (op) {
+   case ld: return "LD";
+   case get_resinfo: return "GET_TEXTURE_RESINFO";
+   case get_nsamples: return "GET_NUMBER_OF_SAMPLES";
+   case get_tex_lod: return "GET_LOD";
+   case get_gradient_h: return "GET_GRADIENTS_H";
+   case get_gradient_v: return "GET_GRADIENTS_V";
+   case set_offsets: return "SET_TEXTURE_OFFSETS";
+   case keep_gradients: return "KEEP_GRADIENTS";
+   case set_gradient_h: return "SET_GRADIENTS_H";
+   case set_gradient_v: return "SET_GRADIENTS_V";
+   case sample: return "SAMPLE";
+   case sample_l: return "SAMPLE_L";
+   case sample_lb: return "SAMPLE_LB";
+   case sample_lz: return "SAMPLE_LZ";
+   case sample_g: return "SAMPLE_G";
+   case sample_g_lb: return "SAMPLE_G_L";
+   case gather4: return "GATHER4";
+   case gather4_o: return "GATHER4_O";
+   case sample_c: return "SAMPLE_C";
+   case sample_c_l: return "SAMPLE_C_L";
+   case sample_c_lb: return "SAMPLE_C_LB";
+   case sample_c_lz: return "SAMPLE_C_LZ";
+   case sample_c_g: return "SAMPLE_C_G";
+   case sample_c_g_lb: return "SAMPLE_C_G_L";
+   case gather4_c: return "GATHER4_C";
+   case gather4_c_o: return "OP_GATHER4_C_O";
+   default: return "ERROR";
+   }
+
+}
+
+const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
+   {ld, "LD"},
+   {get_resinfo,"GET_TEXTURE_RESINFO"},
+   {get_nsamples,"GET_NUMBER_OF_SAMPLES"},
+   {get_tex_lod,"GET_LOD"},
+   {get_gradient_h,"GET_GRADIENTS_H"},
+   {get_gradient_v,"GET_GRADIENTS_V"},
+   {set_offsets,"SET_TEXTURE_OFFSETS"},
+   {keep_gradients,"KEEP_GRADIENTS"},
+   {set_gradient_h,"SET_GRADIENTS_H"},
+   {set_gradient_v,"SET_GRADIENTS_V"},
+   {sample,"SAMPLE"},
+   {sample_l,"SAMPLE_L"},
+   {sample_lb,"SAMPLE_LB"},
+   {sample_lz,"SAMPLE_LZ"},
+   {sample_g,"SAMPLE_G"},
+   {sample_g_lb,"SAMPLE_G_L"},
+   {gather4,"GATHER4"},
+   {gather4_o,"GATHER4_O"},
+   {sample_c,"SAMPLE_C"},
+   {sample_c_l,"SAMPLE_C_L"},
+   {sample_c_lb,"SAMPLE_C_LB"},
+   {sample_c_lz,"SAMPLE_C_LZ"},
+   {sample_c_g,"SAMPLE_C_G"},
+   {sample_c_g_lb,"SAMPLE_C_G_L"},
+   {gather4_c,"GATHER4_C"},
+   {gather4_c_o,"OP_GATHER4_C_O"},
+   {unknown, "ERROR"}
+};
+
+bool TexInstr::is_gather(Opcode op)
+{
+   return op == gather4 || op == gather4_c ||
+         op == gather4_o || op == gather4_c_o;
+}
+
+TexInstr::Opcode TexInstr::op_from_string(const std::string& s)
+{
+   for (auto& [op, str] : s_opcode_map) {
+      if (s == str)
+         return op;
+   }
+   return unknown;
+}
+
+Instr::Pointer TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
+{
+   string opstr;
+   string deststr;
+   is >> opstr >> deststr;
+
+   auto opcode = TexInstr::op_from_string(opstr);
+
+   RegisterVec4::Swizzle dest_swz;
+
+   auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
+
+   char dummy;
+   is >> dummy;
+   assert(dummy == ':');
+
+   string srcstr;
+   is >> srcstr;
+
+   auto src = value_fctory.src_vec4_from_string(srcstr);
+
+   string res_id_str;
+   string sampler_id_str;
+
+   is >> res_id_str >> sampler_id_str;
+
+   int res_id = int_from_string_with_prefix(res_id_str, "RID:");
+   int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
+
+   auto tex = new TexInstr( opcode, dest, dest_swz, src, sampler_id, res_id, nullptr);
+
+   while (!is.eof() && is.good()) {
+      std::string next_token;
+      is >> next_token;
+
+      if (next_token.empty())
+         break;
+
+      if (next_token[0] == 'U' || next_token[0] == 'N') {
+         tex->read_tex_coord_normalitazion(next_token);
+      } else {
+         tex->set_tex_param(next_token);
+      }
+   }
+
+   return tex;
+}
+
+void TexInstr::read_tex_coord_normalitazion(const std::string& flags)
+{
+   assert(flags.length() == 4);
+   if (flags[0] == 'U') set_tex_flag(x_unnormalized);
+   if (flags[1] == 'U') set_tex_flag(y_unnormalized);
+   if (flags[2] == 'U') set_tex_flag(z_unnormalized);
+   if (flags[3] == 'U') set_tex_flag(w_unnormalized);
+}
+
+void TexInstr::set_tex_param(const std::string& token)
+{
+   if (token.substr(0,3) == "OX:")
+      set_offset(0, int_from_string_with_prefix(token, "OX:"));
+   else if (token.substr(0,3) == "OY:")
+      set_offset(1, int_from_string_with_prefix(token, "OY:"));
+   else if (token.substr(0,3) == "OZ:")
+      set_offset(2, int_from_string_with_prefix(token, "OZ:"));
+   else if (token.substr(0,5) == "MODE:")
+      set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
+   else if (token.substr(0,3) == "SO:")
+      set_sampler_offset(VirtualValue::from_string(token.substr(3)));
+   else {
+      std::cerr << "Token '" << token << "': ";
+      unreachable("Unknown token in tex param");
+   }
+}
+
+bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
+{
+   Inputs src(*tex, shader.value_factory());
+
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+      switch (tex->op) {
+      case nir_texop_txs:
+         return emit_tex_txs(tex, src, {0,1,2,3}, shader);
+      case nir_texop_txf:
+         return emit_buf_txf(tex, src, shader);
+      default:
+         return false;
+      }
+   } else {
+      switch (tex->op) {
+      case nir_texop_tex:
+         return emit_tex_tex(tex, src, shader);
+      case nir_texop_txf:
+      return emit_tex_txf(tex, src, shader);
+      case nir_texop_txb:
+      case nir_texop_txl:
+         return emit_tex_txl_txb(tex, src, shader);
+      case nir_texop_txs:
+         return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
+      case nir_texop_lod:
+         return emit_tex_lod(tex, src, shader);
+      case nir_texop_query_levels:
+         return emit_tex_txs(tex, src, {3,7,7,7}, shader);
+      case nir_texop_txd:
+          return emit_tex_txd(tex, src, shader);
+      case nir_texop_txf_ms:
+         return emit_tex_tex_ms(tex, src, shader);
+      case nir_texop_tg4:
+         return emit_tex_tg4(tex, src, shader);
+      case nir_texop_texture_samples:
+         return emit_tex_texture_samples(tex, src, shader);
+      default:
+      return false;
+      }
+   }
+   return true;
+}
+
+struct SamplerId {
+   int id;
+   bool indirect;
+};
+
+SamplerId
+get_sampler_id(int sampler_id, const nir_variable *deref)
+{
+   SamplerId result = {sampler_id, false};
+
+   if (deref) {
+      assert(glsl_type_is_sampler(deref->type));
+      result.id = deref->data.binding;
+   }
+   return result;
+}
+
+
+bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   sfn_log << SfnLog::instr << "emit '"
+                 << *reinterpret_cast<nir_instr*>(tex)
+                 << "' (" << __func__ << ")\n";
+
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect);
+
+   auto src_coord = prepare_source(tex, src, shader);
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+
+   auto irt = new TexInstr(src.opcode, dst, {0,1,2,3},  src_coord, sampler.id,
+                           sampler.id + R600_MAX_CONST_BUFFERS,
+                           src.sampler_offset);
+   if (tex->is_array)
+      irt->set_tex_flag(TexInstr::z_unnormalized);
+
+   irt->set_rect_coordinate_flags(tex);
+   irt->set_coord_offsets(src.offset);
+
+   shader.emit_instruction(irt);
+   return true;
+}
+
+bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+   auto src_coord = prepare_source(tex, src, shader);
+
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+
+   auto irt = new TexInstr(src.opcode, dst, {0,1,2,3},  src_coord, sampler.id,
+                           sampler.id + R600_MAX_CONST_BUFFERS,
+                           src.sampler_offset);
+
+   if (tex->is_array)
+      irt->set_tex_flag(TexInstr::z_unnormalized);
+
+   irt->set_rect_coordinate_flags(tex);
+   irt->set_coord_offsets(src.offset);
+
+   shader.emit_instruction(irt);
+   return true;
+}
+
+
+bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   int sampler = tex->sampler_index;
+
+   auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
+   swizzle[3] = 3;
+
+   if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
+      swizzle[2] = 1;
+      swizzle[1] = 7;
+   }
+
+   auto src_coord = vf.temp_vec4(pin_group, swizzle);
+
+   for (unsigned i = 0; i < tex->coord_components; i++) {
+      unsigned k = i;
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
+         k = 2;
+
+
+      if (src.offset) {
+         shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i],
+                                              vf.src(src.offset[i], i),
+                                              AluInstr::write));
+      } else {
+         shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write));
+      }
+   }
+
+   shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write));
+
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+
+   auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord,
+                              sampler,
+                              sampler + R600_MAX_CONST_BUFFERS,
+                              src.sampler_offset);
+
+   if (tex->is_array)
+      tex_ir->set_tex_flag(z_unnormalized);
+
+   tex_ir->set_rect_coordinate_flags(tex);
+   tex_ir->set_sampler_offset(src.sampler_offset);
+
+   shader.emit_instruction(tex_ir);
+
+   return true;
+}
+
+bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+   auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group);
+
+   PRegister tex_offset = nullptr;
+   if (src.texture_offset)
+      tex_offset = shader.emit_load_to_register(src.texture_offset);
+   auto ir = new LoadFromBuffer(dst, {0,1,2,3}, src.coord[0], 0,
+                                tex->texture_index +  R600_MAX_CONST_BUFFERS,
+                                tex_offset, fmt_32_32_32_32_float);
+   ir->set_fetch_flag(FetchInstr::use_const_field);
+   shader.emit_instruction(ir);
+   shader.set_flag(Shader::sh_uses_tex_buffer);
+   return true;
+}
+
+bool TexInstr::emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+   assert(tex->src[0].src.is_ssa);
+   auto& vf = shader.value_factory();
+
+   r600::sfn_log << SfnLog::instr << "emit '"
+                 << *reinterpret_cast<nir_instr*>(tex)
+                 << "' (" << __func__ << ")\n";
+
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+   auto sample_id_dest = vf.temp_vec4(pin_group);
+   RegisterVec4::Swizzle dest_swz = {0,7,7,7};
+
+   auto temp1 = vf.temp_vec4(pin_group);
+   for (unsigned i = 0; i < tex->coord_components; ++i) {
+      unsigned k = i;
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
+         k = 2;
+
+      if (src.offset && i < src.offset->ssa->num_components)
+         shader.emit_instruction(new AluInstr(op2_add_int, temp1[k],
+                                              src.coord[i],
+                                              vf.src(*src.offset, i),
+                                              AluInstr::write));
+      else
+         shader.emit_instruction(new AluInstr(op1_mov, temp1[k],
+                                              src.coord[i], AluInstr::write));
+   }
+
+   shader.emit_instruction(new AluInstr(op1_mov, temp1[3],
+                                       src.ms_index, AluInstr::last_write));
+
+   auto tex_sample_id_ir = new TexInstr(ld, sample_id_dest, dest_swz, temp1,
+                                              sampler.id,
+                                              sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+   tex_sample_id_ir->set_tex_flag(x_unnormalized);
+   tex_sample_id_ir->set_tex_flag(y_unnormalized);
+   tex_sample_id_ir->set_tex_flag(z_unnormalized);
+   tex_sample_id_ir->set_tex_flag(w_unnormalized);
+   tex_sample_id_ir->set_inst_mode(1);
+
+   shader.emit_instruction(tex_sample_id_ir);
+
+   Register *sample_id_dest_reg = sample_id_dest[0];
+
+   if (!src.ms_index->as_inline_const() ||
+       src.ms_index->as_inline_const()->sel() != ALU_SRC_0) {
+
+      auto help = vf.temp_register();
+
+      shader.emit_instruction(new AluInstr(op2_lshl_int, help,
+                                           src.ms_index, vf.literal(2),
+                                           AluInstr::last_write));
+
+      sample_id_dest_reg = vf.temp_register();
+      shader.emit_instruction(new AluInstr(op2_lshr_int, sample_id_dest_reg,
+                                           sample_id_dest[0], help,
+                                           AluInstr::last_write));
+   }
+
+   auto temp2 = vf.temp_vec4(pin_group);
+
+   for (unsigned i = 0; i < tex->coord_components; ++i) {
+      unsigned k = i;
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
+         k = 2;
+
+      shader.emit_instruction(new AluInstr(op1_mov, temp2[k],
+                                           temp1[k], AluInstr::write));
+   }
+
+   shader.emit_instruction(new AluInstr(op2_and_int, temp2[3],
+                                        sample_id_dest_reg, vf.literal(15),
+                                        AluInstr::last_write));
+
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+
+   /* txf doesn't need rounding for the array index, but 1D has the array index
+    * in the z component */
+   auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2,
+                                    sampler.id,
+                                    sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+   shader.emit_instruction(tex_ir);
+   return true;
+}
+
+bool TexInstr::emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader)
+{
+   RegisterVec4 dest = shader.value_factory().dest_vec4(instr->dest, pin_chan);
+   RegisterVec4 help{0, true, {4,4,4,4}};
+
+   int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
+
+   auto ir = new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help,
+                          0, res_id, src.sampler_offset);
+   shader.emit_instruction(ir);
+   return true;
+}
+
+
+bool TexInstr::emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader)
+{
+
+   auto& vf = shader.value_factory();
+
+   r600::sfn_log << SfnLog::instr << "emit '"
+                 << *reinterpret_cast<nir_instr*>(tex)
+                 << "' (" << __func__ << ")\n";
+
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+   RegisterVec4 empty_dst(126, false, {0,0,0,0}, pin_group);
+
+   auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
+
+   if (tex->is_shadow)
+      swizzle[3] = 3;
+
+   unsigned array_coord = 2;
+   if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
+      swizzle[2] = 1;
+      swizzle[1] = 7;
+      array_coord = 1;
+   }
+
+   auto src_coord = vf.temp_vec4(pin_group, swizzle);
+
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+   auto irgh = new TexInstr(set_gradient_h, empty_dst, {7,7,7,7}, src.ddx,
+                            sampler.id,
+                            sampler.id + R600_MAX_CONST_BUFFERS,
+                            src.sampler_offset);
+
+   auto irgv = new TexInstr(set_gradient_v, empty_dst, {7,7,7,7}, src.ddy,
+                            sampler.id, sampler.id + R600_MAX_CONST_BUFFERS,
+                            src.sampler_offset);
+
+   auto tir = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
+                          sampler.id + R600_MAX_CONST_BUFFERS,
+                          src.sampler_offset);
+
+
+   /* r600_bytecode_add_tex has a hack that will start a new tex CF if
+    * set_gradient_h is emitted, so make sure it is emitted first */
+
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < tex->coord_components; ++i) {
+      int k = i;
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
+         k = 2;
+
+      ir = new AluInstr(tex->is_array && i == array_coord  ? op1_rndne : op1_mov,
+                        src_coord[k], src.coord[i],
+                        AluInstr::write);
+      shader.emit_instruction(ir);
+   }
+
+   if (tex->is_shadow)  {
+      ir = new AluInstr(op1_mov, src_coord[3], src.comperator, AluInstr::last_write);
+      shader.emit_instruction(ir);
+   }
+
+   tir->add_prepare_instr(irgh);
+   tir->add_prepare_instr(irgv);
+
+   if (tex->is_array)
+      tir->set_tex_flag(TexInstr::z_unnormalized);
+
+   irgh->set_rect_coordinate_flags(tex);
+   irgv->set_rect_coordinate_flags(tex);
+   irgh->set_always_keep();
+   irgv->set_always_keep();
+
+   tir->set_rect_coordinate_flags(tex);
+
+   tir->set_coord_offsets(src.offset);
+
+   if (shader.last_txd())
+      tir->add_required_instr(shader.last_txd());
+
+   shader.emit_instruction(tir);
+   shader.set_last_txd(tir);
+
+   return true;
+}
+
+bool TexInstr::emit_tex_txs(nir_tex_instr *tex, Inputs& src,
+                            RegisterVec4::Swizzle dest_swz, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   auto dest = vf.dest_vec4(tex->dest, pin_group);
+
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,7,7,7},
+                                                       tex->sampler_index + R600_MAX_CONST_BUFFERS));
+   } else {
+
+      auto src_lod = vf.temp_register();
+      shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
+
+      RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
+
+      auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+      assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+         dest_swz[2] = 7;
+
+      auto ir = new TexInstr(get_resinfo, dest, dest_swz, src_coord,
+                             sampler.id,
+                             sampler.id + R600_MAX_CONST_BUFFERS,
+                             src.sampler_offset);
+
+      ir->set_dest_swizzle(dest_swz);
+      shader.emit_instruction(ir);
+
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
+                                   sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER);
+
+
+         auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
+         shader.emit_instruction(alu);
+         shader.set_flag(Shader::sh_txs_cube_array_comp);
+      }
+   }
+
+   return true;
+}
+
+bool TexInstr::emit_tex_tg4(nir_tex_instr* tex, Inputs& src , Shader& shader)
+{
+   auto& vf = shader.value_factory();
+
+   r600::sfn_log << SfnLog::instr << "emit '"
+              << *reinterpret_cast<nir_instr*>(tex)
+              << "' (" << __func__ << ")\n";
+
+   TexInstr *set_ofs = nullptr;
+
+   auto src_coord = prepare_source(tex, src, shader);
+
+   r600::sfn_log << SfnLog::instr << "emit '"
+                 << *reinterpret_cast<nir_instr*>(tex)
+                 << "' (" << __func__ << ")\n";
+
+   auto dst = vf.dest_vec4(tex->dest, pin_group);
+
+   RegisterVec4 empty_dst(125, false, {7,7,7,7}, pin_group);
+
+   /* pre CAYMAN needs swizzle */
+   auto dest_swizzle = shader.chip_class() <= ISA_CC_EVERGREEN ?
+            RegisterVec4::Swizzle{1, 2, 0, 3} :
+            RegisterVec4::Swizzle{0, 1, 2, 3};
+
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+   bool literal_offset = false;
+   if (src.offset) {
+      literal_offset =  nir_src_as_const_value(*src.offset) != 0;
+      r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
+                       (literal_offset ? "literal" : "varying") <<
+                       "\n";
+
+      if (!literal_offset) {
+         RegisterVec4::Swizzle swizzle = {4,4,4,4};
+         int src_components = tex->coord_components;
+         if (tex->is_array)
+            --src_components;
+
+         for (int i = 0; i < src_components; ++i)
+            swizzle[i] = i;
+
+         int noffsets = tex->coord_components;
+         if (tex->is_array)
+            --noffsets;
+
+         auto ofs = vf.src_vec4(*src.offset, pin_group, swizzle);
+         RegisterVec4 dummy(0, true, {7,7,7,7});
+
+         set_ofs = new TexInstr(TexInstr::set_offsets, dummy, {7,7,7,7},
+                                ofs, sampler.id,
+                                sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+      } else {
+         src.opcode = src.opcode == gather4_o ? gather4 : gather4_c;
+      }
+   }
+
+   auto irt = new TexInstr(src.opcode, dst, dest_swizzle, src_coord, sampler.id,
+                           sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+   irt->set_gather_comp(tex->component);
+
+   if (tex->is_array)
+      irt->set_tex_flag(z_unnormalized);
+
+   if (literal_offset) {
+      r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
+      irt->set_coord_offsets(src.offset);
+   }
+
+   irt->set_rect_coordinate_flags(tex);
+
+   if (set_ofs) {
+      set_ofs->set_always_keep();
+      irt->add_prepare_instr(set_ofs);
+   }
+
+   shader.emit_instruction(irt);
+   return true;
+}
+
+auto TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader) -> RegisterVec4
+{
+   RegisterVec4::Swizzle target{7,7,7,7};
+   PVirtualValue src[4]{nullptr,nullptr,nullptr,nullptr};
+
+
+   for (unsigned i = 0; i < tex->coord_components; ++i) {
+      target[i] = i;
+      src[i] = inputs.coord[i];
+   }
+
+   // array index always goes into z
+   if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
+      target[2]  = 1;
+      target[1]  = 7;
+      src[2] = inputs.coord[1];
+   }
+
+   /* With txl and txb shadow goes into z and lod or bias go into w */
+   if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
+      target[3] = 3;
+      src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
+      if (tex->is_shadow){
+         target[2] = 2;
+         src[2] = inputs.comperator;
+      }
+   } else if (tex->is_shadow) {
+      /* Other ops have shadow in w */
+      target[3] = 3;
+      src[3] = inputs.comperator;
+   }
+
+   auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
+
+   AluInstr *ir = nullptr;
+   for (int i = 0; i < 4; ++i) {
+      if (target[i] > 3)
+        continue;
+
+      auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
+
+      ir = new AluInstr(op,  src_coord[i], src[i], AluInstr::write);
+      shader.emit_instruction(ir);
+   }
+
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   return src_coord;
+}
+
+TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
+   sampler_deref(nullptr),
+   texture_deref(nullptr),
+   bias(nullptr),
+   comperator(nullptr),
+   lod(nullptr),
+   offset(nullptr),
+   gather_comp(nullptr),
+   ms_index(nullptr),
+   sampler_offset(nullptr),
+   texture_offset(nullptr),
+   opcode(ld)
+{
+   //sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
+
+   unsigned grad_components = instr.coord_components;
+   if (instr.is_array && !instr.array_is_lowered_cube)
+      --grad_components;
+
+   for (unsigned i = 0; i < instr.num_srcs; ++i) {
+      switch (instr.src[i].src_type) {
+      case nir_tex_src_bias:
+         bias = vf.src(instr.src[i], 0);
+      break;
+
+      case nir_tex_src_coord: {
+         coord = vf.src_vec4(instr.src[i].src, pin_none, swizzle_from_ncomps(instr.coord_components));
+      } break;
+      case nir_tex_src_comparator:
+         comperator = vf.src(instr.src[i], 0);
+      break;
+      case nir_tex_src_ddx:
+         ddx = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
+      break;
+      case nir_tex_src_ddy:
+         ddy = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
+      break;
+      case nir_tex_src_lod:
+         lod = vf.src(instr.src[i].src, 0);
+      break;
+      case nir_tex_src_offset:
+         offset = &instr.src[i].src;
+      break;
+         /* case nir_tex_src_sampler_deref:
+         sampler_deref = get_deref_location(instr.src[i].src);
+         break;
+      case nir_tex_src_texture_deref:
+         texture_deref = get_deref_location(instr.src[i].src);
+         break;
+      */
+      case nir_tex_src_ms_index:
+         ms_index = vf.src(instr.src[i], 0);
+      break;
+      case nir_tex_src_texture_offset:
+         texture_offset = vf.src(instr.src[i], 0);
+      break;
+      case nir_tex_src_sampler_offset:
+         sampler_offset = vf.src(instr.src[i], 0);
+      break;
+      case nir_tex_src_plane:
+      case nir_tex_src_projector:
+      case nir_tex_src_min_lod:
+      default:
+         unreachable("unsupported texture input type");
+      }
+   }
+
+   opcode = get_opcode(instr);
+
+
+}
+
+auto TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
+{
+   switch (instr.op) {
+   case nir_texop_tex:
+      return instr.is_shadow ? sample_c : sample;
+   case nir_texop_txf:
+      return ld;
+   case nir_texop_txb:
+      return instr.is_shadow ? sample_c_lb : sample_lb;
+   case nir_texop_txl:
+      return instr.is_shadow ? sample_c_l : sample_l;
+   case nir_texop_txs:
+      return get_resinfo;
+   case nir_texop_lod:
+      return get_resinfo;
+   case nir_texop_txd:
+      return instr.is_shadow ? sample_c_g : sample_g;
+   case nir_texop_tg4:
+      return instr.is_shadow ?
+               (offset ? gather4_c_o : gather4_c) :
+               (offset ? gather4_o : gather4);
+
+   case nir_texop_txf_ms:
+      return ld;
+   case nir_texop_query_levels:
+      return get_resinfo;
+   case nir_texop_texture_samples:
+      return TexInstr::get_nsamples;
+   default:
+      unreachable("unsupported texture input opcode");
+   }
+}
+
+bool TexInstr::emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader)
+{
+   auto& vf = shader.value_factory();
+   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
+   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+   auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group);
+
+   auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
+
+   auto src_coord = vf.temp_vec4(pin_group, swizzle);
+
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < tex->coord_components; ++i) {
+      ir = new AluInstr(op1_mov,
+                        src_coord[i], src.coord[i],
+                        AluInstr::write);
+      shader.emit_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   auto irt = new TexInstr(TexInstr::get_tex_lod, dst, {1,0,7,7}, src_coord,
+                           sampler.id, sampler.id + R600_MAX_CONST_BUFFERS);
+
+   shader.emit_instruction(irt);
+   return true;
+}
+
+
+RegisterVec4::Swizzle TexInstr::Inputs::swizzle_from_ncomps(int comps) const
+{
+   RegisterVec4::Swizzle swz;
+   for (int i = 0; i < 4; ++i)
+      swz[i] = i < comps ? i : 7;
+   return swz;
+}
+
+void TexInstr::set_coord_offsets(nir_src *offset)
+{
+   if (!offset)
+      return;
+
+   assert(offset->is_ssa);
+   auto literal = nir_src_as_const_value(*offset);
+   assert(literal);
+
+   for (int i = 0; i < offset->ssa->num_components; ++i)
+      set_offset(i, literal[i].i32);
+}
+
+void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr)
+{
+   if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+      set_tex_flag(x_unnormalized);
+      set_tex_flag(y_unnormalized);
+   }
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
new file mode 100644
index 0000000..a7c068c
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
@@ -0,0 +1,166 @@
+#ifndef INSTR_TEX_H
+#define INSTR_TEX_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+class TexInstr : public InstrWithVectorResult {
+public:
+   enum Opcode {
+      ld = FETCH_OP_LD,
+      get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
+      get_nsamples = FETCH_OP_GET_NUMBER_OF_SAMPLES,
+      get_tex_lod = FETCH_OP_GET_LOD,
+      get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
+      get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
+      set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
+      keep_gradients = FETCH_OP_KEEP_GRADIENTS,
+      set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
+      set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
+      sample = FETCH_OP_SAMPLE,
+      sample_l = FETCH_OP_SAMPLE_L,
+      sample_lb = FETCH_OP_SAMPLE_LB,
+      sample_lz = FETCH_OP_SAMPLE_LZ,
+      sample_g = FETCH_OP_SAMPLE_G,
+      sample_g_lb = FETCH_OP_SAMPLE_G_L,
+      gather4 = FETCH_OP_GATHER4,
+      gather4_o =  FETCH_OP_GATHER4_O,
+
+      sample_c = FETCH_OP_SAMPLE_C,
+      sample_c_l = FETCH_OP_SAMPLE_C_L,
+      sample_c_lb = FETCH_OP_SAMPLE_C_LB,
+      sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
+      sample_c_g = FETCH_OP_SAMPLE_C_G,
+      sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
+      gather4_c = FETCH_OP_GATHER4_C,
+      gather4_c_o =  FETCH_OP_GATHER4_C_O,
+      unknown = 255
+   };
+
+   enum Flags {
+      x_unnormalized,
+      y_unnormalized,
+      z_unnormalized,
+      w_unnormalized,
+      grad_fine,
+      num_tex_flag
+   };
+
+   struct Inputs {
+      Inputs(const nir_tex_instr& instr, ValueFactory &vf);
+      const nir_variable *sampler_deref;
+      const nir_variable *texture_deref;
+      RegisterVec4 coord;
+      PVirtualValue bias;
+      PVirtualValue comperator;
+      PVirtualValue lod;
+      RegisterVec4 ddx;
+      RegisterVec4 ddy;
+      nir_src *offset;
+      PVirtualValue gather_comp;
+      PVirtualValue ms_index;
+      PVirtualValue sampler_offset;
+      PVirtualValue texture_offset;
+
+      RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
+
+      Opcode opcode;
+   private:
+      auto get_opcode(const nir_tex_instr& instr) -> Opcode;
+   };
+
+   TexInstr(Opcode op, const RegisterVec4& dest,
+            const RegisterVec4::Swizzle& dest_swizzle,
+            const RegisterVec4& src, unsigned sid, unsigned rid,
+            PVirtualValue sampler_offs = nullptr);
+
+   TexInstr(const TexInstr& orig) = delete;
+   TexInstr(const TexInstr&& orig) = delete;
+   TexInstr& operator =(const TexInstr& orig) = delete;
+   TexInstr& operator =(const TexInstr&& orig) = delete;
+
+   void accept(ConstInstrVisitor& visitor) const override;
+   void accept(InstrVisitor& visitor) override;
+
+   const auto& src() const {return m_src;}
+   auto& src() {return m_src;}
+
+   unsigned opcode() const {return m_opcode;}
+   unsigned sampler_id() const {return m_sampler_id;}
+   unsigned resource_id() const {return m_resource_id;}
+
+   void set_offset(unsigned index, int32_t val);
+   int get_offset(unsigned index) const;
+
+   void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
+   int inst_mode() const { return m_inst_mode;}
+
+   void set_tex_flag(Flags flag) {m_tex_flags.set(flag);}
+   bool has_tex_flag(Flags flag) const {return m_tex_flags.test(flag);}
+
+   void set_sampler_offset(PVirtualValue ofs) {m_sampler_offset = ofs;}
+   auto* sampler_offset() const {return m_sampler_offset;}
+
+   void set_gather_comp(int cmp);
+   bool is_equal_to(const TexInstr& lhs) const;
+
+   static Opcode op_from_string(const std::string& s);
+   static Instr::Pointer from_string(std::istream& is, ValueFactory& value_fctory);
+
+   static bool from_nir(nir_tex_instr *tex, Shader& shader);
+
+   uint32_t slots() const override {return 1;};
+
+   auto prepare_instr() const { return m_prepare_instr;}
+
+private:
+
+   bool do_ready() const override;
+   void do_print(std::ostream& os) const override;
+   bool propagate_death() override;
+
+   static const char *opname(Opcode code);
+   static bool is_gather(Opcode op);
+
+   void read_tex_coord_normalitazion(const std::string& next_token);
+   void set_tex_param(const std::string& next_token);
+
+   static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
+
+   static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
+                            RegisterVec4::Swizzle dest_swz, Shader& shader);
+   static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
+   static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
+   static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
+   static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
+
+   void set_coord_offsets(nir_src *offset);
+   void set_rect_coordinate_flags(nir_tex_instr* instr);
+   void add_prepare_instr(TexInstr *ir) {m_prepare_instr.push_back(ir);};
+
+   Opcode m_opcode;
+
+   RegisterVec4 m_src;
+   PVirtualValue m_sampler_offset;
+   std::bitset<num_tex_flag> m_tex_flags;
+   int m_offset[3];
+   int m_inst_mode;
+   unsigned m_sampler_id;
+   unsigned m_resource_id;
+
+   static const std::map<Opcode, std::string> s_opcode_map;
+   std::list<TexInstr *> m_prepare_instr;
+};
+
+}
+
+#endif // INSTR_TEX_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
new file mode 100644
index 0000000..b73cc13
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.cpp
@@ -0,0 +1,188 @@
+#include "sfn_instrfactory.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_debug.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_tex.h"
+
+#include "sfn_alu_defines.h"
+
+#include "sfn_shader.h"
+
+#include <string>
+#include <sstream>
+#include <vector>
+
+namespace r600 {
+
+using std::string;
+using std::vector;
+
+InstrFactory::InstrFactory():
+   group(nullptr)
+{
+
+}
+
+PInst InstrFactory::from_string(const std::string& s, int nesting_depth)
+{
+   string type;
+   std::istringstream is(s);
+
+   PInst result = nullptr;
+
+   do {
+      is >> type;
+   } while (type.empty() && is.good());
+
+   if (type == "ALU_GROUP_BEGIN") {
+      group = new AluGroup();
+      group->set_nesting_depth(nesting_depth);
+      return nullptr;
+   } else if (type == "ALU_GROUP_END") {
+      AluGroup *retval = group;
+      group = nullptr;
+      return retval;
+   } else if (type == "ALU") {
+      result = AluInstr::from_string(is, m_value_factory, group);
+   } else if (type == "TEX") {
+      result = TexInstr::from_string(is, m_value_factory);
+   } else if (type == "EXPORT") {
+      result = ExportInstr::from_string(is, m_value_factory);
+   } else if (type == "EXPORT_DONE") {
+      result = ExportInstr::last_from_string(is, m_value_factory);
+   } else if (type == "VFETCH") {
+      result = FetchInstr::from_string(is, m_value_factory);
+   } else if (type == "GET_BUF_RESINFO") {
+      result = QueryBufferSizeInstr::from_string(is, m_value_factory);
+   } else if (type == "LOAD_BUF") {
+      result = LoadFromBuffer::from_string(is, m_value_factory);
+   } else if (type == "READ_SCRATCH") {
+      result = LoadFromScratch::from_string(is, m_value_factory);
+   } else if (type == "IF") {
+      result = IfInstr::from_string(is, m_value_factory);
+   } else if (type == "WRITE_SCRATCH") {
+      result = WriteScratchInstr::from_string(is, m_value_factory);
+   } else if (type == "MEM_RING") {
+      result = MemRingOutInstr::from_string(is, m_value_factory);
+   } else if (type == "EMIT_VERTEX") {
+      result = EmitVertexInstr::from_string(is, false);
+   } else if (type == "EMIT_CUT_VERTEX") {
+      result = EmitVertexInstr::from_string(is, true);
+   } else if (type == "LDS_READ") {
+      result = LDSReadInstr::from_string(is, m_value_factory);
+   } else if (type == "LDS") {
+      result = LDSAtomicInstr::from_string(is, m_value_factory);
+   } else if (type == "WRITE_TF") {
+      result = WriteTFInstr::from_string(is, m_value_factory);
+   } else
+      result = ControlFlowInstr::from_string(type);
+
+   if (!result && !group) {
+      std::cerr << "Error translating '" << s << "'\n";
+   }
+
+   return result;
+}
+
+bool InstrFactory::from_nir(nir_instr *instr, Shader& shader)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return AluInstr::from_nir(nir_instr_as_alu(instr), shader);
+   case nir_instr_type_intrinsic:
+      return shader.process_intrinsic(nir_instr_as_intrinsic(instr));
+   case nir_instr_type_load_const:
+      return load_const(nir_instr_as_load_const(instr), shader);
+   case nir_instr_type_tex:
+      return TexInstr::from_nir(nir_instr_as_tex(instr), shader);
+   case nir_instr_type_jump:
+      return process_jump(nir_instr_as_jump(instr), shader);
+   case nir_instr_type_ssa_undef:
+      return process_undef(nir_instr_as_ssa_undef(instr), shader);
+   default:
+      fprintf(stderr, "Instruction type %d not supported\n", instr->type);
+   return false;
+   }
+}
+
+bool InstrFactory::load_const(nir_load_const_instr *literal, Shader& shader)
+{
+   AluInstr *ir = nullptr;
+
+   if (literal->def.bit_size == 64) {
+      for (int i = 0; i < literal->def.num_components; ++i) {
+         auto dest0 = m_value_factory.dest(literal->def, 2 * i, pin_none);
+         auto src0 = m_value_factory.literal(literal->value[i].u64 & 0xffffffff);
+         shader.emit_instruction(new AluInstr(op1_mov, dest0, src0, {alu_write}));
+
+         auto dest1 = m_value_factory.dest(literal->def, 2 * i + 1, pin_none);
+         auto src1 = m_value_factory.literal((literal->value[i].u64 >> 32) & 0xffffffff);
+         shader.emit_instruction(new AluInstr(op1_mov, dest1, src1, AluInstr::last_write));
+      }
+   } else {
+      Pin pin = literal->def.num_components == 1 ? pin_free : pin_none;
+      for (int i = 0; i < literal->def.num_components; ++i) {
+         auto dest = m_value_factory.dest(literal->def, i, pin);
+         uint32_t v = literal->value[i].i32;
+         PVirtualValue src = nullptr;
+         switch (v) {
+         case 0: src = m_value_factory.zero(); break;
+      case 1: src = m_value_factory.one_i(); break;
+         case 0xffffffff: src = m_value_factory.inline_const(ALU_SRC_M_1_INT, 0); break;
+         case 0x3f800000: src = m_value_factory.inline_const(ALU_SRC_1, 0); break;
+      case 0x3f000000: src = m_value_factory.inline_const(ALU_SRC_0_5, 0); break;
+         default: src = m_value_factory.literal(v);
+      }
+
+         ir = new AluInstr(op1_mov, dest, src, {alu_write});
+         shader.emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+
+   }
+   return true;
+}
+
+bool InstrFactory::process_jump(nir_jump_instr *instr, Shader& shader)
+{
+   ControlFlowInstr::CFType type;
+   switch (instr->type) {
+   case nir_jump_break:
+      type = ControlFlowInstr::cf_loop_break;
+   break;
+
+   case nir_jump_continue:
+      type = ControlFlowInstr::cf_loop_continue;
+   break;
+
+   default: {
+      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
+      sfn_log << SfnLog::err << "Jump instrunction " << *i <<  " not supported\n";
+      return false;
+   }
+   }
+   shader.emit_instruction(new ControlFlowInstr(type));
+   shader.start_new_block(0);
+
+   return true;
+}
+
+bool InstrFactory::process_undef(nir_ssa_undef_instr *undef, Shader& shader)
+{
+   for (int i = 0; i < undef->def.num_components; ++i) {
+      auto dest = shader.value_factory().undef(undef->def.index, i);
+      shader.emit_instruction(new AluInstr(op1_mov, dest,
+                                           value_factory().zero(),
+                                           AluInstr::last_write));
+   }
+   return true;
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instrfactory.h b/src/gallium/drivers/r600/sfn/sfn_instrfactory.h
new file mode 100644
index 0000000..5775813
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_instrfactory.h
@@ -0,0 +1,34 @@
+#ifndef INSTRFACTORY_H
+#define INSTRFACTORY_H
+
+#include "sfn_instr.h"
+#include "sfn_valuefactory.h"
+
+
+#include <iosfwd>
+
+namespace r600 {
+
+class Shader;
+class InstrFactory : public Allocate {
+public:
+	InstrFactory();
+
+   PInst from_string(const std::string &s, int nesting_depth);
+   bool from_nir(nir_instr *instr, Shader& shader);
+   auto& value_factory() { return m_value_factory;}
+
+private:
+   bool load_const(nir_load_const_instr *lc, Shader& shader);
+   bool process_jump(nir_jump_instr *instr, Shader& shader);
+   bool process_undef(nir_ssa_undef_instr *undef, Shader& shader);
+
+   Instr::Pointer export_from_string(std::istream& is, bool is_last);
+
+	ValueFactory m_value_factory;
+   AluGroup *group;
+};
+
+}
+
+#endif // INSTRFACTORY_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
deleted file mode 100644
index 72cf231..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_alu.h"
-#include "sfn_valuepool.h"
-
-namespace r600  {
-
-const AluModifiers AluInstruction::src_abs_flags[2] =
-   {alu_src0_abs, alu_src1_abs};
-const AluModifiers AluInstruction::src_neg_flags[3] =
-   {alu_src0_neg, alu_src1_neg, alu_src2_neg};
-const AluModifiers AluInstruction::src_rel_flags[3] =
-   {alu_src0_rel, alu_src1_rel, alu_src2_rel};
-
-AluInstruction::AluInstruction(EAluOp opcode):
-   Instruction (Instruction::alu),
-   m_opcode(opcode),
-   m_src(alu_ops.at(opcode).nsrc),
-   m_bank_swizzle(alu_vec_unknown),
-   m_cf_type(cf_alu)
-{
-   if (alu_ops.at(opcode).nsrc == 3)
-      m_flags.set(alu_op3);
-}
-
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
-                               std::vector<PValue> src,
-                               const std::set<AluModifiers>& flags):
-   Instruction (Instruction::alu),
-   m_opcode(opcode),
-   m_dest(dest),
-   m_bank_swizzle(alu_vec_unknown),
-   m_cf_type(cf_alu)
-{
-   assert(dest);
-   m_src.swap(src);
-   for (auto f : flags)
-      m_flags.set(f);
-
-   if (alu_ops.at(opcode).nsrc == 3)
-      m_flags.set(alu_op3);
-
-   for (auto &s: m_src)
-      add_remappable_src_value(&s);
-
-   add_remappable_dst_value(&m_dest);
-}
-
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                               const std::set<AluModifiers>& flags):
-   AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags)
-{
-}
-
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
-                               PValue src0, PValue src1,
-                               const std::set<AluModifiers> &m_flags):
-   AluInstruction(opcode, dest, {src0, src1}, m_flags)
-{
-}
-
-AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                               PValue src1, PValue src2,
-                               const std::set<AluModifiers> &flags):
-   AluInstruction(opcode, dest, {src0, src1, src2}, flags)
-{
-}
-
-bool AluInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == alu);
-   const auto& oth = static_cast<const AluInstruction&>(lhs);
-
-   if (m_opcode != oth.m_opcode) {
-      return false;
-   }
-
-   if (*m_dest != *oth.m_dest)
-      return false;
-
-   if (m_src.size() != oth.m_src.size())
-      return false;
-
-   for (unsigned i = 0; i < m_src.size(); ++i)
-     if (*m_src[i] != *oth.m_src[i]) {
-        return false;
-     }
-   return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
-}
-
-void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto c: candidates) {
-      if (*c == *m_dest)
-         m_dest = new_value;
-
-      for (auto& s: m_src) {
-         if (*c == *s)
-            s = new_value;
-      }
-   }
-}
-
-PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
-                                           ValueMap &values)
-{
-   auto new_index = map[reg->sel()];
-   if (new_index.valid)
-      reg = values.get_or_inject(new_index.new_reg, reg->chan());
-   map[reg->sel()].used = true;
-   return reg;
-}
-
-
-void AluInstruction::set_flag(AluModifiers flag)
-{
-   m_flags.set(flag);
-}
-
-void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
-{
-   m_bank_swizzle = bswz;
-}
-
-unsigned AluInstruction::n_sources() const
-{
-   return m_src.size();
-}
-
-void AluInstruction::do_print(std::ostream& os) const
-{
-   os << "ALU " << alu_ops.at(m_opcode).name;
-   if (m_flags.test(alu_dst_clamp))
-      os << "_CLAMP";
-   if (m_dest)
-      os << ' ' << *m_dest << " : "  ;
-
-   for (unsigned i = 0; i < m_src.size(); ++i) {
-      int pflags = 0;
-      if (i)
-         os << ' ';
-      if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg;
-      if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel;
-      if (i < 2)
-         if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs;
-      m_src[i]->print(os, Value::PrintFlags(0, pflags));
-   }
-   os << " {";
-   os << (m_flags.test(alu_write) ? 'W' : ' ');
-   os << (m_flags.test(alu_last_instr) ? 'L' : ' ');
-   os << (m_flags.test(alu_update_exec) ? 'E' : ' ');
-   os << (m_flags.test(alu_update_pred) ? 'P' : ' ');
-   os << "}";
-
-   os <<  " BS:" << m_bank_swizzle;
-   os <<  " CF:" << m_cf_type;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
deleted file mode 100644
index ecf563c..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_r600_instruction_alu_h
-#define sfn_r600_instruction_alu_h
-
-#include "sfn_instruction_base.h"
-#include "sfn_alu_defines.h"
-
-namespace r600 {
-
-enum AluModifiers {
-   alu_src0_neg,
-   alu_src0_abs,
-   alu_src0_rel,
-   alu_src1_neg,
-   alu_src1_abs,
-   alu_src1_rel,
-   alu_src2_neg,
-   alu_src2_rel,
-   alu_dst_clamp,
-   alu_dst_rel,
-   alu_last_instr,
-   alu_update_exec,
-   alu_update_pred,
-   alu_write,
-   alu_op3
-};
-
-enum AluDstModifiers {
-   omod_off = 0,
-   omod_mul2 = 1,
-   omod_mul4 = 2,
-   omod_divl2 = 3
-};
-
-enum AluPredSel {
-   pred_off = 0,
-   pred_zero = 2,
-   pred_one = 3
-};
-
-enum AluBankSwizzle {
-   alu_vec_012 = 0,
-   sq_alu_scl_201 = 0,
-   alu_vec_021 = 1,
-   sq_alu_scl_122 = 1,
-   alu_vec_120 = 2,
-   sq_alu_scl_212 = 2,
-   alu_vec_102 = 3,
-   sq_alu_scl_221 = 3,
-   alu_vec_201 = 4,
-   alu_vec_210 = 5,
-   alu_vec_unknown = 6
-};
-
-class AluInstruction : public Instruction {
-public:
-
-   static const AluModifiers src_abs_flags[2];
-   static const AluModifiers src_neg_flags[3];
-   static const AluModifiers src_rel_flags[3];
-
-   AluInstruction(EAluOp opcode);
-   AluInstruction(EAluOp opcode, PValue dest,
-                  std::vector<PValue> src0,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest, PValue src0,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest,
-                  PValue src0, PValue src1,
-                  const std::set<AluModifiers>& m_flags);
-
-   AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
-                  PValue src2,
-                  const std::set<AluModifiers>& m_flags);
-
-   void set_flag(AluModifiers flag);
-   unsigned n_sources() const;
-
-   PValue dest() {return m_dest;}
-   EAluOp opcode() const {return m_opcode;}
-   const Value *dest() const {return m_dest.get();}
-   Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
-   PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
-   bool is_last() const {return m_flags.test(alu_last_instr);}
-   bool write() const {return m_flags.test(alu_write);}
-   bool flag(AluModifiers f) const {return m_flags.test(f);}
-   void set_bank_swizzle(AluBankSwizzle swz);
-   int bank_swizzle() const {return m_bank_swizzle;}
-   ECFAluOpCode cf_type() const {return m_cf_type;}
-   void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
-                              ValueMap &values);
-
-
-   EAluOp m_opcode;
-   PValue m_dest;
-   std::vector<PValue> m_src;
-   AluOpFlags m_flags;
-   AluBankSwizzle m_bank_swizzle;
-   ECFAluOpCode m_cf_type;
-};
-
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
deleted file mode 100644
index 116bfac..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <algorithm>
-#include <cassert>
-
-#include "sfn_instruction_base.h"
-#include "sfn_liverange.h"
-#include "sfn_valuepool.h"
-
-namespace r600  {
-
-ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m,
-                             ValueMap& values):
-   m_map(m),
-   m_values(values)
-{
-}
-
-void ValueRemapper::remap(PValue& v)
-{
-   if (!v)
-      return;
-   if (v->type() == Value::gpr) {
-      v = remap_one_registers(v);
-   } else if (v->type() == Value::gpr_array_value) {
-      GPRArrayValue& val = static_cast<GPRArrayValue&>(*v);
-      auto value = val.value();
-      auto addr = val.indirect();
-      val.reset_value(remap_one_registers(value));
-      if (addr) {
-         if (addr->type() == Value::gpr)
-            val.reset_addr(remap_one_registers(addr));
-      }
-      size_t range_start = val.sel();
-      size_t range_end = range_start + val.array_size();
-      while (range_start < range_end)
-         m_map[range_start++].used = true;
-   } else if (v->type() == Value::kconst) {
-      auto& val = static_cast<UniformValue&>(*v);
-      auto addr = val.addr();
-      if (addr && addr->type() == Value::gpr)
-            val.reset_addr(remap_one_registers(addr));
-   }
-
-}
-
-void ValueRemapper::remap(GPRVector& v)
-{
-   for (int i = 0; i < 4; ++i) {
-      if (v.reg_i(i)) {
-         auto& ns_idx = m_map[v.reg_i(i)->sel()];
-         if (ns_idx.valid)
-            v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan()));
-         m_map[v.reg_i(i)->sel()].used = true;
-      }
-   }
-}
-
-PValue ValueRemapper::remap_one_registers(PValue& reg)
-{
-   auto new_index = m_map[reg->sel()];
-   if (new_index.valid)
-      reg = m_values.get_or_inject(new_index.new_reg, reg->chan());
-   m_map[reg->sel()].used = true;
-   return reg;
-}
-
-
-Instruction::Instruction(instr_type t):
-   m_type(t)
-{
-}
-
-Instruction::~Instruction()
-{
-}
-
-void Instruction::print(std::ostream& os) const
-{
-   os << "OP:";
-   do_print(os);
-}
-
-
-void Instruction::remap_registers(ValueRemapper& map)
-{
-   sfn_log << SfnLog::merge << "REMAP " << *this << "\n";
-   for (auto& v: m_mappable_src_registers)
-      map.remap(*v);
-
-   for (auto& v: m_mappable_src_vectors)
-      map.remap(*v);
-
-   for (auto& v: m_mappable_dst_registers)
-      map.remap(*v);
-
-   for (auto& v: m_mappable_dst_vectors)
-      map.remap(*v);
-   sfn_log << SfnLog::merge << "TO    " << *this << "\n\n";
-}
-
-void Instruction::add_remappable_src_value(PValue *v)
-{
-   if (*v)
-      m_mappable_src_registers.push_back(v);
-}
-
-void Instruction::add_remappable_src_value(GPRVector *v)
-{
-   m_mappable_src_vectors.push_back(v);
-}
-
-void Instruction::add_remappable_dst_value(PValue *v)
-{
-   if (v)
-      m_mappable_dst_registers.push_back(v);
-}
-
-void Instruction::add_remappable_dst_value(GPRVector *v)
-{
-   m_mappable_dst_vectors.push_back(v);
-}
-
-void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value)
-{
-
-}
-
-void Instruction::evalue_liveness(LiverangeEvaluator& eval) const
-{
-   sfn_log << SfnLog::merge << "Scan " << *this << "\n";
-   for (const auto& s: m_mappable_src_registers)
-      if (*s)
-         eval.record_read(**s);
-
-   for (const auto& s: m_mappable_src_vectors)
-      eval.record_read(*s);
-
-   for (const auto& s: m_mappable_dst_registers)
-      if (*s)
-         eval.record_write(**s);
-
-   for (const auto& s: m_mappable_dst_vectors)
-      eval.record_write(*s);
-
-   do_evalue_liveness(eval);
-}
-
-void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const
-{
-
-}
-
-bool operator == (const Instruction& lhs, const Instruction& rhs)
-{
-   if (rhs.m_type != lhs.m_type)
-      return false;
-
-   return lhs.is_equal_to(rhs);
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
deleted file mode 100644
index 0689a47..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_r600_instr_h
-#define sfn_r600_instr_h
-
-#include "sfn_instructionvisitor.h"
-#include "sfn_value_gpr.h"
-#include "sfn_defines.h"
-
-#include "gallium/drivers/r600/r600_isa.h"
-#include <iostream>
-#include <memory>
-#include <vector>
-#include <set>
-
-namespace r600 {
-
-struct rename_reg_pair {
-   bool valid;
-   bool used;
-   int new_reg;
-};
-
-class LiverangeEvaluator;
-class ValueMap;
-
-
-class ValueRemapper {
-public:
-   ValueRemapper(std::vector<rename_reg_pair>& m,
-                 ValueMap& values);
-
-   void remap(PValue& v);
-   void remap(GPRVector& v);
-private:
-   PValue remap_one_registers(PValue& reg);
-
-   std::vector<rename_reg_pair>& m_map;
-   ValueMap& m_values;
-};
-
-
-using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
-
-class Instruction {
-public:
-   enum instr_type {
-      alu,
-      exprt,
-      tex,
-      vtx,
-      wait_ack,
-      cond_if,
-      cond_else,
-      cond_endif,
-      lds_atomic,
-      lds_read,
-      lds_write,
-      loop_begin,
-      loop_end,
-      loop_break,
-      loop_continue,
-      phi,
-      streamout,
-      ring,
-      emit_vtx,
-      mem_wr_scratch,
-      gds,
-      rat,
-      tf_write,
-      block,
-      unknown
-   };
-
-   typedef std::shared_ptr<Instruction> Pointer;
-
-   friend bool operator == (const Instruction& lhs, const Instruction& rhs);
-
-   Instruction(instr_type t);
-
-   virtual ~Instruction();
-
-   instr_type type() const { return m_type;}
-
-   void print(std::ostream& os) const;
-
-   virtual void replace_values(const ValueSet& candidates, PValue new_value);
-
-   void evalue_liveness(LiverangeEvaluator& eval) const;
-
-   void remap_registers(ValueRemapper& map);
-
-   virtual bool accept(InstructionVisitor& visitor) = 0;
-   virtual bool accept(ConstInstructionVisitor& visitor) const = 0;
-
-protected:
-
-   void add_remappable_src_value(PValue *v);
-   void add_remappable_src_value(GPRVector *v);
-   void add_remappable_dst_value(PValue *v);
-   void add_remappable_dst_value(GPRVector *v);
-
-private:
-
-   virtual void do_evalue_liveness(LiverangeEvaluator& eval) const;
-
-   virtual bool is_equal_to(const Instruction& lhs) const = 0;
-
-   instr_type m_type;
-
-   virtual void do_print(std::ostream& os) const = 0;
-
-   std::vector<PValue*> m_mappable_src_registers;
-   std::vector<GPRVector*> m_mappable_src_vectors;
-   std::vector<PValue*> m_mappable_dst_registers;
-   std::vector<GPRVector*> m_mappable_dst_vectors;
-};
-
-using PInstruction=Instruction::Pointer;
-
-inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
-{
-   instr.print(os);
-   return os;
-}
-
-bool operator == (const Instruction& lhs, const Instruction& rhs);
-
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
deleted file mode 100644
index 212499f..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "sfn_instruction_block.h"
-
-namespace r600 {
-
-
-InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number):
-   Instruction(block),
-   m_block_number(block_number),
-   m_nesting_depth(nesting_depth)
-{
-}
-
-void InstructionBlock::emit(PInstruction instr)
-{
-   m_block.push_back(instr);
-}
-
-void InstructionBlock::remap_registers(ValueRemapper& map)
-{
-   for(auto& i: m_block)
-      i->remap_registers(map);
-}
-
-void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   for(auto& i: m_block)
-      i->evalue_liveness(eval);
-}
-
-bool InstructionBlock::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == block);
-   auto& l = static_cast<const InstructionBlock&>(lhs);
-
-   if (m_block.size() != l.m_block.size())
-      return false;
-
-   if (m_block_number != l.m_block_number)
-      return false;
-
-   return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
-                     [](PInstruction ri, PInstruction li) {return *ri == *li;});
-}
-
-PInstruction InstructionBlock::last_instruction()
-{
-   return m_block.size() ? *m_block.rbegin() : nullptr;
-}
-
-void InstructionBlock::do_print(std::ostream& os) const
-{
-   std::string space(" ", 2 * m_nesting_depth);
-   for(auto& i: m_block)
-      os << space << *i << "\n";
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
deleted file mode 100644
index fe40cc1..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef sfn_instruction_block_h
-#define sfn_instruction_block_h
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class InstructionBlock : public Instruction
-{
-public:
-	InstructionBlock(unsigned nesting_depth, unsigned block_number);
-
-        void emit(PInstruction instr);
-
-
-        std::vector<PInstruction>::const_iterator begin() const  {
-           return m_block.begin();
-        }
-        std::vector<PInstruction>::const_iterator end() const {
-           return m_block.end();
-        }
-
-        void remap_registers(ValueRemapper& map);
-
-        size_t size() const {
-           return m_block.size();
-        }
-
-        const PInstruction& operator [] (int i) const {
-           return m_block[i];
-        }
-
-        unsigned number() const  {
-           return m_block_number;
-        }
-
-        PInstruction last_instruction();
-
-        bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-        bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-        void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-        bool is_equal_to(const Instruction& lhs) const override;
-        void do_print(std::ostream& os) const override;
-
-        std::vector<PInstruction> m_block;
-
-        unsigned m_block_number;
-        unsigned m_nesting_depth;
-};
-
-}
-
-#endif // INSTRUCTIONBLOCK_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
deleted file mode 100644
index 455d6d6..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_cf.h"
-#include "sfn_liverange.h"
-
-namespace  r600 {
-
-CFInstruction::CFInstruction(instr_type type):Instruction(type)
-{
-
-}
-
-IfElseInstruction::IfElseInstruction(instr_type type):
-   CFInstruction (type)
-{
-
-}
-
-IfInstruction::IfInstruction(AluInstruction *pred):
-   IfElseInstruction(cond_if),
-   m_pred(pred)
-{
-   PValue *v = m_pred->psrc(0);
-   add_remappable_src_value(v);
-   pred->set_cf_type(cf_alu_push_before);
-}
-
-void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_if();
-}
-
-bool IfInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == cond_if);
-   const IfInstruction& l = static_cast<const IfInstruction&>(lhs);
-   return *l.m_pred == *m_pred;
-}
-
-void IfInstruction::do_print(std::ostream& os) const
-{
-   os << "PRED = " << *m_pred << "\n";
-   os << "IF (PRED)";
-}
-
-ElseInstruction::ElseInstruction(IfInstruction *jump_src):
-   IfElseInstruction(cond_else),
-   m_jump_src(jump_src)
-{
-}
-
-void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_else();
-}
-
-
-bool ElseInstruction::is_equal_to(const Instruction& lhs) const
-{
-   if (lhs.type() != cond_else)
-      return false;
-   auto& l = static_cast<const ElseInstruction&>(lhs);
-   return (*m_jump_src == *l.m_jump_src);
-}
-
-void ElseInstruction::do_print(std::ostream& os) const
-{
-   os << "ELSE";
-}
-
-IfElseEndInstruction::IfElseEndInstruction():
-   IfElseInstruction(cond_endif)
-{
-}
-
-void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_endif();
-}
-
-bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
-{
-   if (lhs.type() != cond_endif)
-      return false;
-   return true;
-}
-
-void IfElseEndInstruction::do_print(std::ostream& os) const
-{
-   os << "ENDIF";
-}
-
-LoopBeginInstruction::LoopBeginInstruction():
-   CFInstruction(loop_begin)
-{
-}
-
-void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_begin();
-}
-
-bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == loop_begin);
-   return true;
-}
-
-void LoopBeginInstruction::do_print(std::ostream& os) const
-{
-   os << "BGNLOOP";
-}
-
-LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
-   CFInstruction (loop_end),
-   m_start(start)
-{
-}
-
-void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_end();
-}
-
-bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == loop_end);
-   const auto& other = static_cast<const LoopEndInstruction&>(lhs);
-   return *m_start == *other.m_start;
-}
-
-void LoopEndInstruction::do_print(std::ostream& os) const
-{
-   os << "ENDLOOP";
-}
-
-LoopBreakInstruction::LoopBreakInstruction():
-   CFInstruction (loop_break)
-{
-}
-
-void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
-{
-   eval.scope_loop_break();
-}
-
-bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return true;
-}
-
-void LoopBreakInstruction::do_print(std::ostream& os) const
-{
-   os << "BREAK";
-}
-
-LoopContInstruction::LoopContInstruction():
-   CFInstruction (loop_continue)
-{
-}
-
-bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return true;
-}
-void LoopContInstruction::do_print(std::ostream& os) const
-{
-   os << "CONTINUE";
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
deleted file mode 100644
index a137948..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_IFELSEINSTRUCTION_H
-#define SFN_IFELSEINSTRUCTION_H
-
-#include "sfn_instruction_alu.h"
-
-namespace r600  {
-
-class CFInstruction : public Instruction {
-protected:
-   CFInstruction(instr_type type);
-};
-
-class IfElseInstruction : public CFInstruction {
-public:
-   IfElseInstruction(instr_type type);
-
-};
-
-class IfInstruction : public IfElseInstruction {
-public:
-   IfInstruction(AluInstruction *pred);
-   const AluInstruction& pred() const {return *m_pred;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   std::shared_ptr<AluInstruction> m_pred;
-};
-
-class ElseInstruction : public IfElseInstruction {
-public:
-   ElseInstruction(IfInstruction *jump_src);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   IfElseInstruction *m_jump_src;
-};
-
-class IfElseEndInstruction : public IfElseInstruction {
-public:
-   IfElseEndInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopBeginInstruction: public CFInstruction {
-public:
-   LoopBeginInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopEndInstruction: public CFInstruction {
-public:
-   LoopEndInstruction(LoopBeginInstruction *start);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   LoopBeginInstruction *m_start;
-};
-
-class LoopBreakInstruction: public CFInstruction {
-public:
-   LoopBreakInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_evalue_liveness(LiverangeEvaluator& eval) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-class LoopContInstruction: public CFInstruction {
-public:
-   LoopContInstruction();
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-};
-
-}
-
-#endif // SFN_IFELSEINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
deleted file mode 100644
index 7d1d948..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "sfn_instruction_export.h"
-#include "sfn_liverange.h"
-#include "sfn_valuepool.h"
-
-namespace r600 {
-
-WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
-   Instruction(t),
-   m_value(value)
-{
-   add_remappable_src_value(&m_value);
-}
-
-void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   // I wonder whether we can actually end up here ...
-   for (auto c: candidates) {
-      if (*c == *m_value.reg_i(c->chan()))
-         m_value.set_reg_i(c->chan(), new_value);
-   }
-
-   replace_values_child(candidates, new_value);
-}
-
-void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
-                                               UNUSED PValue new_value)
-{
-}
-
-void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
-                                                UNUSED ValueMap& values)
-{
-}
-
-ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
-   WriteoutInstruction(Instruction::exprt, value),
-   m_type(type),
-   m_loc(loc),
-   m_is_last(false)
-{
-}
-
-
-bool ExportInstruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == exprt);
-   const auto& oth = static_cast<const ExportInstruction&>(lhs);
-
-   return (gpr() == oth.gpr()) &&
-         (m_type == oth.m_type) &&
-         (m_loc == oth.m_loc) &&
-         (m_is_last == oth.m_is_last);
-}
-
-void ExportInstruction::do_print(std::ostream& os) const
-{
-   os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
-   switch (m_type) {
-   case et_pixel: os << "PIXEL "; break;
-   case et_pos: os << "POS "; break;
-   case et_param: os << "PARAM "; break;
-   }
-   os << m_loc << " " << gpr();
-}
-
-void ExportInstruction::update_output_map(OutputRegisterMap& map) const
-{
-   map[m_loc] = gpr_ptr();
-}
-
-void ExportInstruction::set_last()
-{
-   m_is_last = true;
-}
-
-WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
-                                                 int align, int align_offset, int writemask):
-   WriteoutInstruction (Instruction::mem_wr_scratch, value),
-   m_loc(loc),
-   m_align(align),
-   m_align_offset(align_offset),
-   m_writemask(writemask),
-   m_array_size(0)
-{
-}
-
-WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
-                                                 int align, int align_offset, int writemask, int array_size):
-   WriteoutInstruction (Instruction::mem_wr_scratch, value),
-   m_loc(0),
-   m_address(address),
-   m_align(align),
-   m_align_offset(align_offset),
-   m_writemask(writemask),
-   m_array_size(array_size - 1)
-{
-   add_remappable_src_value(&m_address);
-}
-
-bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
-{
-   if (lhs.type() != Instruction::mem_wr_scratch)
-      return false;
-   const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
-
-   if (m_address) {
-      if (!other.m_address)
-         return false;
-      if (*m_address != *other.m_address)
-         return false;
-   } else {
-      if (other.m_address)
-         return false;
-   }
-
-   return gpr() == other.gpr() &&
-         m_loc == other.m_loc &&
-         m_align == other.m_align &&
-         m_align_offset == other.m_align_offset &&
-         m_writemask == other.m_writemask;
-}
-
-static char *writemask_to_swizzle(int writemask, char *buf)
-{
-   const char *swz = "xyzw";
-   for (int i = 0; i < 4; ++i) {
-      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
-   }
-   return buf;
-}
-
-void WriteScratchInstruction::do_print(std::ostream& os) const
-{
-   char buf[5];
-
-   os << "MEM_SCRATCH_WRITE ";
-   if (m_address)
-      os << "@" << *m_address << "+";
-
-   os << m_loc  << "." << writemask_to_swizzle(m_writemask, buf)
-      << " " <<  gpr()  << " AL:" << m_align << " ALO:" << m_align_offset;
-}
-
-void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
-{
-   if (!m_address)
-      return;
-
-   for (auto c: candidates) {
-      if (*c == *m_address)
-         m_address = new_value;
-   }
-}
-
-void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
-                           ValueMap& values)
-{
-   if (!m_address)
-      return;
-   sfn_log << SfnLog::merge << "Remap " << *m_address <<  " of type " << m_address->type() << "\n";
-   assert(m_address->type() == Value::gpr);
-   auto new_index = map[m_address->sel()];
-   if (new_index.valid)
-      m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
-   map[m_address->sel()].used = true;
-}
-
-StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
-                                         int array_base, int comp_mask, int out_buffer,
-                                         int stream):
-   WriteoutInstruction(Instruction::streamout, value),
-   m_element_size(num_components == 3 ? 3 : num_components - 1),
-   m_burst_count(1),
-   m_array_base(array_base),
-   m_array_size(0xfff),
-   m_writemask(comp_mask),
-   m_output_buffer(out_buffer),
-   m_stream(stream)
-{
-}
-
-unsigned StreamOutIntruction::op() const
-{
-   int op = 0;
-   switch (m_output_buffer) {
-   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
-   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
-   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
-   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
-   }
-   return 4 * m_stream + op;
-}
-
-bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == streamout);
-   const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
-
-   return gpr() == oth.gpr() &&
-         m_element_size == oth.m_element_size &&
-         m_burst_count == oth.m_burst_count &&
-         m_array_base == oth.m_array_base &&
-         m_array_size == oth.m_array_size &&
-         m_writemask == oth.m_writemask &&
-         m_output_buffer == oth.m_output_buffer &&
-         m_stream == oth.m_stream;
-}
-
-void StreamOutIntruction::do_print(std::ostream& os) const
-{
-   os << "WRITE STREAM(" << m_stream << ") "  << gpr()
-      << " ES:" << m_element_size
-      << " BC:" << m_burst_count
-      << " BUF:" << m_output_buffer
-      << " ARRAY:" <<  m_array_base;
-   if (m_array_size != 0xfff)
-      os << "+" << m_array_size;
-}
-
-MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
-                                           const GPRVector& value,
-                                           unsigned base_addr, unsigned ncomp,
-                                           PValue index):
-   WriteoutInstruction(Instruction::ring, value),
-   m_ring_op(ring),
-   m_type(type),
-   m_base_address(base_addr),
-   m_num_comp(ncomp),
-   m_index(index)
-{
-   add_remappable_src_value(&m_index);
-
-   assert(m_ring_op  == cf_mem_ring || m_ring_op  == cf_mem_ring1||
-          m_ring_op  == cf_mem_ring2 || m_ring_op  == cf_mem_ring3);
-   assert(m_num_comp <= 4);
-}
-
-unsigned MemRingOutIntruction::ncomp() const
-{
-   switch (m_num_comp) {
-   case 1: return 0;
-   case 2: return 1;
-   case 3:
-   case 4: return 3;
-   default:
-      assert(0);
-   }
-   return 3;
-}
-
-bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
-{
-   assert(lhs.type() == streamout);
-   const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
-
-   bool equal = gpr() == oth.gpr() &&
-                m_ring_op == oth.m_ring_op &&
-                m_type == oth.m_type &&
-                m_num_comp == oth.m_num_comp &&
-                m_base_address == oth.m_base_address;
-
-   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
-      equal &= (*m_index == *oth.m_index);
-   return equal;
-
-}
-
-static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
-void MemRingOutIntruction::do_print(std::ostream& os) const
-{
-   os << "MEM_RING " << m_ring_op;
-   os << " " << write_type_str[m_type] << " " << m_base_address;
-   os << " " << gpr();
-   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
-      os << " @" << *m_index;
-   os << " ES:" << m_num_comp;
-}
-
-
-void MemRingOutIntruction::replace_values_child(const ValueSet& candidates,
-                                                PValue new_value)
-{
-   if (!m_index)
-      return;
-
-   for (auto c: candidates) {
-      if (*c == *m_index)
-         m_index = new_value;
-   }
-}
-
-void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map,
-                                                 ValueMap& values)
-{
-   if (!m_index)
-      return;
-
-   assert(m_index->type() == Value::gpr);
-   auto new_index = map[m_index->sel()];
-   if (new_index.valid)
-      m_index = values.get_or_inject(new_index.new_reg, m_index->chan());
-   map[m_index->sel()].used = true;
-}
-
-void MemRingOutIntruction::patch_ring(int stream, PValue index)
-{
-   const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
-
-   assert(stream < 4);
-   m_ring_op = ring_op[stream];
-   m_index = index;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
deleted file mode 100644
index 6d01408..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_EXPORTINSTRUCTION_H
-#define SFN_EXPORTINSTRUCTION_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class WriteoutInstruction: public Instruction {
-public:
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-   const GPRVector&  gpr() const {return m_value;}
-   const GPRVector  *gpr_ptr() const {return &m_value;}
-protected:
-   WriteoutInstruction(instr_type t, const GPRVector& value);
-private:
-   virtual void replace_values_child(const ValueSet& candidates, PValue new_value);
-   virtual void remap_registers_child(std::vector<rename_reg_pair>& map,
-                        ValueMap& values);
-
-   GPRVector m_value;
-};
-
-class ExportInstruction : public WriteoutInstruction {
-public:
-   enum ExportType {
-      et_pixel,
-      et_pos,
-      et_param
-   };
-
-   ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
-   void set_last();
-
-   ExportType export_type() const {return m_type;}
-
-   unsigned location() const {return m_loc;}
-   bool is_last_export() const {return m_is_last;}
-
-   void update_output_map(OutputRegisterMap& map) const;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ExportType m_type;
-   unsigned m_loc;
-   bool m_is_last;
-};
-
-class WriteScratchInstruction : public WriteoutInstruction {
-public:
-
-   WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
-                           int align_offset, int writemask);
-   WriteScratchInstruction(const PValue& address, const GPRVector& value,
-                           int align, int align_offset, int writemask, int array_size);
-   unsigned location() const {return m_loc;}
-
-   int write_mask() const { return m_writemask;}
-   int address() const { assert(m_address); return m_address->sel();}
-   bool indirect() const { return !!m_address;}
-   int array_size() const { return m_array_size;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   void replace_values_child(const ValueSet& candidates, PValue new_value) override;
-   void remap_registers_child(std::vector<rename_reg_pair>& map,
-                              ValueMap& values)override;
-
-   unsigned m_loc;
-   PValue m_address;
-   unsigned m_align;
-   unsigned m_align_offset;
-   unsigned m_writemask;
-   int m_array_size;
-};
-
-
-class StreamOutIntruction: public WriteoutInstruction {
-public:
-   StreamOutIntruction(const GPRVector& value, int num_components,
-                       int array_base, int comp_mask, int out_buffer,
-                       int stream);
-   int element_size() const { return m_element_size;}
-   int burst_count() const { return m_burst_count;}
-   int array_base() const { return m_array_base;}
-   int array_size() const { return m_array_size;}
-   int comp_mask() const { return m_writemask;}
-   unsigned op() const;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   int m_element_size;
-   int m_burst_count;
-   int m_array_base;
-   int m_array_size;
-   int m_writemask;
-   int m_output_buffer;
-   int m_stream;
-};
-
-enum EMemWriteType {
-   mem_write = 0,
-   mem_write_ind = 1,
-   mem_write_ack = 2,
-   mem_write_ind_ack = 3,
-};
-
-class MemRingOutIntruction: public WriteoutInstruction {
-public:
-
-   MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
-                        const GPRVector& value, unsigned base_addr,
-                        unsigned ncomp, PValue m_index);
-
-   unsigned op() const{return m_ring_op;}
-   unsigned ncomp() const;
-   unsigned addr() const {return m_base_address;}
-   EMemWriteType type() const {return m_type;}
-   unsigned index_reg() const {return m_index->sel();}
-   unsigned array_base() const {return m_base_address; }
-   void replace_values_child(const ValueSet& candidates, PValue new_value) override;
-   void remap_registers_child(std::vector<rename_reg_pair>& map,
-                        ValueMap& values) override;
-   void patch_ring(int stream, PValue index);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ECFOpCode m_ring_op;
-   EMemWriteType m_type;
-   unsigned m_base_address;
-   unsigned m_num_comp;
-   PValue m_index;
-
-};
-
-}
-
-
-#endif // SFN_EXPORTINSTRUCTION_H
\ No newline at end of file
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
deleted file mode 100644
index ec1a488..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
+++ /dev/null
@@ -1,480 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_fetch.h"
-
-#include "gallium/drivers/r600/r600_pipe.h"
-
-namespace r600 {
-
-/* refactor this to add status create methods for specific tasks */
-FetchInstruction::FetchInstruction(EVFetchInstr op,
-                                   EVFetchType type,
-                                   GPRVector dst,
-                                   PValue src, int offset,
-                                   int buffer_id, PValue buffer_offset,
-                                   EBufferIndexMode cp_rel,
-                                   bool use_const_field):
-   Instruction(vtx),
-   m_vc_opcode(op),
-   m_fetch_type(type),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(offset),
-   m_is_mega_fetch(1),
-   m_mega_fetch_count(16),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(cp_rel),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(0),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle({0,1,2,3})
-{
-   if (use_const_field) {
-      m_flags.set(vtx_use_const_field);
-      m_data_format = fmt_invalid;
-      m_num_format = vtx_nf_norm;
-   } else {
-      m_flags.set(vtx_format_comp_signed);
-      m_data_format = fmt_32_32_32_32_float;
-      m_num_format = vtx_nf_scaled;
-   }
-
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_buffer_offset);
-
-   add_remappable_dst_value(&m_dst);
-}
-
-/* Resource query */
-FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
-                                   EVFetchType fetch_type,
-                                   EVTXDataFormat data_format,
-                                   EVFetchNumFormat num_format,
-                                   EVFetchEndianSwap endian_swap,
-                                   const PValue src,
-                                   const GPRVector dst,
-                                   uint32_t offset,
-                                   bool is_mega_fetch,
-                                   uint32_t mega_fetch_count,
-                                   uint32_t buffer_id,
-                                   uint32_t semantic_id,
-
-                                   EBufferIndexMode buffer_index_mode,
-                                   bool uncached,
-                                   bool indexed,
-                                   int array_base,
-                                   int array_size,
-                                   int elm_size,
-                                   PValue buffer_offset,
-                                   const std::array<int, 4>& dest_swizzle):
-   Instruction(vtx),
-   m_vc_opcode(vc_opcode),
-   m_fetch_type(fetch_type),
-   m_data_format(data_format),
-   m_num_format(num_format),
-   m_endian_swap(endian_swap),
-   m_src(src),
-   m_dst(dst),
-   m_offset(offset),
-   m_is_mega_fetch(is_mega_fetch),
-   m_mega_fetch_count(mega_fetch_count),
-   m_buffer_id(buffer_id),
-   m_semantic_id(semantic_id),
-   m_buffer_index_mode(buffer_index_mode),
-   m_uncached(uncached),
-   m_indexed(indexed),
-   m_array_base(array_base),
-   m_array_size(array_size),
-   m_elm_size(elm_size),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle(dest_swizzle)
-{
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-FetchInstruction::FetchInstruction(GPRVector dst,
-                                   PValue src,
-                                   int buffer_id, PValue buffer_offset,
-                                   EVTXDataFormat format,
-                                   EVFetchNumFormat num_format):
-   Instruction(vtx),
-   m_vc_opcode(vc_fetch),
-   m_fetch_type(no_index_offset),
-   m_data_format(format),
-   m_num_format(num_format),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(0),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(bim_none),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(1),
-   m_buffer_offset(buffer_offset),
-   m_dest_swizzle({0,1,2,3})
-{
-   m_flags.set(vtx_format_comp_signed);
-
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-
-/* Resource query */
-FetchInstruction::FetchInstruction(GPRVector dst,
-                                   PValue src,
-                                   int buffer_id,
-                                   EBufferIndexMode cp_rel):
-   Instruction(vtx),
-   m_vc_opcode(vc_get_buf_resinfo),
-   m_fetch_type(no_index_offset),
-   m_data_format(fmt_32_32_32_32),
-   m_num_format(vtx_nf_norm),
-   m_endian_swap(vtx_es_none),
-   m_src(src),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(16),
-   m_buffer_id(buffer_id),
-   m_semantic_id(0),
-   m_buffer_index_mode(cp_rel),
-   m_flags(0),
-   m_uncached(false),
-   m_indexed(false),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(0),
-   m_dest_swizzle({0,1,2,3})
-{
-   m_flags.set(vtx_format_comp_signed);
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
-   Instruction(vtx),
-   m_vc_opcode(vc_read_scratch),
-   m_fetch_type(vertex_data),
-   m_data_format(fmt_32_32_32_32),
-   m_num_format(vtx_nf_int),
-   m_endian_swap(vtx_es_none),
-   m_dst(dst),
-   m_offset(0),
-   m_is_mega_fetch(0),
-   m_mega_fetch_count(16),
-   m_buffer_id(0),
-   m_semantic_id(0),
-   m_buffer_index_mode(bim_none),
-   m_flags(0),
-   m_uncached(true),
-   m_array_base(0),
-   m_array_size(0),
-   m_elm_size(3),
-   m_dest_swizzle({0,1,2,3})
-{
-   if (src->type() == Value::literal) {
-      const auto& lv = static_cast<const LiteralValue&>(*src);
-      m_array_base = lv.value();
-      m_indexed = false;
-      m_src.reset(new GPRValue(0,0));
-      m_array_size = 0;
-   } else {
-      m_array_base = 0;
-      m_src = src;
-      m_indexed = true;
-      m_array_size = scratch_size - 1;
-   }
-   add_remappable_src_value(&m_src);
-   add_remappable_dst_value(&m_dst);
-   add_remappable_src_value(&m_buffer_offset);
-}
-
-void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   if (!m_src)
-      return;
-   for (auto c: candidates) {
-      for (int i = 0; i < 4; ++i) {
-         if (*c == *m_dst.reg_i(i))
-            m_dst.set_reg_i(i, new_value);
-      }
-      if (*m_src == *c)
-         m_src = new_value;
-   }
-}
-
-
-bool FetchInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& l = static_cast<const FetchInstruction&>(lhs);
-   if (m_src) {
-      if (!l.m_src)
-         return false;
-      if (*m_src != *l.m_src)
-         return false;
-   } else {
-      if (l.m_src)
-         return false;
-   }
-
-   return m_vc_opcode == l.m_vc_opcode &&
-         m_fetch_type == l.m_fetch_type &&
-         m_data_format == l.m_data_format &&
-         m_num_format == l.m_num_format &&
-         m_endian_swap == l.m_endian_swap &&
-         m_dst == l.m_dst &&
-         m_offset == l.m_offset &&
-         m_buffer_id == l.m_buffer_id &&
-         m_semantic_id == l.m_semantic_id &&
-         m_buffer_index_mode == l.m_buffer_index_mode &&
-         m_flags == l.m_flags &&
-         m_indexed == l.m_indexed &&
-         m_uncached == l.m_uncached;
-}
-
-void FetchInstruction::set_format(EVTXDataFormat fmt)
-{
-   m_data_format = fmt;
-}
-
-
-void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
-{
-   m_dest_swizzle = swz;
-}
-
-void FetchInstruction::prelude_append(Instruction *instr)
-{
-   assert(instr);
-   m_prelude.push_back(PInstruction(instr));
-}
-
-const std::vector<PInstruction>& FetchInstruction::prelude() const
-{
-   return m_prelude;
-}
-
-LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size):
-   FetchInstruction(dst, src, scratch_size)
-{
-}
-
-FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src):
-   FetchInstruction(vc_fetch,
-                    no_index_offset,
-                    fmt_32,
-                    vtx_nf_int,
-                    vtx_es_none,
-                    src,
-                    dst,
-                    0,
-                    false,
-                    0xf,
-                    R600_IMAGE_IMMED_RESOURCE_OFFSET,
-                    0,
-                    bim_none,
-                    false,
-                    false,
-                    0,
-                    0,
-                    0,
-                    PValue(),
-                    {0,7,7,7})
-{
-   set_flag(vtx_srf_mode);
-   set_flag(vtx_vpm);
-}
-
-FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset):
-   FetchInstruction(vc_fetch,
-                    no_index_offset,
-                    fmt_32_32_32_32,
-                    vtx_nf_scaled,
-                    vtx_es_none,
-                    src,
-                    dst,
-                    offset,
-                    false,
-                    16,
-                    R600_LDS_INFO_CONST_BUFFER,
-                    0,
-                    bim_none,
-                    false,
-                    false,
-                    0,
-                    0,
-                    0,
-                    PValue(),
-                    {0,1,2,3})
-{
-   set_flag(vtx_srf_mode);
-   set_flag(vtx_format_comp_signed);
-}
-
-
-static const char *fmt_descr[64] = {
-   "INVALID",
-   "8",
-   "4_4",
-   "3_3_2",
-   "RESERVED_4",
-   "16",
-   "16F",
-   "8_8",
-   "5_6_5",
-   "6_5_5",
-   "1_5_5_5",
-   "4_4_4_4",
-   "5_5_5_1",
-   "32",
-   "32F",
-   "16_16",
-   "16_16F",
-   "8_24",
-   "8_24F",
-   "24_8",
-   "24_8F",
-   "10_11_11",
-   "10_11_11F",
-   "11_11_10",
-   "11_11_10F",
-   "2_10_10_10",
-   "8_8_8_8",
-   "10_10_10_2",
-   "X24_8_32F",
-   "32_32",
-   "32_32F",
-   "16_16_16_16",
-   "16_16_16_16F",
-   "RESERVED_33",
-   "32_32_32_32",
-   "32_32_32_32F",
-   "RESERVED_36",
-   "1",
-   "1_REVERSED",
-   "GB_GR",
-   "BG_RG",
-   "32_AS_8",
-   "32_AS_8_8",
-   "5_9_9_9_SHAREDEXP",
-   "8_8_8",
-   "16_16_16",
-   "16_16_16F",
-   "32_32_32",
-   "32_32_32F",
-   "BC1",
-   "BC2",
-   "BC3",
-   "BC4",
-   "BC5",
-   "APC0",
-   "APC1",
-   "APC2",
-   "APC3",
-   "APC4",
-   "APC5",
-   "APC6",
-   "APC7",
-   "CTX1",
-   "RESERVED_63"
-};
-
-
-void FetchInstruction::do_print(std::ostream& os) const
-{
-   static const std::string num_format_char[] = {"norm", "int", "scaled"};
-   static const std::string endian_swap_code[] = {
-      "noswap", "8in16", "8in32"
-   };
-   static const char buffer_index_mode_char[] = "_01E";
-   static const char *flag_string[] = {"WQM",  "CF", "signed", "no_zero",
-                                       "nostride", "AC", "TC", "VPM"};
-   switch (m_vc_opcode) {
-   case vc_fetch:
-      os << "Fetch " << m_dst;
-      break;
-   case vc_semantic:
-      os << "Fetch Semantic ID:" << m_semantic_id;
-      break;
-   case vc_get_buf_resinfo:
-      os << "Fetch BufResinfo:" << m_dst;
-      break;
-   case vc_read_scratch:
-      os << "MEM_READ_SCRATCH:" << m_dst;
-      break;
-   default:
-      os << "Fetch ERROR";
-      return;
-   }
-
-   os << ", " << *m_src;
-
-   if (m_offset)
-      os << "+" << m_offset;
-
-   os << " BUFID:" << m_buffer_id
-      << " FMT:(" << fmt_descr[m_data_format]
-      << " " << num_format_char[m_num_format]
-      << " " << endian_swap_code[m_endian_swap]
-      << ")";
-   if (m_buffer_index_mode > 0)
-      os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
-
-
-   if (m_is_mega_fetch)
-      os << " MFC:" << m_mega_fetch_count;
-   else
-      os << " mfc*:" << m_mega_fetch_count;
-
-   if (m_flags.any()) {
-      os << " Flags:";
-      for( int i = 0; i < vtx_unknown; ++i) {
-         if (m_flags.test(i))
-            os << ' ' << flag_string[i];
-      }
-   }
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
deleted file mode 100644
index 71a3f69..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_INSTRUCTION_FETCH_H
-#define SFN_INSTRUCTION_FETCH_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class FetchInstruction : public Instruction {
-public:
-
-   FetchInstruction(EVFetchInstr vc_opcode,
-                    EVFetchType fetch_type,
-                    EVTXDataFormat data_format,
-                    EVFetchNumFormat num_format,
-                    EVFetchEndianSwap endian_swap,
-                    const PValue src,
-                    const GPRVector dst,
-                    uint32_t offset,
-                    bool is_mega_fetch,
-                    uint32_t mega_fetch_count,
-                    uint32_t buffer_id,
-                    uint32_t semantic_id,
-
-                    EBufferIndexMode buffer_index_mode,
-                    bool uncached,
-                    bool indexed,
-                    int array_base,
-                    int array_size,
-                    int elm_size,
-                    PValue buffer_offset,
-                    const std::array<int, 4>& dest_swizzle);
-
-   FetchInstruction(EVFetchInstr op,
-                    EVFetchType type,
-                    GPRVector dst,
-                    PValue src, int offset,
-                    int buffer_id, PValue buffer_offset,
-                    EBufferIndexMode cp_rel,
-                    bool use_const_field = false);
-
-   FetchInstruction(GPRVector dst,
-                    PValue src,
-                    int buffer_id,
-                    PValue buffer_offset,
-                    EVTXDataFormat format,
-                    EVFetchNumFormat num_format);
-
-   FetchInstruction(GPRVector dst,
-                    PValue src,
-                    int buffer_id,
-                    EBufferIndexMode cp_rel);
-
-   FetchInstruction(GPRVector dst, PValue src, int scratch_size);
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-   EVFetchInstr vc_opcode() const { return m_vc_opcode;}
-   EVFetchType fetch_type() const { return m_fetch_type;}
-
-   EVTXDataFormat data_format() const { return m_data_format;}
-   EVFetchNumFormat num_format() const { return m_num_format;}
-   EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
-
-   const Value& src() const { return *m_src;}
-   const GPRVector& dst() const { return m_dst;}
-   uint32_t offset() const { return m_offset;}
-
-   bool is_mega_fetchconst() { return m_is_mega_fetch;}
-   uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
-
-   uint32_t buffer_id() const { return m_buffer_id;}
-   uint32_t semantic_id() const { return m_semantic_id;}
-   EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
-
-   bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
-   bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
-
-   bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
-
-   void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
-
-   bool uncached() const {return m_uncached; }
-   bool indexed() const {return m_indexed; }
-   int array_base()const {return m_array_base; }
-   int array_size() const {return m_array_size; }
-   int elm_size() const {return m_elm_size; }
-
-   void set_buffer_offset(PValue buffer_offset) {
-      m_buffer_offset = buffer_offset;
-      add_remappable_src_value(&m_buffer_offset);
-   }
-   PValue buffer_offset() const { return m_buffer_offset; }
-
-   void set_dest_swizzle(const std::array<int,4>& swz);
-   void set_format(EVTXDataFormat fmt);
-
-   int swz(int idx) const { return m_dest_swizzle[idx];}
-
-   bool use_tc() const {return m_flags.test(vtx_use_tc);}
-
-   bool use_vpm() const {return m_flags.test(vtx_vpm);}
-
-   void prelude_append(Instruction *instr);
-
-   const std::vector<PInstruction>& prelude() const;
-
-   bool has_prelude() const {return !m_prelude.empty();}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   EVFetchInstr m_vc_opcode;
-   EVFetchType m_fetch_type;
-
-   EVTXDataFormat m_data_format;
-   EVFetchNumFormat m_num_format;
-   EVFetchEndianSwap m_endian_swap;
-
-   PValue m_src;
-   GPRVector m_dst;
-   uint32_t m_offset;
-
-   bool m_is_mega_fetch;
-   uint32_t m_mega_fetch_count;
-
-   uint32_t m_buffer_id;
-   uint32_t m_semantic_id;
-
-   EBufferIndexMode m_buffer_index_mode;
-   std::bitset<16> m_flags;
-   bool m_uncached;
-   bool m_indexed;
-   int m_array_base;
-   int m_array_size;
-   int m_elm_size;
-   PValue m_buffer_offset;
-   std::array<int, 4> m_dest_swizzle;
-   std::vector<PInstruction> m_prelude;
-};
-
-class LoadFromScratch: public FetchInstruction {
-public:
-   LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
-};
-
-class FetchGDSOpResult : public FetchInstruction {
-public:
-   FetchGDSOpResult(const GPRVector dst, const PValue src);
-};
-
-class FetchTCSIOParam : public FetchInstruction {
-public:
-   FetchTCSIOParam(GPRVector dst, PValue src, int offset);
-};
-
-}
-
-#endif // SFN_INSTRUCTION_FETCH_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
deleted file mode 100644
index 095cd40..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_gds.h"
-#include "sfn_liverange.h"
-
-namespace  r600 {
-
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-                   const PValue& value2, const PValue& uav_id, int uav_base):
-   Instruction(gds),
-   m_op(op),
-   m_src(value),
-   m_src2(value2),
-   m_dest(dest),
-   m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}),
-   m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}),
-   m_buffer_index_mode(bim_none),
-   m_uav_id(uav_id),
-   m_uav_base(uav_base),
-   m_flags(0)
-{
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_src2);
-   add_remappable_src_value(&m_uav_id);
-   add_remappable_dst_value(&m_dest);
-   m_dest_swizzle[0] = m_dest.chan_i(0);
-}
-
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-                   const PValue& uav_id, int uav_base):
-   GDSInstr(op, dest,  value, PValue(), uav_id, uav_base)
-{
-      assert(value);
-      m_src_swizzle[1] = value->chan();
-      m_src_swizzle[2] = PIPE_SWIZZLE_0;
-}
-
-GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,
-                   const PValue& uav_id, int uav_base):
-   GDSInstr(op, dest,  PValue(), PValue(), uav_id, uav_base)
-{
-   m_src_swizzle[1] = PIPE_SWIZZLE_0;
-}
-
-bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return false;
-}
-
-void GDSInstr::do_print(std::ostream& os) const
-{
-   const char *swz = "xyzw01?_";
-   os << lds_ops.at(m_op).name << " R" << m_dest.sel() << ".";
-   for (int i = 0; i < 4; ++i) {
-      os << swz[m_dest_swizzle[i]];
-   }
-   if (m_src)
-      os << " " << *m_src;
-
-   os << " UAV:" << *m_uav_id;
-}
-
-RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
-                               const GPRVector& data, const GPRVector& index,
-                               int rat_id, const PValue& rat_id_offset,
-                               int burst_count, int comp_mask, int element_size, bool ack):
-   Instruction(rat),
-   m_cf_opcode(cf_opcode),
-   m_rat_op(rat_op),
-   m_data(data),
-   m_index(index),
-   m_rat_id(rat_id),
-   m_rat_id_offset(rat_id_offset),
-   m_burst_count(burst_count),
-   m_comp_mask(comp_mask),
-   m_element_size(element_size),
-   m_need_ack(ack)
-{
-   add_remappable_src_value(&m_data);
-   add_remappable_src_value(&m_rat_id_offset);
-   add_remappable_src_value(&m_index);
-}
-
-bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const
-{
-   return false;
-}
-
-void RatInstruction::do_print(std::ostream& os) const
-{
-   os << "MEM_RAT RAT(" << m_rat_id;
-   if (m_rat_id_offset)
-      os << "+" << *m_rat_id_offset;
-   os << ") @" << m_index;
-   os << " OP:" << m_rat_op << " " << m_data;
-   os << " BC:" << m_burst_count
-      << " MASK:" << m_comp_mask
-      << " ES:" << m_element_size;
-   if (m_need_ack)
-      os << " ACK";
-}
-
-RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode)
-{
-   switch (opcode) {
-   case nir_intrinsic_ssbo_atomic_add:
-      return ADD_RTN;
-   case nir_intrinsic_ssbo_atomic_and:
-      return AND_RTN;
-   case nir_intrinsic_ssbo_atomic_exchange:
-      return XCHG_RTN;
-   case nir_intrinsic_ssbo_atomic_umax:
-      return MAX_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_umin:
-      return MIN_UINT_RTN;
-   case nir_intrinsic_ssbo_atomic_imax:
-      return MAX_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_imin:
-      return MIN_INT_RTN;
-   case nir_intrinsic_ssbo_atomic_xor:
-      return XOR_RTN;
-   default:
-      return UNSUPPORTED;
-   }
-}
-
-GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value):
-   Instruction(tf_write),
-   m_value(value)
-{
-   add_remappable_src_value(&m_value);
-}
-
-void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto& c: candidates) {
-      for (int i = 0; i < 4; ++i) {
-         if (*c == *m_value[i])
-            m_value[i] = new_value;
-      }
-   }
-}
-
-
-bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const GDSStoreTessFactor&>(lhs);
-   return m_value == other.m_value;
-}
-
-void GDSStoreTessFactor::do_print(std::ostream& os) const
-{
-   os << "TF_WRITE " << m_value;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
deleted file mode 100644
index 6f8e0f2..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_GDSINSTR_H
-#define SFN_GDSINSTR_H
-
-#include "sfn_instruction_base.h"
-
-#include <bitset>
-
-namespace r600 {
-
-class GDSInstr : public Instruction
-{
-public:
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-            const PValue &uav_id, int uav_base);
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue& value,
-            const PValue& value2, const PValue &uav_id, int uav_base);
-   GDSInstr(ESDOp op, const GPRVector& dest,  const PValue &uav_id, int uav_base);
-
-   ESDOp op() const {return m_op;}
-
-   int src_sel() const {
-      if (!m_src)
-         return 0;
-
-      assert(m_src->type() == Value::gpr);
-      return m_src->sel();
-   }
-
-   int src2_chan() const {
-      if (!m_src2)
-         return 0;
-
-      assert(m_src->type() == Value::gpr);
-      return m_src->chan();
-   }
-
-   int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
-
-   int dest_sel() const {
-      return m_dest.sel();
-   }
-
-   int dest_swizzle(int i) const {
-      if (i < 4)
-         return m_dest_swizzle[i];
-      return 7;
-   }
-
-   void set_dest_swizzle(const std::array<int,4>& swz) {
-      m_dest_swizzle = swz;
-   }
-
-   PValue uav_id() const {return m_uav_id;}
-   int uav_base() const {return m_uav_base;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ESDOp m_op;
-
-   PValue m_src;
-   PValue m_src2;
-   GPRVector m_dest;
-   std::array <int, 4> m_dest_swizzle;
-   std::array <int, 3> m_src_swizzle;
-
-   EBufferIndexMode m_buffer_index_mode;
-   PValue m_uav_id;
-   int m_uav_base;
-   std::bitset<8> m_flags;
-
-};
-
-class RatInstruction : public Instruction {
-
-public:
-   enum ERatOp {
-      NOP,
-      STORE_TYPED,
-      STORE_RAW,
-      STORE_RAW_FDENORM,
-      CMPXCHG_INT,
-      CMPXCHG_FLT,
-      CMPXCHG_FDENORM,
-      ADD,
-      SUB,
-      RSUB,
-      MIN_INT,
-      MIN_UINT,
-      MAX_INT,
-      MAX_UINT,
-      AND,
-      OR,
-      XOR,
-      MSKOR,
-      INC_UINT,
-      DEC_UINT,
-      NOP_RTN = 32,
-      XCHG_RTN = 34,
-      XCHG_FDENORM_RTN,
-      CMPXCHG_INT_RTN,
-      CMPXCHG_FLT_RTN,
-      CMPXCHG_FDENORM_RTN,
-      ADD_RTN,
-      SUB_RTN,
-      RSUB_RTN,
-      MIN_INT_RTN,
-      MIN_UINT_RTN,
-      MAX_INT_RTN,
-      MAX_UINT_RTN,
-      AND_RTN,
-      OR_RTN,
-      XOR_RTN,
-      MSKOR_RTN,
-      UINT_RTN,
-      UNSUPPORTED
-   };
-
-   RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
-                  const GPRVector& data, const GPRVector& index,
-                  int rat_id, const PValue& rat_id_offset,
-                  int burst_count, int comp_mask, int element_size,
-                  bool ack);
-
-   PValue rat_id_offset() const { return m_rat_id_offset;}
-   int  rat_id() const { return m_rat_id;}
-
-   ERatOp rat_op() const {return m_rat_op;}
-
-   int data_gpr() const {return m_data.sel();}
-   int index_gpr() const {return m_index.sel();}
-   int elm_size() const {return m_element_size;}
-
-   int comp_mask() const {return m_comp_mask;}
-
-   bool need_ack() const {return m_need_ack;}
-   int burst_count() const {return m_burst_count;}
-
-   static ERatOp opcode(nir_intrinsic_op opcode);
-
-   int data_swz(int chan) const {return m_data.chan_i(chan);}
-
-   ECFOpCode cf_opcode() const { return m_cf_opcode;}
-
-   void set_ack() {m_need_ack = true; }
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   ECFOpCode m_cf_opcode;
-   ERatOp m_rat_op;
-
-   GPRVector m_data;
-   GPRVector m_index;
-
-   int m_rat_id;
-   PValue m_rat_id_offset;
-   int m_burst_count;
-   int m_comp_mask;
-   int m_element_size;
-
-   std::bitset<8> m_flags;
-
-   bool m_need_ack;
-
-};
-
-class GDSStoreTessFactor : public Instruction {
-public:
-      GDSStoreTessFactor(GPRVector& value);
-      int sel() const {return m_value.sel();}
-      int chan(int i ) const {return m_value.chan_i(i);}
-
-      void replace_values(const ValueSet& candiates, PValue new_value) override;
-
-      bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-      bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-      bool is_equal_to(const Instruction& lhs) const override;
-      void do_print(std::ostream& os) const override;
-
-      GPRVector m_value;
-};
-
-}
-
-#endif // SFN_GDSINSTR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
deleted file mode 100644
index b77461a..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-#include "sfn_instruction_lds.h"
-
-namespace r600 {
-
-void LDSReadInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS Read  [";
-   for (auto& v : m_dest_value)
-      os << *v << " ";
-   os << "], ";
-   for (auto& a : m_address)
-      os << *a << " ";
-}
-
-LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
-   Instruction(lds_read),
-   m_address(address),
-   m_dest_value(value)
-{
-   assert(address.size() == value.size());
-
-   for (unsigned i = 0; i < address.size(); ++i) {
-      add_remappable_src_value(&m_address[i]);
-      add_remappable_dst_value(&m_dest_value[i]);
-   }
-}
-
-void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto& c : candidates) {
-      for (auto& d: m_dest_value) {
-         if (*c == *d)
-            d = new_value;
-      }
-
-      for (auto& a: m_address) {
-         if (*c == *a)
-            a = new_value;
-      }
-   }
-}
-
-bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const LDSReadInstruction&>(lhs);
-   return m_address == other.m_address &&
-         m_dest_value == other.m_dest_value;
-}
-
-LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
-   Instruction(lds_atomic),
-   m_address(address),
-   m_dest_value(dest),
-   m_src0_value(src0),
-   m_src1_value(src1),
-   m_opcode(op)
-{
-   add_remappable_src_value(&m_src0_value);
-   add_remappable_src_value(&m_src1_value);
-   add_remappable_src_value(&m_address);
-   add_remappable_dst_value(&m_dest_value);
-}
-
-LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
-   LDSAtomicInstruction(dest, src0, PValue(), address, op)
-{
-
-}
-
-
-void LDSAtomicInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS " << m_opcode << " " << *m_dest_value << " ";
-   os << "[" << *m_address << "] " << *m_src0_value;
-   if (m_src1_value)
-      os << ", " << *m_src1_value;
-}
-
-bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
-
-   return m_opcode == other.m_opcode &&
-         *m_dest_value == *other.m_dest_value &&
-         *m_src0_value == *other.m_src0_value &&
-         *m_address == *other.m_address &&
-         ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
-          (!m_src1_value && !other.m_src1_value));
-}
-
-LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
-   LDSWriteInstruction::LDSWriteInstruction(address, idx_offset, value0, PValue())
-
-{
-}
-
-LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
-   Instruction(lds_write),
-   m_address(address),
-   m_value0(value0),
-   m_value1(value1),
-   m_idx_offset(idx_offset)
-{
-   add_remappable_src_value(&m_address);
-   add_remappable_src_value(&m_value0);
-   if (m_value1)
-      add_remappable_src_value(&m_value1);
-}
-
-
-void LDSWriteInstruction::do_print(std::ostream& os) const
-{
-   os << "LDS Write" << num_components()
-      << " " << address() << ", " << value0();
-   if (num_components() > 1)
-      os << ", " << value1();
-}
-
-void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   for (auto c: candidates) {
-      if (*c == *m_address)
-         m_address = new_value;
-
-      if (*c == *m_value0)
-         m_value0 = new_value;
-
-      if (*c == *m_value1)
-         m_value1 = new_value;
-   }
-}
-
-bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
-{
-   auto& other = static_cast<const LDSWriteInstruction&>(lhs);
-
-   if (m_value1) {
-      if (!other.m_value1)
-         return false;
-      if (*m_value1 != *other.m_value1)
-         return false;
-   } else {
-      if (other.m_value1)
-         return false;
-   }
-
-   return (m_value0 != other.m_value0 &&
-           *m_address != *other.m_address);
-}
-
-} // namespace r600
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
deleted file mode 100644
index 96439a7..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifndef LDSINSTRUCTION_H
-#define LDSINSTRUCTION_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class LDSReadInstruction : public Instruction {
-public:
-   LDSReadInstruction(std::vector<PValue>& value, std::vector<PValue>& address);
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   unsigned num_values() const { return m_dest_value.size();}
-   const Value& address(unsigned i) const { return *m_address[i];}
-   const Value& dest(unsigned i) const { return *m_dest_value[i];}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   std::vector<PValue> m_address;
-   std::vector<PValue> m_dest_value;
-};
-
-class LDSAtomicInstruction : public Instruction {
-public:
-   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
-   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
-
-   const Value& address() const { return *m_address;}
-   const Value& dest() const { return *m_dest_value;}
-   const Value& src0() const { return *m_src0_value;}
-   const PValue& src1() const { return m_src1_value;}
-   unsigned op() const {return m_opcode;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   PValue m_address;
-   PValue m_dest_value;
-   PValue m_src0_value;
-   PValue m_src1_value;
-   unsigned m_opcode;
-};
-
-class LDSWriteInstruction : public Instruction {
-public:
-   LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
-   LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1);
-
-   const Value& address() const {return *m_address;};
-   const Value& value0() const { return *m_value0;}
-   const Value& value1() const { return *m_value1;}
-   unsigned num_components() const { return m_value1 ? 2 : 1;}
-   unsigned idx_offset() const {return m_idx_offset;};
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Instruction& lhs) const override;
-
-   PValue m_address;
-   PValue m_value0;
-   PValue m_value1;
-   unsigned m_idx_offset;
-
-};
-
-}
-
-#endif // LDSINSTRUCTION_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
deleted file mode 100644
index 1c1a98c..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_misc.h"
-
-namespace r600 {
-EmitVertex::EmitVertex(int stream, bool cut):
-   Instruction (emit_vtx),
-   m_stream(stream),
-   m_cut(cut)
-{
-
-}
-
-bool EmitVertex::is_equal_to(const Instruction& lhs) const
-{
-   auto& oth = static_cast<const EmitVertex&>(lhs);
-   return oth.m_stream == m_stream &&
-         oth.m_cut == m_cut;
-}
-
-void EmitVertex::do_print(std::ostream& os) const
-{
-   os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
-}
-
-WaitAck::WaitAck(int nack):
-   Instruction (wait_ack),
-   m_nack(nack)
-{
-
-}
-
-bool WaitAck::is_equal_to(const Instruction& lhs) const
-{
-   const auto& l = static_cast<const WaitAck&>(lhs);
-   return m_nack == l.m_nack;
-}
-
-void WaitAck::do_print(std::ostream& os) const
-{
-   os << "WAIT_ACK @" << m_nack;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h b/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
deleted file mode 100644
index d322b4a..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_INSTRUCTION_MISC_H
-#define SFN_INSTRUCTION_MISC_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class EmitVertex : public Instruction {
-public:
-   EmitVertex(int stream, bool cut);
-   ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
-   int stream() const { return m_stream;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   int m_stream;
-   bool m_cut;
-};
-
-class WaitAck : public Instruction {
-public:
-   WaitAck(int nack);
-   ECFOpCode op() const {return cf_wait_ack;}
-   int n_ack() const {return m_nack;}
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-   int m_nack;
-};
-
-}
-
-#endif // SFN_INSTRUCTION_MISC_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
deleted file mode 100644
index 2fe7cba..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef INSTRUCTION_TEX_H
-#define INSTRUCTION_TEX_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-class TexInstruction : public Instruction {
-public:
-   enum Opcode {
-      ld = FETCH_OP_LD,
-      get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
-      get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
-      get_tex_lod = FETCH_OP_GET_LOD,
-      get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
-      get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
-      set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
-      keep_gradients = FETCH_OP_KEEP_GRADIENTS,
-      set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
-      set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
-      sample = FETCH_OP_SAMPLE,
-      sample_l = FETCH_OP_SAMPLE_L,
-      sample_lb = FETCH_OP_SAMPLE_LB,
-      sample_lz = FETCH_OP_SAMPLE_LZ,
-      sample_g = FETCH_OP_SAMPLE_G,
-      sample_g_lb = FETCH_OP_SAMPLE_G_L,
-      gather4 = FETCH_OP_GATHER4,
-      gather4_o =  FETCH_OP_GATHER4_O,
-
-      sample_c = FETCH_OP_SAMPLE_C,
-      sample_c_l = FETCH_OP_SAMPLE_C_L,
-      sample_c_lb = FETCH_OP_SAMPLE_C_LB,
-      sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
-      sample_c_g = FETCH_OP_SAMPLE_C_G,
-      sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
-      gather4_c = FETCH_OP_GATHER4_C,
-      gather4_c_o =  FETCH_OP_GATHER4_C_O,
-
-   };
-
-   enum Flags {
-      x_unnormalized,
-      y_unnormalized,
-      z_unnormalized,
-      w_unnormalized,
-      grad_fine
-   };
-
-   TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
-                  unsigned rid, PValue sampler_offset);
-
-   const GPRVector& src() const {return m_src;}
-   const GPRVector& dst() const {return m_dst;}
-   unsigned opcode() const {return m_opcode;}
-   unsigned sampler_id() const {return m_sampler_id;}
-   unsigned resource_id() const {return m_resource_id;}
-
-   void replace_values(const ValueSet& candidates, PValue new_value) override;
-
-   void set_offset(unsigned index, int32_t val);
-   int get_offset(unsigned index) const;
-
-   void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
-
-   int inst_mode() const { return m_inst_mode;}
-
-   void set_flag(Flags flag) {
-      m_flags.set(flag);
-   }
-
-   PValue sampler_offset() const {
-      return m_sampler_offset;
-   }
-
-   bool has_flag(Flags flag) const {
-      return m_flags.test(flag);
-   }
-
-   int dest_swizzle(int i) const {
-      assert(i < 4);
-      return m_dest_swizzle[i];
-   }
-
-   void set_dest_swizzle(const std::array<int,4>& swz) {
-      m_dest_swizzle = swz;
-   }
-
-   void set_gather_comp(int cmp);
-
-   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
-   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
-
-private:
-   bool is_equal_to(const Instruction& lhs) const override;
-   void do_print(std::ostream& os) const override;
-
-   static const char *opname(Opcode code);
-
-   Opcode m_opcode;
-   GPRVector m_dst;
-   GPRVector m_src;
-   unsigned m_sampler_id;
-   unsigned m_resource_id;
-   std::bitset<8> m_flags;
-   int m_offset[3];
-   int m_inst_mode;
-   std::array<int,4> m_dest_swizzle;
-   PValue m_sampler_offset;
-};
-
-bool r600_nir_lower_int_tg4(nir_shader *nir);
-bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
-bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
-
-}
-
-#endif // INSTRUCTION_TEX_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h b/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
deleted file mode 100644
index 9b34fcd..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_instructionvisitor.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef INSTRUCTIONVISITOR_H
-#define INSTRUCTIONVISITOR_H
-
-namespace r600 {
-
-
-class AluInstruction;
-class ExportInstruction;
-class TexInstruction;
-class FetchInstruction;
-class IfInstruction;
-class ElseInstruction;
-class IfElseEndInstruction;
-class LoopBeginInstruction;
-class LoopEndInstruction;
-class LoopBreakInstruction;
-class LoopContInstruction;
-class StreamOutIntruction;
-class MemRingOutIntruction;
-class EmitVertex;
-class WaitAck;
-class WriteScratchInstruction;
-class GDSInstr;
-class RatInstruction;
-class LDSWriteInstruction;
-class LDSReadInstruction;
-class LDSAtomicInstruction;
-class GDSStoreTessFactor;
-class InstructionBlock;
-
-class InstructionVisitor
-{
-public:
-   virtual ~InstructionVisitor() {};
-   virtual bool visit(AluInstruction& i) = 0;
-   virtual bool visit(ExportInstruction& i) = 0;
-   virtual bool visit(TexInstruction& i) = 0;
-   virtual bool visit(FetchInstruction& i) = 0;
-   virtual bool visit(IfInstruction& i) = 0;
-   virtual bool visit(ElseInstruction& i) = 0;
-   virtual bool visit(IfElseEndInstruction& i) = 0;
-   virtual bool visit(LoopBeginInstruction& i) = 0;
-   virtual bool visit(LoopEndInstruction& i) = 0;
-   virtual bool visit(LoopBreakInstruction& i) = 0;
-   virtual bool visit(LoopContInstruction& i) = 0;
-   virtual bool visit(StreamOutIntruction& i) = 0;
-   virtual bool visit(MemRingOutIntruction& i) = 0;
-   virtual bool visit(EmitVertex& i) = 0;
-   virtual bool visit(WaitAck& i) = 0;
-   virtual bool visit(WriteScratchInstruction& i) = 0;
-   virtual bool visit(GDSInstr& i) = 0;
-   virtual bool visit(RatInstruction& i) = 0;
-   virtual bool visit(LDSWriteInstruction& i) = 0;
-   virtual bool visit(LDSReadInstruction& i) = 0;
-   virtual bool visit(LDSAtomicInstruction& i) = 0;
-   virtual bool visit(GDSStoreTessFactor& i) = 0;
-   virtual bool visit(InstructionBlock& i) = 0;
-};
-
-class ConstInstructionVisitor
-{
-public:
-   virtual ~ConstInstructionVisitor() {};
-   virtual bool visit(const AluInstruction& i) = 0;
-   virtual bool visit(const ExportInstruction& i) = 0;
-   virtual bool visit(const TexInstruction& i) = 0;
-   virtual bool visit(const FetchInstruction& i) = 0;
-   virtual bool visit(const IfInstruction& i) = 0;
-   virtual bool visit(const ElseInstruction& i) = 0;
-   virtual bool visit(const IfElseEndInstruction& i) = 0;
-   virtual bool visit(const LoopBeginInstruction& i) = 0;
-   virtual bool visit(const LoopEndInstruction& i) = 0;
-   virtual bool visit(const LoopBreakInstruction& i) = 0;
-   virtual bool visit(const LoopContInstruction& i) = 0;
-   virtual bool visit(const StreamOutIntruction& i) = 0;
-   virtual bool visit(const MemRingOutIntruction& i) = 0;
-   virtual bool visit(const EmitVertex& i) = 0;
-   virtual bool visit(const WaitAck& i) = 0;
-   virtual bool visit(const WriteScratchInstruction& i) = 0;
-   virtual bool visit(const GDSInstr& i) = 0;
-   virtual bool visit(const RatInstruction& i) = 0;
-   virtual bool visit(const LDSWriteInstruction& i) = 0;
-   virtual bool visit(const LDSReadInstruction& i) = 0;
-   virtual bool visit(const LDSAtomicInstruction& i) = 0;
-   virtual bool visit(const GDSStoreTessFactor& i) = 0;
-   virtual bool visit(const InstructionBlock& i) = 0;
-};
-
-}
-
-#endif // INSTRUCTIONVISITOR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
deleted file mode 100644
index 0c82032..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "sfn_nir.h"
-
-struct r600_shader;
-union r600_shader_key;
-
-namespace r600 {
-
-class AssemblyFromShaderLegacy : public AssemblyFromShader {
-public:
-   AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
-   ~AssemblyFromShaderLegacy() override;
-private:
-   bool do_lower(const std::vector<InstructionBlock> &ir)  override ;
-
-   struct AssemblyFromShaderLegacyImpl *impl;
-};
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
deleted file mode 100644
index 28eef05..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
+++ /dev/null
@@ -1,1006 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Gert Wollny
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_liverange.h"
-#include "sfn_debug.h"
-#include "sfn_value.h"
-#include "sfn_value_gpr.h"
-
-#include "program/prog_instruction.h"
-#include "util/bitscan.h"
-#include "util/u_math.h"
-
-#include <limits>
-#include <cstdlib>
-#include <iomanip>
-
-/* std::sort is significantly faster than qsort */
-#include <algorithm>
-
-/* If <windows.h> is included this is defined and clashes with
- * std::numeric_limits<>::max()
- */
-#ifdef max
-#undef max
-#endif
-
-
-namespace r600 {
-
-using std::numeric_limits;
-using std::unique_ptr;
-using std::setw;
-
-prog_scope_storage::prog_scope_storage(int n):
-   current_slot(0),
-   storage(n)
-{
-}
-
-prog_scope_storage::~prog_scope_storage()
-{
-}
-
-prog_scope*
-prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
-                           int lvl, int s_begin)
-{
-   storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
-   return &storage[current_slot++];
-}
-
-prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
-                       int depth, int scope_begin):
-   scope_type(type),
-   scope_id(id),
-   scope_nesting_depth(depth),
-   scope_begin(scope_begin),
-   scope_end(-1),
-   break_loop_line(numeric_limits<int>::max()),
-   parent_scope(parent)
-{
-}
-
-prog_scope::prog_scope():
-   prog_scope(nullptr, undefined_scope, -1, -1, -1)
-{
-}
-
-prog_scope_type prog_scope::type() const
-{
-   return scope_type;
-}
-
-prog_scope *prog_scope::parent() const
-{
-   return parent_scope;
-}
-
-int prog_scope::nesting_depth() const
-{
-   return scope_nesting_depth;
-}
-
-bool prog_scope::is_loop() const
-{
-   return (scope_type == loop_body);
-}
-
-bool prog_scope::is_in_loop() const
-{
-   if (scope_type == loop_body)
-      return true;
-
-   if (parent_scope)
-      return parent_scope->is_in_loop();
-
-   return false;
-}
-
-const prog_scope *prog_scope::innermost_loop() const
-{
-   if (scope_type == loop_body)
-      return this;
-
-   if (parent_scope)
-      return parent_scope->innermost_loop();
-
-   return nullptr;
-}
-
-const prog_scope *prog_scope::outermost_loop() const
-{
-   const prog_scope *loop = nullptr;
-   const prog_scope *p = this;
-
-   do {
-      if (p->type() == loop_body)
-         loop = p;
-      p = p->parent();
-   } while (p);
-
-   return loop;
-}
-
-bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const
-{
-   const prog_scope *my_parent = in_parent_ifelse_scope();
-   while (my_parent) {
-      /* is a direct child? */
-      if (my_parent == scope)
-         return false;
-      /* is a child of the conditions sibling? */
-      if (my_parent->id() == scope->id())
-         return true;
-      my_parent = my_parent->in_parent_ifelse_scope();
-   }
-   return false;
-}
-
-bool prog_scope::is_child_of(const prog_scope *scope) const
-{
-   const prog_scope *my_parent = parent();
-   while (my_parent) {
-      if (my_parent == scope)
-         return true;
-      my_parent = my_parent->parent();
-   }
-   return false;
-}
-
-const prog_scope *prog_scope::enclosing_conditional() const
-{
-   if (is_conditional())
-      return this;
-
-   if (parent_scope)
-      return parent_scope->enclosing_conditional();
-
-   return nullptr;
-}
-
-bool prog_scope::contains_range_of(const prog_scope& other) const
-{
-   return (begin() <= other.begin()) && (end() >= other.end());
-}
-
-bool prog_scope::is_conditional() const
-{
-   return scope_type == if_branch ||
-         scope_type == else_branch ||
-         scope_type == switch_case_branch ||
-         scope_type == switch_default_branch;
-}
-
-const prog_scope *prog_scope::in_else_scope() const
-{
-   if (scope_type == else_branch)
-      return this;
-
-   if (parent_scope)
-      return parent_scope->in_else_scope();
-
-   return nullptr;
-}
-
-const prog_scope *prog_scope::in_parent_ifelse_scope() const
-{
-        if (parent_scope)
-                return parent_scope->in_ifelse_scope();
-        else
-                return nullptr;
-}
-
-const prog_scope *prog_scope::in_ifelse_scope() const
-{
-   if (scope_type == if_branch ||
-       scope_type == else_branch)
-      return this;
-
-   if (parent_scope)
-      return parent_scope->in_ifelse_scope();
-
-   return nullptr;
-}
-
-bool prog_scope::is_switchcase_scope_in_loop() const
-{
-   return (scope_type == switch_case_branch ||
-           scope_type == switch_default_branch) &&
-         is_in_loop();
-}
-
-bool prog_scope::break_is_for_switchcase() const
-{
-   if (scope_type == loop_body)
-      return false;
-
-   if (scope_type == switch_case_branch ||
-       scope_type == switch_default_branch ||
-       scope_type == switch_body)
-      return true;
-
-   if (parent_scope)
-      return parent_scope->break_is_for_switchcase();
-
-   return false;
-}
-
-int prog_scope::id() const
-{
-   return scope_id;
-}
-
-int prog_scope::begin() const
-{
-   return scope_begin;
-}
-
-int prog_scope::end() const
-{
-   return scope_end;
-}
-
-void prog_scope::set_end(int end)
-{
-   if (scope_end == -1)
-      scope_end = end;
-}
-
-void prog_scope::set_loop_break_line(int line)
-{
-   if (scope_type == loop_body) {
-      break_loop_line = MIN2(break_loop_line, line);
-   } else {
-      if (parent_scope)
-         parent()->set_loop_break_line(line);
-   }
-}
-
-int prog_scope::loop_break_line() const
-{
-   return break_loop_line;
-}
-
-temp_access::temp_access():
-   access_mask(0),
-   needs_component_tracking(false),
-   is_array_element(false)
-{
-}
-
-void temp_access::update_access_mask(int mask)
-{
-   if (access_mask && access_mask != mask)
-      needs_component_tracking = true;
-   access_mask |= mask;
-}
-
-void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm)
-{
-
-
-   update_access_mask(writemask);
-   is_array_element |= is_array_elm;
-
-   if (writemask & WRITEMASK_X)
-      comp[0].record_write(line, scope);
-   if (writemask & WRITEMASK_Y)
-      comp[1].record_write(line, scope);
-   if (writemask & WRITEMASK_Z)
-      comp[2].record_write(line, scope);
-   if (writemask & WRITEMASK_W)
-      comp[3].record_write(line, scope);
-}
-
-void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm)
-{
-   update_access_mask(readmask);
-   is_array_element |= is_array_elm;
-
-   if (readmask & WRITEMASK_X)
-      comp[0].record_read(line, scope);
-   if (readmask & WRITEMASK_Y)
-      comp[1].record_read(line, scope);
-   if (readmask & WRITEMASK_Z)
-      comp[2].record_read(line, scope);
-   if (readmask & WRITEMASK_W)
-      comp[3].record_read(line, scope);
-}
-
-inline static register_live_range make_live_range(int b, int e)
-{
-   register_live_range lt;
-   lt.begin = b;
-   lt.end = e;
-   lt.is_array_elm = false;
-   return lt;
-}
-
-register_live_range temp_access::get_required_live_range()
-{
-   register_live_range result = make_live_range(-1, -1);
-
-   unsigned mask = access_mask;
-   while (mask) {
-      unsigned chan = u_bit_scan(&mask);
-      register_live_range lt = comp[chan].get_required_live_range();
-
-      if (lt.begin >= 0) {
-         if ((result.begin < 0) || (result.begin > lt.begin))
-            result.begin = lt.begin;
-      }
-
-      if (lt.end > result.end)
-         result.end = lt.end;
-
-      if (!needs_component_tracking)
-         break;
-   }
-   result.is_array_elm = is_array_element;
-
-   return result;
-}
-
-const int
-temp_comp_access::conditionality_untouched = std::numeric_limits<int>::max();
-
-const int
-temp_comp_access::write_is_unconditional = std::numeric_limits<int>::max() - 1;
-
-
-temp_comp_access::temp_comp_access():
-   last_read_scope(nullptr),
-   first_read_scope(nullptr),
-   first_write_scope(nullptr),
-   first_write(-1),
-   last_read(-1),
-   last_write(-1),
-   first_read(numeric_limits<int>::max()),
-   conditionality_in_loop_id(conditionality_untouched),
-   if_scope_write_flags(0),
-   next_ifelse_nesting_depth(0),
-   current_unpaired_if_write_scope(nullptr),
-   was_written_in_current_else_scope(false)
-{
-}
-
-void temp_comp_access::record_read(int line, prog_scope *scope)
-{
-   last_read_scope = scope;
-   if (last_read < line)
-      last_read = line;
-
-   if (first_read > line) {
-      first_read = line;
-      first_read_scope = scope;
-   }
-
-   /* If the conditionality of the first write is already resolved then
-    * no further checks are required.
-    */
-   if (conditionality_in_loop_id == write_is_unconditional ||
-       conditionality_in_loop_id == write_is_conditional)
-      return;
-
-   /* Check whether we are in a condition within a loop */
-   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
-   const prog_scope *enclosing_loop;
-   if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
-
-      /* If we have either not yet written to this register nor writes are
-       * resolved as unconditional in the enclosing loop then check whether
-       * we read before write in an IF/ELSE branch.
-       */
-      if ((conditionality_in_loop_id != write_is_conditional) &&
-          (conditionality_in_loop_id != enclosing_loop->id())) {
-
-         if (current_unpaired_if_write_scope)  {
-
-            /* Has been written in this or a parent scope? - this makes the temporary
-             * unconditionally set at this point.
-             */
-            if (scope->is_child_of(current_unpaired_if_write_scope))
-               return;
-
-            /* Has been written in the same scope before it was read? */
-            if (ifelse_scope->type() == if_branch) {
-               if (current_unpaired_if_write_scope->id() == scope->id())
-                  return;
-            } else {
-               if (was_written_in_current_else_scope)
-                  return;
-            }
-         }
-
-         /* The temporary was read (conditionally) before it is written, hence
-          * it should survive a loop. This can be signaled like if it were
-          * conditionally written.
-          */
-         conditionality_in_loop_id = write_is_conditional;
-      }
-   }
-}
-
-void temp_comp_access::record_write(int line, prog_scope *scope)
-{
-   last_write = line;
-
-   if (first_write < 0) {
-      first_write = line;
-      first_write_scope = scope;
-
-      /* If the first write we encounter is not in a conditional branch, or
-       * the conditional write is not within a loop, then this is to be
-       * considered an unconditional dominant write.
-       */
-      const prog_scope *conditional = scope->enclosing_conditional();
-      if (!conditional || !conditional->innermost_loop()) {
-         conditionality_in_loop_id = write_is_unconditional;
-      }
-   }
-
-   /* The conditionality of the first write is already resolved. */
-   if (conditionality_in_loop_id == write_is_unconditional ||
-       conditionality_in_loop_id == write_is_conditional)
-      return;
-
-   /* If the nesting depth is larger than the supported level,
-    * then we assume conditional writes.
-    */
-   if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
-      conditionality_in_loop_id = write_is_conditional;
-      return;
-   }
-
-   /* If we are in an IF/ELSE scope within a loop and the loop has not
-    * been resolved already, then record this write.
-    */
-   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
-   if (ifelse_scope && ifelse_scope->innermost_loop() &&
-       ifelse_scope->innermost_loop()->id()  != conditionality_in_loop_id)
-      record_ifelse_write(*ifelse_scope);
-}
-
-void temp_comp_access::record_ifelse_write(const prog_scope& scope)
-{
-   if (scope.type() == if_branch) {
-      /* The first write in an IF branch within a loop implies unresolved
-       * conditionality (if it was untouched or unconditional before).
-       */
-      conditionality_in_loop_id = conditionality_unresolved;
-      was_written_in_current_else_scope = false;
-      record_if_write(scope);
-   } else {
-      was_written_in_current_else_scope = true;
-      record_else_write(scope);
-   }
-}
-
-void temp_comp_access::record_if_write(const prog_scope& scope)
-{
-   /* Don't record write if this IF scope if it ...
-    * - is not the first write in this IF scope,
-    * - has already been written in a parent IF scope.
-    * In both cases this write is a secondary write that doesn't contribute
-    * to resolve conditionality.
-    *
-    * Record the write if it
-    * - is the first one (obviously),
-    * - happens in an IF branch that is a child of the ELSE branch of the
-    *   last active IF/ELSE pair. In this case recording this write is used to
-    *   established whether the write is (un-)conditional in the scope enclosing
-    *   this outer IF/ELSE pair.
-    */
-   if (!current_unpaired_if_write_scope ||
-       (current_unpaired_if_write_scope->id() != scope.id() &&
-        scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope)))  {
-      if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
-      current_unpaired_if_write_scope = &scope;
-      next_ifelse_nesting_depth++;
-   }
-}
-
-void temp_comp_access::record_else_write(const prog_scope& scope)
-{
-   int mask = 1 << (next_ifelse_nesting_depth - 1);
-
-   /* If the temporary was written in an IF branch on the same scope level
-    * and this branch is the sibling of this ELSE branch, then we have a
-    * pair of writes that makes write access to this temporary unconditional
-    * in the enclosing scope.
-    */
-
-   if ((if_scope_write_flags & mask) &&
-       (scope.id() == current_unpaired_if_write_scope->id())) {
-          --next_ifelse_nesting_depth;
-         if_scope_write_flags &= ~mask;
-
-         /* The following code deals with propagating unconditionality from
-          * inner levels of nested IF/ELSE to the outer levels like in
-          *
-          * 1: var t;
-          * 2: if (a) {        <- start scope A
-          * 3:    if (b)
-          * 4:         t = ...
-          * 5:    else
-          * 6:         t = ...
-          * 7: } else {        <- start scope B
-          * 8:    if (c)
-          * 9:         t = ...
-          * A:    else         <- start scope C
-          * B:         t = ...
-          * C: }
-          *
-          */
-
-         const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope();
-
-         if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) {
-            /* We are at the end of scope C and already recorded a write
-             * within an IF scope (A), the sibling of the parent ELSE scope B,
-             * and it is not yet resolved. Mark that as the last relevant
-             * IF scope. Below the write will be resolved for the A/B
-             * scope pair.
-             */
-            current_unpaired_if_write_scope = parent_ifelse;
-         } else {
-            current_unpaired_if_write_scope = nullptr;
-         }
-	 /* Promote the first write scope to the enclosing scope because
-	  * the current IF/ELSE pair is now irrelevant for the analysis.
-	  * This is also required to evaluate the minimum life time for t in
-	  * {
-	  *    var t;
-	  *    if (a)
-	  *      t = ...
-	  *    else
-	  *      t = ...
-	  *    x = t;
-	  *    ...
-	  * }
-	  */
-	 first_write_scope = scope.parent();
-
-         /* If some parent is IF/ELSE and in a loop then propagate the
-          * write to that scope. Otherwise the write is unconditional
-          * because it happens in both corresponding IF/ELSE branches
-          * in this loop, and hence, record the loop id to signal the
-          * resolution.
-          */
-         if (parent_ifelse && parent_ifelse->is_in_loop()) {
-            record_ifelse_write(*parent_ifelse);
-         } else {
-            conditionality_in_loop_id = scope.innermost_loop()->id();
-         }
-   } else {
-     /* The temporary was not written in the IF branch corresponding
-      * to this ELSE branch, hence the write is conditional.
-      */
-      conditionality_in_loop_id = write_is_conditional;
-   }
-}
-
-bool temp_comp_access::conditional_ifelse_write_in_loop() const
-{
-   return conditionality_in_loop_id <= conditionality_unresolved;
-}
-
-void temp_comp_access::propagate_live_range_to_dominant_write_scope()
-{
-   first_write = first_write_scope->begin();
-   int lr = first_write_scope->end();
-
-   if (last_read < lr)
-      last_read = lr;
-}
-
-register_live_range temp_comp_access::get_required_live_range()
-{
-   bool keep_for_full_loop = false;
-
-   /* This register component is not used at all, or only read,
-    * mark it as unused and ignore it when renaming.
-    * glsl_to_tgsi_visitor::renumber_registers will take care of
-    * eliminating registers that are not written to.
-    */
-   if (last_write < 0)
-      return make_live_range(-1, -1);
-
-   assert(first_write_scope);
-
-   /* Only written to, just make sure the register component is not
-    * reused in the range it is used to write to
-    */
-   if (!last_read_scope)
-      return make_live_range(first_write, last_write + 1);
-
-   const prog_scope *enclosing_scope_first_read = first_read_scope;
-   const prog_scope *enclosing_scope_first_write = first_write_scope;
-
-   /* We read before writing in a loop
-    * hence the value must survive the loops
-    */
-   if ((first_read <= first_write) &&
-       first_read_scope->is_in_loop()) {
-      keep_for_full_loop = true;
-      enclosing_scope_first_read = first_read_scope->outermost_loop();
-   }
-
-   /* A conditional write within a (nested) loop must survive the outermost
-    * loop if the last read was not within the same scope.
-    */
-   const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
-   if (conditional && !conditional->contains_range_of(*last_read_scope) &&
-       (conditional->is_switchcase_scope_in_loop() ||
-        conditional_ifelse_write_in_loop())) {
-         keep_for_full_loop = true;
-         enclosing_scope_first_write = conditional->outermost_loop();
-   }
-
-   /* Evaluate the scope that is shared by all: required first write scope,
-    * required first read before write scope, and last read scope.
-    */
-   const prog_scope *enclosing_scope = enclosing_scope_first_read;
-   if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
-      enclosing_scope = enclosing_scope_first_write;
-
-   if (last_read_scope->contains_range_of(*enclosing_scope))
-      enclosing_scope = last_read_scope;
-
-   while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
-          !enclosing_scope->contains_range_of(*last_read_scope)) {
-      enclosing_scope = enclosing_scope->parent();
-      assert(enclosing_scope);
-   }
-
-   /* Propagate the last read scope to the target scope */
-   while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
-      /* If the read is in a loop and we have to move up the scope we need to
-       * extend the live range to the end of this current loop because at this
-       * point we don't know whether the component was written before
-       * un-conditionally in the same loop.
-       */
-      if (last_read_scope->is_loop())
-         last_read = last_read_scope->end();
-
-      last_read_scope = last_read_scope->parent();
-   }
-
-   /* If the variable has to be kept for the whole loop, and we
-    * are currently in a loop, then propagate the live range.
-    */
-   if (keep_for_full_loop && first_write_scope->is_loop())
-      propagate_live_range_to_dominant_write_scope();
-
-   /* Propagate the first_dominant_write scope to the target scope */
-   while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
-      /* Propagate live_range if there was a break in a loop and the write was
-       * after the break inside that loop. Note, that this is only needed if
-       * we move up in the scopes.
-       */
-      if (first_write_scope->loop_break_line() < first_write) {
-         keep_for_full_loop = true;
-	 propagate_live_range_to_dominant_write_scope();
-      }
-
-      first_write_scope = first_write_scope->parent();
-
-      /* Propagate live_range if we are now in a loop */
-      if (keep_for_full_loop && first_write_scope->is_loop())
-	  propagate_live_range_to_dominant_write_scope();
-   }
-
-   /* The last write past the last read is dead code, but we have to
-    * ensure that the component is not reused too early, hence extend the
-    * live_range past the last write.
-    */
-   if (last_write >= last_read)
-      last_read = last_write + 1;
-
-   /* Here we are at the same scope, all is resolved */
-   return make_live_range(first_write, last_read);
-}
-
-/* Helper class for sorting and searching the registers based
- * on live ranges. */
-class register_merge_record {
-public:
-   int begin;
-   int end;
-   int reg;
-   bool erase;
-   bool is_array_elm;
-
-   bool operator < (const register_merge_record& rhs) const {
-      return begin < rhs.begin;
-   }
-};
-
-LiverangeEvaluator::LiverangeEvaluator():
-   line(0),
-   loop_id(1),
-   if_id(1),
-   switch_id(0),
-   is_at_end(false),
-   n_scopes(1),
-   cur_scope(nullptr)
-{
-}
-
-void LiverangeEvaluator::run(const Shader& shader,
-                             std::vector<register_live_range>& register_live_ranges)
-{
-   temp_acc.resize(register_live_ranges.size());
-   fill(temp_acc.begin(), temp_acc.end(), temp_access());
-
-   sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n";
-
-   for (const auto& block: shader.m_ir) {
-      for (const auto& ir: block) {
-         switch (ir->type()) {
-         case Instruction::cond_if:
-         case Instruction::cond_else:
-         case Instruction::loop_begin:
-            ++n_scopes;
-         default:
-            ;
-         }
-      }
-   }
-
-   scopes.reset(new prog_scope_storage(n_scopes));
-
-   cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line);
-
-   line = 0;
-
-   for (auto& v: shader.m_temp) {
-      if (v.second->type() == Value::gpr) {
-         sfn_log << SfnLog::merge << "Record " << *v.second << "\n";
-         const auto& g = static_cast<const GPRValue&>(*v.second);
-         if (g.is_input()) {
-            sfn_log << SfnLog::merge << "Record INPUT write for "
-                    << g << " in " << temp_acc.size() << " temps\n";
-            temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false);
-            temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false);
-         }
-         if (g.keep_alive()) {
-            sfn_log << SfnLog::merge << "Record KEEP ALIVE for "
-                    << g << " in " << temp_acc.size() << " temps\n";
-            temp_acc[g.sel()].record_read(0x7fffff, cur_scope, 1 << g.chan(), false);
-         }
-      }
-   }
-
-   for (const auto& block: shader.m_ir)
-      for (const auto& ir: block)  {
-         ir->evalue_liveness(*this);
-         if (ir->type() != Instruction::alu ||
-             static_cast<const AluInstruction&>(*ir).flag(alu_last_instr))
-            ++line;
-      }
-
-   assert(cur_scope->type() == outer_scope);
-   cur_scope->set_end(line);
-   is_at_end = true;
-
-   get_required_live_ranges(register_live_ranges);
-}
-
-
-void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm)
-{
-   sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n";
-   if (src.type() == Value::gpr) {
-      const GPRValue& v = static_cast<const GPRValue&>(src);
-      if (v.chan() < 4)
-         temp_acc[v.sel()].record_read(v.keep_alive() ? 0x7fffff: line, cur_scope, 1 << v.chan(), is_array_elm);
-      return;
-   } else if (src.type() == Value::gpr_array_value) {
-      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
-      v.record_read(*this);
-   } else if (src.type() == Value::kconst) {
-      const UniformValue& v = static_cast<const UniformValue&>(src);
-      if (v.addr())
-         record_read(*v.addr(),is_array_elm);
-   }
-}
-
-void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
-{
-   sfn_log << SfnLog::merge << "Record write for "
-           << src << " in " << temp_acc.size() << " temps\n";
-
-   if (src.type() == Value::gpr) {
-      const GPRValue& v = static_cast<const GPRValue&>(src);
-      assert(v.sel() < temp_acc.size());
-      if (v.chan() < 4)
-         temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm);
-      return;
-   } else if (src.type() == Value::gpr_array_value) {
-      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
-      v.record_write(*this);
-   } else if (src.type() == Value::kconst) {
-      const UniformValue& v = static_cast<const UniformValue&>(src);
-      if (v.addr())
-         record_write(*v.addr(),is_array_elm);
-   }
-}
-
-void LiverangeEvaluator::record_read(const GPRVector& src)
-{
-   for (int i = 0; i < 4; ++i)
-      if (src.reg_i(i))
-         record_read(*src.reg_i(i));
-}
-
-void LiverangeEvaluator::record_write(const GPRVector& dst)
-{
-   for (int i = 0; i < 4; ++i)
-      if (dst.reg_i(i))
-         record_write(*dst.reg_i(i));
-}
-
-void LiverangeEvaluator::get_required_live_ranges(std::vector<register_live_range>& register_live_ranges)
-{
-   sfn_log << SfnLog::merge << "== register live ranges ==========\n";
-   for(unsigned i = 0; i < register_live_ranges.size(); ++i) {
-      sfn_log << SfnLog::merge << setw(4) << i;
-      register_live_ranges[i] = temp_acc[i].get_required_live_range();
-      sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", "
-		   << register_live_ranges[i].end << "]\n";
-   }
-   sfn_log << SfnLog::merge << "==================================\n\n";
-}
-
-void LiverangeEvaluator::scope_if()
-{
-   cur_scope = scopes->create(cur_scope, if_branch, if_id++,
-                              cur_scope->nesting_depth() + 1, line + 1);
-}
-
-void LiverangeEvaluator::scope_else()
-{
-   assert(cur_scope->type() == if_branch);
-   cur_scope->set_end(line - 1);
-   cur_scope = scopes->create(cur_scope->parent(), else_branch,
-                              cur_scope->id(), cur_scope->nesting_depth(),
-                             line + 1);
-}
-
-void LiverangeEvaluator::scope_endif()
-{
-   cur_scope->set_end(line - 1);
-   cur_scope = cur_scope->parent();
-   assert(cur_scope);
-}
-
-void LiverangeEvaluator::scope_loop_begin()
-{
-   cur_scope = scopes->create(cur_scope, loop_body, loop_id++,
-                              cur_scope->nesting_depth() + 1, line);
-}
-
-void LiverangeEvaluator::scope_loop_end()
-{
-   assert(cur_scope->type() == loop_body);
-   cur_scope->set_end(line);
-   cur_scope = cur_scope->parent();
-   assert(cur_scope);
-}
-
-void LiverangeEvaluator::scope_loop_break()
-{
-   cur_scope->set_loop_break_line(line);
-}
-
-/* This functions evaluates the register merges by using a binary
- * search to find suitable merge candidates. */
-
-std::vector<rename_reg_pair>
-get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges)
-{
-
-   std::vector<rename_reg_pair> result(live_ranges.size(), rename_reg_pair{false, false, 0});
-   std::vector<register_merge_record> reg_access;
-
-   for (unsigned i = 0; i < live_ranges.size(); ++i) {
-      if (live_ranges[i].begin >= 0) {
-         register_merge_record r;
-         r.begin = live_ranges[i].begin;
-         r.end = live_ranges[i].end;
-         r.is_array_elm = live_ranges[i].is_array_elm;
-         r.reg = i;
-         r.erase = false;
-         reg_access.push_back(r);
-      }
-   }
-
-   std::sort(reg_access.begin(), reg_access.end());
-
-   for (auto& r : reg_access)
-      sfn_log << SfnLog::merge << "Use Range " <<r.reg << " ["
-              << r.begin << ", "  << r.end << "]\n";
-
-   auto trgt = reg_access.begin();
-   auto reg_access_end = reg_access.end();
-   auto first_erase = reg_access_end;
-   auto search_start = trgt + 1;
-
-   while (trgt != reg_access_end) {
-      /* Find the next register that has a live-range starting past the
-       * search start and that is not an array element. Array elements can't
-       * be moved (Moving the whole array could be an option to be implemented later)*/
-
-      sfn_log << SfnLog::merge << "Next target is "
-              << trgt->reg << "[" << trgt->begin << ", "  << trgt->end << "]\n";
-
-
-      auto src = upper_bound(search_start, reg_access_end, trgt->end,
-                             [](int bound, const register_merge_record& m){
-                                    return bound < m.begin && !m.is_array_elm;}
-                             );
-
-      if (src != reg_access_end) {
-         result[src->reg].new_reg = trgt->reg;
-         result[src->reg].valid = true;
-
-         sfn_log << SfnLog::merge << "Map "
-                 << src->reg << "[" << src->begin << ", "  << src->end << "] to  "
-                 << trgt->reg << "[" << trgt->begin << ", "  << trgt->end << ":";
-         trgt->end = src->end;
-         sfn_log << SfnLog::merge << trgt->end  << "]\n";
-
-         /* Since we only search forward, don't remove the renamed
-          * register just now, only mark it. */
-         src->erase = true;
-
-         if (first_erase == reg_access_end)
-            first_erase = src;
-
-         search_start = src + 1;
-      } else {
-         /* Moving to the next target register it is time to remove
-          * the already merged registers from the search range */
-         if (first_erase != reg_access_end) {
-	    auto outp = first_erase;
-	    auto inp = first_erase + 1;
-
-            while (inp != reg_access_end) {
-               if (!inp->erase)
-                  *outp++ = *inp;
-               ++inp;
-            }
-
-            reg_access_end = outp;
-            first_erase = reg_access_end;
-         }
-         ++trgt;
-         search_start = trgt + 1;
-      }
-   }
-   return result;
-}
-
-} // end ns r600
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.h b/src/gallium/drivers/r600/sfn/sfn_liverange.h
deleted file mode 100644
index 8b9ed2e..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_liverange.h
+++ /dev/null
@@ -1,314 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_LIVERANGE_H
-#define SFN_LIVERANGE_H
-
-#include <cstdint>
-#include <ostream>
-#include <vector>
-#include <limits>
-
-#include "sfn_instruction_base.h"
-#include "sfn_nir.h"
-
-namespace r600 {
-
-/** Storage to record the required live range of a temporary register
- * begin == end == -1 indicates that the register can be reused without
- * limitations. Otherwise, "begin" indicates the first instruction in which
- * a write operation may target this temporary, and end indicates the
- * last instruction in which a value can be read from this temporary.
- * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin.
- */
-struct register_live_range {
-   int begin;
-   int end;
-   bool is_array_elm;
-};
-
-enum prog_scope_type {
-   outer_scope,           /* Outer program scope */
-   loop_body,             /* Inside a loop */
-   if_branch,             /* Inside if branch */
-   else_branch,           /* Inside else branch */
-   switch_body,           /* Inside switch statement */
-   switch_case_branch,    /* Inside switch case statement */
-   switch_default_branch, /* Inside switch default statement */
-   undefined_scope
-};
-
-class prog_scope {
-public:
-   prog_scope();
-   prog_scope(prog_scope *parent, prog_scope_type type, int id,
-              int depth, int begin);
-
-   prog_scope_type type() const;
-   prog_scope *parent() const;
-   int nesting_depth() const;
-   int id() const;
-   int end() const;
-   int begin() const;
-   int loop_break_line() const;
-
-   const prog_scope *in_else_scope() const;
-   const prog_scope *in_ifelse_scope() const;
-   const prog_scope *in_parent_ifelse_scope() const;
-   const prog_scope *innermost_loop() const;
-   const prog_scope *outermost_loop() const;
-   const prog_scope *enclosing_conditional() const;
-
-   bool is_loop() const;
-   bool is_in_loop() const;
-   bool is_switchcase_scope_in_loop() const;
-   bool is_conditional() const;
-   bool is_child_of(const prog_scope *scope) const;
-   bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
-
-   bool break_is_for_switchcase() const;
-   bool contains_range_of(const prog_scope& other) const;
-
-   void set_end(int end);
-   void set_loop_break_line(int line);
-
-private:
-   prog_scope_type scope_type;
-   int scope_id;
-   int scope_nesting_depth;
-   int scope_begin;
-   int scope_end;
-   int break_loop_line;
-   prog_scope *parent_scope;
-};
-
-/* Some storage class to encapsulate the prog_scope (de-)allocations */
-class prog_scope_storage {
-public:
-   prog_scope_storage(int n);
-   ~prog_scope_storage();
-   prog_scope * create(prog_scope *p, prog_scope_type type, int id,
-                       int lvl, int s_begin);
-private:
-   int current_slot;
-   std::vector<prog_scope> storage;
-};
-
-/* Class to track the access to a component of a temporary register. */
-
-class temp_comp_access {
-public:
-   temp_comp_access();
-
-   void record_read(int line, prog_scope *scope);
-   void record_write(int line, prog_scope *scope);
-   register_live_range get_required_live_range();
-private:
-   void propagate_live_range_to_dominant_write_scope();
-   bool conditional_ifelse_write_in_loop() const;
-
-   void record_ifelse_write(const prog_scope& scope);
-   void record_if_write(const prog_scope& scope);
-   void record_else_write(const prog_scope& scope);
-
-   prog_scope *last_read_scope;
-   prog_scope *first_read_scope;
-   prog_scope *first_write_scope;
-
-   int first_write;
-   int last_read;
-   int last_write;
-   int first_read;
-
-   /* This member variable tracks the current resolution of conditional writing
-    * to this temporary in IF/ELSE clauses.
-    *
-    * The initial value "conditionality_untouched" indicates that this
-    * temporary has not yet been written to within an if clause.
-    *
-    * A positive (other than "conditionality_untouched") number refers to the
-    * last loop id for which the write was resolved as unconditional. With each
-    * new loop this value will be overwitten by "conditionality_unresolved"
-    * on entering the first IF clause writing this temporary.
-    *
-    * The value "conditionality_unresolved" indicates that no resolution has
-    * been achieved so far. If the variable is set to this value at the end of
-    * the processing of the whole shader it also indicates a conditional write.
-    *
-    * The value "write_is_conditional" marks that the variable is written
-    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
-    * least one loop.
-    */
-   int conditionality_in_loop_id;
-
-   /* Helper constants to make the tracking code more readable. */
-   static const int write_is_conditional = -1;
-   static const int conditionality_unresolved = 0;
-   static const int conditionality_untouched;
-   static const int write_is_unconditional;
-
-   /* A bit field tracking the nexting levels of if-else clauses where the
-    * temporary has (so far) been written to in the if branch, but not in the
-    * else branch.
-    */
-   unsigned int if_scope_write_flags;
-
-   int next_ifelse_nesting_depth;
-   static const int supported_ifelse_nesting_depth = 32;
-
-   /* Tracks the last if scope in which the temporary was written to
-    * without a write in the corresponding else branch. Is also used
-    * to track read-before-write in the according scope.
-    */
-   const prog_scope *current_unpaired_if_write_scope;
-
-   /* Flag to resolve read-before-write in the else scope. */
-   bool was_written_in_current_else_scope;
-};
-
-/* Class to track the access to all components of a temporary register. */
-class temp_access {
-public:
-   temp_access();
-   void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
-   void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
-   register_live_range get_required_live_range();
-private:
-   void update_access_mask(int mask);
-
-   temp_comp_access comp[4];
-   int access_mask;
-   bool needs_component_tracking;
-   bool is_array_element;
-};
-
-/* Helper class to merge the live ranges of an arrays.
- *
- * For arrays the array length, live range, and component access needs to
- * be kept, because when live ranges are merged or arrays are interleaved
- * one can only merge or interleave an array into another with equal or more
- * elements. For interleaving it is also required that the sum of used swizzles
- * is at most four.
- */
-
-class array_live_range {
-public:
-   array_live_range();
-   array_live_range(unsigned aid, unsigned alength);
-   array_live_range(unsigned aid, unsigned alength, int first_access,
-		  int last_access, int mask);
-
-   void set_live_range(int first_access, int last_access);
-   void set_begin(int _begin){first_access = _begin;}
-   void set_end(int _end){last_access = _end;}
-   void set_access_mask(int s);
-
-   static void merge(array_live_range *a, array_live_range *b);
-   static void interleave(array_live_range *a, array_live_range *b);
-
-   int array_id() const {return id;}
-   int target_array_id() const {return target_array ? target_array->id : 0;}
-   const array_live_range *final_target() const {return target_array ?
-	       target_array->final_target() : this;}
-   unsigned array_length() const { return length;}
-   int begin() const { return first_access;}
-   int end() const { return last_access;}
-   int access_mask() const { return component_access_mask;}
-   int used_components() const {return used_component_count;}
-
-   bool time_doesnt_overlap(const array_live_range& other) const;
-
-   void print(std::ostream& os) const;
-
-   bool is_mapped() const { return target_array != nullptr;}
-
-   int8_t remap_one_swizzle(int8_t idx) const;
-
-private:
-   void init_swizzles();
-   void set_target(array_live_range  *target);
-   void merge_live_range_from(array_live_range *other);
-   void interleave_into(array_live_range *other);
-
-   unsigned id;
-   unsigned length;
-   int first_access;
-   int last_access;
-   uint8_t component_access_mask;
-   uint8_t used_component_count;
-   array_live_range *target_array;
-   int8_t swizzle_map[4];
-};
-
-
-
-class LiverangeEvaluator {
-public:
-   LiverangeEvaluator();
-
-   void run(const Shader& shader,
-            std::vector<register_live_range> &register_live_ranges);
-
-   void scope_if();
-   void scope_else();
-   void scope_endif();
-   void scope_loop_begin();
-   void scope_loop_end();
-   void scope_loop_break();
-
-   void record_read(const Value& src, bool is_array_elm = false);
-   void record_write(const Value& dst, bool is_array_elm = false);
-
-   void record_read(const GPRVector& src);
-   void record_write(const GPRVector& dst);
-
-private:
-
-   prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id,
-                            int lvl, int s_begin);
-
-
-   void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges);
-
-   int line;
-   int loop_id;
-   int if_id;
-   int switch_id;
-   bool is_at_end;
-   int n_scopes;
-   std::unique_ptr<prog_scope_storage> scopes;
-   prog_scope *cur_scope;
-
-   std::vector<temp_access> temp_acc;
-
-};
-
-std::vector<rename_reg_pair>
-get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges);
-
-} // end namespace r600
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
new file mode 100644
index 0000000..b02a51ed
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp
@@ -0,0 +1,438 @@
+#include "sfn_liverangeevaluator.h"
+#include "sfn_liverangeevaluator_helpers.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_tex.h"
+#include "sfn_shader.h"
+#include "sfn_debug.h"
+
+#include <algorithm>
+#include <map>
+
+namespace r600 {
+
+class LiveRangeInstrVisitor : public InstrVisitor {
+public:
+   LiveRangeInstrVisitor(LiveRangeMap& live_range_map);
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override;
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override;
+   void visit(IfInstr *instr) override;
+   void visit(WriteScratchInstr *instr) override;
+   void visit(StreamOutInstr *instr) override;
+   void visit(MemRingOutInstr *instr) override;
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override;
+   void visit(WriteTFInstr *instr) override;
+   void visit(LDSAtomicInstr *instr) override;
+   void visit(LDSReadInstr *instr) override;
+   void visit(RatInstr *instr) override;
+
+   void finalize();
+private:
+
+   void record_write(const Register *reg);
+   void record_read(const Register *reg, LiveRangeEntry::EUse use);
+
+   void record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle& swizzle);
+   void record_read(const RegisterVec4 &reg, LiveRangeEntry::EUse use);
+
+   void scope_if();
+   void scope_else();
+   void scope_endif();
+   void scope_loop_begin();
+   void scope_loop_end();
+   void scope_loop_break();
+   ProgramScope *create_scope(ProgramScope *parent, ProgramScopeType type,
+                              int id, int nesting_depth, int line);
+
+   std::vector<std::unique_ptr<ProgramScope>> m_scopes;
+   ProgramScope *m_current_scope;
+   LiveRangeMap& m_live_range_map;
+   RegisterAccess m_register_access;
+
+   int m_line{0};
+   int m_if_id{1};
+   int m_loop_id{1};
+};
+
+LiveRangeEvaluator::LiveRangeEvaluator()
+{
+
+}
+
+
+LiveRangeMap LiveRangeEvaluator::run(Shader& sh)
+{
+
+   LiveRangeMap range_map = sh.prepare_live_range_map();
+
+
+   LiveRangeInstrVisitor evaluator(range_map);
+
+   for (auto& b : sh.func())
+      b->accept(evaluator);
+
+   evaluator.finalize();
+
+   return range_map;
+}
+
+void LiveRangeInstrVisitor::finalize()
+{
+   m_current_scope->set_end(m_line);
+
+   for (int i = 0; i < 4; ++i) {
+
+      auto& live_ranges = m_live_range_map.component(i);
+      for(const auto& r : live_ranges) {
+         if (r.m_register->live_end_pinned())
+            record_read(r.m_register, LiveRangeEntry::use_unspecified);
+      }
+
+      auto& comp_access = m_register_access.component(i);
+
+      for (size_t i = 0; i < comp_access.size(); ++i) {
+         sfn_log << SfnLog::merge << "Evaluae access for " << *live_ranges[i].m_register << "\n";
+
+         auto& rca = comp_access[i];
+         rca.update_required_live_range();
+         live_ranges[i].m_start = rca.range().start;
+         live_ranges[i].m_end = rca.range().end;
+         live_ranges[i].m_use = rca.use_type();
+      }
+   }
+}
+
+LiveRangeInstrVisitor::LiveRangeInstrVisitor(LiveRangeMap& live_range_map):
+   m_live_range_map(live_range_map),
+   m_register_access(live_range_map.sizes())
+{
+   if (sfn_log.has_debug_flag(SfnLog::merge)) {
+      sfn_log << SfnLog::merge << "Have component register numbers: ";
+      for (auto n : live_range_map.sizes())
+         sfn_log <<  n << " ";
+      sfn_log << "\n";
+   }
+
+   m_scopes.push_back(std::make_unique<ProgramScope>(nullptr, outer_scope, 0, 0, 0));
+   m_current_scope = m_scopes[0].get();
+
+   for (int i = 0; i < 4; ++i) {
+      const auto& comp = live_range_map.component(i);
+      for(const auto& r : comp) {
+         if (r.m_register->live_start_pinned())
+            record_write(r.m_register);
+      }
+   }
+   m_line = 1;
+}
+
+void LiveRangeInstrVisitor::record_write(const RegisterVec4& reg, const RegisterVec4::Swizzle &swizzle)
+{
+   for (int i = 0; i < 4; ++i) {
+      if (swizzle[i] < 6 && reg[i]->chan() < 4)
+         record_write(reg[i]);
+   }
+}
+
+void LiveRangeInstrVisitor::record_read(const RegisterVec4& reg, LiveRangeEntry::EUse use)
+{
+   for (int i = 0; i < 4; ++i) {
+      if (reg[i]->chan() < 4)
+         record_read(reg[i], use);
+   }
+}
+
+void LiveRangeInstrVisitor::scope_if()
+{
+   m_current_scope = create_scope(m_current_scope, if_branch, m_if_id++,
+                                  m_current_scope->nesting_depth() + 1, m_line + 1);
+}
+
+void LiveRangeInstrVisitor::scope_else()
+{
+   assert(m_current_scope->type() == if_branch);
+   m_current_scope->set_end(m_line - 1);
+
+   m_current_scope = create_scope(m_current_scope->parent(), else_branch, m_current_scope->id(),
+                                  m_current_scope->nesting_depth() + 1, m_line + 1);
+}
+
+void LiveRangeInstrVisitor::scope_endif()
+{
+   m_current_scope->set_end(m_line - 1);
+   m_current_scope = m_current_scope->parent();
+   assert(m_current_scope);
+}
+
+void LiveRangeInstrVisitor::scope_loop_begin()
+{
+   m_current_scope = create_scope(m_current_scope, loop_body, m_loop_id++,
+                                  m_current_scope->nesting_depth() + 1, m_line);
+}
+
+void LiveRangeInstrVisitor::scope_loop_end()
+{
+   m_current_scope->set_end(m_line);
+   m_current_scope = m_current_scope->parent();
+   assert(m_current_scope);
+}
+
+void LiveRangeInstrVisitor::scope_loop_break()
+{
+   m_current_scope->set_loop_break_line(m_line);
+}
+
+ProgramScope *LiveRangeInstrVisitor::create_scope(ProgramScope *parent, ProgramScopeType type,
+                                                  int id, int nesting_depth, int line)
+{
+   m_scopes.emplace_back(std::make_unique<ProgramScope>(parent, type, id, nesting_depth, line));
+   return m_scopes[m_scopes.size() - 1].get();
+}
+
+void LiveRangeInstrVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   if (instr->has_alu_flag(alu_write))
+      record_write(instr->dest());
+   for (unsigned i = 0; i < instr->n_sources(); ++i) {
+      record_read(instr->src(i).as_register(), LiveRangeEntry::use_unspecified);
+      auto uniform = instr->src(i).as_uniform();
+      if (uniform && uniform->buf_addr()) {
+         record_read(uniform->buf_addr()->as_register(), LiveRangeEntry::use_unspecified);
+      }
+   }
+}
+
+void LiveRangeInstrVisitor::visit(AluGroup *group)
+{
+   for (auto i : *group)
+      if (i)
+         i->accept(*this);
+}
+
+void LiveRangeInstrVisitor::visit(TexInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_write(instr->dst(), instr->all_dest_swizzle());
+
+   auto src = instr->src();
+   record_read(src, LiveRangeEntry::use_unspecified);
+
+   if (instr->sampler_offset() && instr->sampler_offset()->as_register())
+      record_read(instr->sampler_offset()->as_register(), LiveRangeEntry::use_unspecified);
+
+}
+
+void LiveRangeInstrVisitor::visit(ExportInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   auto src = instr->value();
+   record_read(src, LiveRangeEntry::use_export);
+}
+
+void LiveRangeInstrVisitor::visit(FetchInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_write(instr->dst(), instr->all_dest_swizzle());
+   auto& src = instr->src();
+   if (src.chan() < 4) /* Channel can be 7 to disable source */
+      record_read(&src, LiveRangeEntry::use_unspecified);
+}
+
+void LiveRangeInstrVisitor::visit(Block *instr)
+{
+   sfn_log << SfnLog::merge << "Visit block\n";
+   for (auto i : *instr) {
+      i->accept(*this);
+      if (i->end_group())
+         ++m_line;
+   }
+   sfn_log << SfnLog::merge << "End block\n";
+}
+
+void LiveRangeInstrVisitor::visit(WriteScratchInstr *instr)
+{
+   auto& src = instr->value();
+   for (int i = 0; i < 4; ++i) {
+      if ((1 << i) & instr->write_mask()) {
+         record_read(src[i], LiveRangeEntry::use_unspecified);
+      }
+   }
+
+   auto addr = instr->address();
+   if (addr)
+      record_read(addr, LiveRangeEntry::use_unspecified);
+}
+
+void LiveRangeInstrVisitor::visit(StreamOutInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   auto src = instr->value();
+   record_read(src, LiveRangeEntry::use_export);
+}
+
+void LiveRangeInstrVisitor::visit(MemRingOutInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   auto src = instr->value();
+   record_read(src, LiveRangeEntry::use_export);
+
+   auto idx = instr->export_index();
+   if (idx && idx->as_register())
+      record_read(idx->as_register(), LiveRangeEntry::use_unspecified);
+}
+
+void LiveRangeInstrVisitor::visit(ControlFlowInstr *instr)
+{
+   switch (instr->cf_type()) {
+   case ControlFlowInstr::cf_else: scope_else(); break;
+   case ControlFlowInstr::cf_endif: scope_endif(); break;
+   case ControlFlowInstr::cf_loop_begin: scope_loop_begin(); break;
+   case ControlFlowInstr::cf_loop_end: scope_loop_end(); break;
+   case ControlFlowInstr::cf_loop_break: scope_loop_break(); break;
+   case ControlFlowInstr::cf_loop_continue: break;
+   case ControlFlowInstr::cf_wait_ack: break;
+   default:
+      unreachable("Flow control unreachanble");
+   }
+}
+
+void LiveRangeInstrVisitor::visit(IfInstr *instr)
+{
+   instr->predicate()->accept(*this);
+   scope_if();
+}
+
+void LiveRangeInstrVisitor::visit(GDSInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_read(instr->src(), LiveRangeEntry::use_unspecified);
+   if (instr->uav_id())
+      record_read(instr->uav_id(), LiveRangeEntry::use_unspecified);
+   record_write(instr->dest());
+}
+
+void LiveRangeInstrVisitor::visit(RatInstr *instr)
+{
+   sfn_log << SfnLog::merge << "Visit " << *instr << "\n";
+   record_read(instr->value(), LiveRangeEntry::use_unspecified);
+   record_read(instr->addr(), LiveRangeEntry::use_unspecified);
+
+   auto idx = instr->rat_id_offset();
+   if (idx)
+      record_read(idx, LiveRangeEntry::use_unspecified);
+}
+
+
+void LiveRangeInstrVisitor::visit(WriteTFInstr *instr)
+{
+   record_read(instr->value(), LiveRangeEntry::use_export);
+}
+
+void LiveRangeInstrVisitor::visit(UNUSED LDSAtomicInstr *instr)
+{   
+   unreachable("LDSAtomicInstr must be lowered before scheduling and live range evaluation");
+}
+
+void LiveRangeInstrVisitor::visit(UNUSED LDSReadInstr *instr)
+{
+   unreachable("LDSReadInstr must be lowered before scheduling and live range evaluation");
+}
+
+void LiveRangeInstrVisitor::record_write(const Register *reg)
+{
+   auto addr = reg->get_addr();
+   if (addr && addr->as_register()) {
+      record_read(addr->as_register(), LiveRangeEntry::use_unspecified);
+
+      const auto av = static_cast<const LocalArrayValue *>(reg);
+      auto& array = av->array();
+
+      sfn_log << SfnLog::merge << array << " write:" << m_line << "\n";
+
+      for (auto i = 0u; i < array.size(); ++i) {
+         auto& rav = m_register_access(array(i, reg->chan()));
+         rav.record_write(m_line, m_current_scope);
+      }
+   } else {
+      auto& ra = m_register_access(*reg);
+      sfn_log << SfnLog::merge << *reg  << " write:" << m_line << "\n";
+      ra.record_write(m_line, m_current_scope);
+   }
+}
+
+void LiveRangeInstrVisitor::record_read(const Register *reg, LiveRangeEntry::EUse use)
+{
+   if (!reg)
+      return;
+
+   auto addr = reg->get_addr();
+   if (addr && addr->as_register()) {
+      sfn_log << SfnLog::merge << "Record reading address register " << *addr  << "\n";
+
+      auto& ra = m_register_access(*addr->as_register());
+      ra.record_read(m_line, m_current_scope, use);
+
+      const auto av = static_cast<const LocalArrayValue *>(reg);
+      auto& array = av->array();
+      sfn_log << SfnLog::merge << array << " read:" << m_line << "\n";
+
+      for (auto i = 0u; i < array.size(); ++i) {
+         auto& rav = m_register_access(array(i, reg->chan()));
+         rav.record_read(m_line, m_current_scope, use);
+      }
+   } else {
+      sfn_log << SfnLog::merge << *reg << " read:" << m_line << "\n";
+      auto& ra = m_register_access(*reg);
+      ra.record_read(m_line, m_current_scope, use);
+   }
+}
+
+std::ostream& operator <<  (std::ostream& os, const LiveRangeMap& lrm)
+{
+   os << "Live ranges\n";
+   for (int i = 0; i < 4; ++i) {
+      const auto& comp = lrm.component(i);
+      for (auto& range : comp)
+         os << "  " << range << "\n";
+   }
+   return os;
+}
+
+bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
+{
+   for (int i = 0; i < 4; ++i) {
+      const auto& lc = lhs.component(i);
+      const auto& rc = rhs.component(i);
+      if (lc.size() != rc.size())
+         return false;
+
+      for (auto j = 0u; j < lc.size(); ++j) {
+         const auto& lv = lc[j];
+         const auto& rv = rc[j];
+
+         if (lv.m_start != rv.m_start ||
+             lv.m_end != rv.m_end ||
+             lv.m_color != rv.m_color ||
+             !lv.m_register->equal_to(*rv.m_register))
+            return false;
+      }
+   }
+
+   return true;
+}
+
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
new file mode 100644
index 0000000..3518902
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.h
@@ -0,0 +1,23 @@
+#ifndef LIFERANGEEVALUATOR_H
+#define LIFERANGEEVALUATOR_H
+
+#include "sfn_valuefactory.h"
+
+#include <map>
+#include <cassert>
+
+namespace r600 {
+
+class Shader;
+
+class LiveRangeEvaluator  {
+public:
+
+   LiveRangeEvaluator();
+
+   LiveRangeMap run(Shader &sh);
+};
+
+}
+
+#endif // LIFERANGEEVALUATOR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
new file mode 100644
index 0000000..20294ac
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.cpp
@@ -0,0 +1,623 @@
+#include "sfn_liverangeevaluator_helpers.h"
+
+#include "sfn_virtualvalues.h"
+
+#include "util/u_math.h"
+
+#include <limits>
+#include <cassert>
+#include <iostream>
+
+namespace r600 {
+
+ProgramScope::ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
+                           int depth, int scope_begin):
+   scope_type(type),
+   scope_id(id),
+   scope_nesting_depth(depth),
+   scope_begin(scope_begin),
+   scope_end(-1),
+   break_loop_line(std::numeric_limits<int>::max()),
+   parent_scope(parent)
+{
+}
+
+ProgramScope::ProgramScope():
+   ProgramScope(nullptr, undefined_scope, -1, -1, -1)
+{
+}
+
+ProgramScopeType ProgramScope::type() const
+{
+   return scope_type;
+}
+
+ProgramScope *ProgramScope::parent() const
+{
+   return parent_scope;
+}
+
+int ProgramScope::nesting_depth() const
+{
+   return scope_nesting_depth;
+}
+
+bool ProgramScope::is_loop() const
+{
+   return (scope_type == loop_body);
+}
+
+bool ProgramScope::is_in_loop() const
+{
+   if (scope_type == loop_body)
+      return true;
+
+   if (parent_scope)
+      return parent_scope->is_in_loop();
+
+   return false;
+}
+
+const ProgramScope *ProgramScope::innermost_loop() const
+{
+   if (scope_type == loop_body)
+      return this;
+
+   if (parent_scope)
+      return parent_scope->innermost_loop();
+
+   return nullptr;
+}
+
+const ProgramScope *ProgramScope::outermost_loop() const
+{
+   const ProgramScope *loop = nullptr;
+   const ProgramScope *p = this;
+
+   do {
+      if (p->type() == loop_body)
+         loop = p;
+      p = p->parent();
+   } while (p);
+
+   return loop;
+}
+
+bool ProgramScope::is_child_of_ifelse_id_sibling(const ProgramScope *scope) const
+{
+   const ProgramScope *my_parent = in_parent_ifelse_scope();
+   while (my_parent) {
+      /* is a direct child? */
+      if (my_parent == scope)
+         return false;
+      /* is a child of the conditions sibling? */
+      if (my_parent->id() == scope->id())
+         return true;
+      my_parent = my_parent->in_parent_ifelse_scope();
+   }
+   return false;
+}
+
+bool ProgramScope::is_child_of(const ProgramScope *scope) const
+{
+   const ProgramScope *my_parent = parent();
+   while (my_parent) {
+      if (my_parent == scope)
+         return true;
+      my_parent = my_parent->parent();
+   }
+   return false;
+}
+
+const ProgramScope *ProgramScope::enclosing_conditional() const
+{
+   if (is_conditional())
+      return this;
+
+   if (parent_scope)
+      return parent_scope->enclosing_conditional();
+
+   return nullptr;
+}
+
+bool ProgramScope::contains_range_of(const ProgramScope& other) const
+{
+   return (begin() <= other.begin()) && (end() >= other.end());
+}
+
+bool ProgramScope::is_conditional() const
+{
+   return scope_type == if_branch ||
+         scope_type == else_branch ||
+         scope_type == switch_case_branch ||
+         scope_type == switch_default_branch;
+}
+
+const ProgramScope *ProgramScope::in_else_scope() const
+{
+   if (scope_type == else_branch)
+      return this;
+
+   if (parent_scope)
+      return parent_scope->in_else_scope();
+
+   return nullptr;
+}
+
+const ProgramScope *ProgramScope::in_parent_ifelse_scope() const
+{
+   if (parent_scope)
+      return parent_scope->in_ifelse_scope();
+   else
+      return nullptr;
+}
+
+const ProgramScope *ProgramScope::in_ifelse_scope() const
+{
+   if (scope_type == if_branch ||
+       scope_type == else_branch)
+      return this;
+
+   if (parent_scope)
+      return parent_scope->in_ifelse_scope();
+
+   return nullptr;
+}
+
+bool ProgramScope::is_switchcase_scope_in_loop() const
+{
+   return (scope_type == switch_case_branch ||
+           scope_type == switch_default_branch) &&
+         is_in_loop();
+}
+
+bool ProgramScope::break_is_for_switchcase() const
+{
+   if (scope_type == loop_body)
+      return false;
+
+   if (scope_type == switch_case_branch ||
+       scope_type == switch_default_branch ||
+       scope_type == switch_body)
+      return true;
+
+   if (parent_scope)
+      return parent_scope->break_is_for_switchcase();
+
+   return false;
+}
+
+int ProgramScope::id() const
+{
+   return scope_id;
+}
+
+int ProgramScope::begin() const
+{
+   return scope_begin;
+}
+
+int ProgramScope::end() const
+{
+   return scope_end;
+}
+
+void ProgramScope::set_end(int end)
+{
+   if (scope_end == -1)
+      scope_end = end;
+}
+
+void ProgramScope::set_loop_break_line(int line)
+{
+   if (scope_type == loop_body) {
+      break_loop_line = MIN2(break_loop_line, line);
+   } else {
+      if (parent_scope)
+         parent()->set_loop_break_line(line);
+   }
+}
+
+int ProgramScope::loop_break_line() const
+{
+   return break_loop_line;
+}
+
+RegisterCompAccess::RegisterCompAccess(LiveRange range):
+   last_read_scope(nullptr),
+   first_read_scope(nullptr),
+   first_write_scope(nullptr),
+   first_write(range.start),
+   last_read(range.end),
+   last_write(range.start),
+   first_read(std::numeric_limits<int>::max()),
+   conditionality_in_loop_id(conditionality_untouched),
+   if_scope_write_flags(0),
+   next_ifelse_nesting_depth(0),
+   current_unpaired_if_write_scope(nullptr),
+   was_written_in_current_else_scope(false),
+   m_range(range)
+{
+
+}
+
+RegisterCompAccess::RegisterCompAccess():
+   RegisterCompAccess(LiveRange(-1,-1))
+{
+}
+
+
+void RegisterCompAccess::record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use)
+{
+   last_read_scope = scope;
+   if (use != LiveRangeEntry::use_unspecified)
+      m_use_type.set(use);
+   if (last_read < line)
+      last_read = line;
+
+   if (first_read > line) {
+      first_read = line;
+      first_read_scope = scope;
+   }
+
+   /* If the conditionality of the first write is already resolved then
+    * no further checks are required.
+    */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* Check whether we are in a condition within a loop */
+   const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
+   const ProgramScope *enclosing_loop;
+   if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
+
+      /* If we have either not yet written to this register nor writes are
+       * resolved as unconditional in the enclosing loop then check whether
+       * we read before write in an IF/ELSE branch.
+       */
+      if ((conditionality_in_loop_id != write_is_conditional) &&
+          (conditionality_in_loop_id != enclosing_loop->id())) {
+
+         if (current_unpaired_if_write_scope)  {
+
+            /* Has been written in this or a parent scope? - this makes the temporary
+             * unconditionally set at this point.
+             */
+            if (scope->is_child_of(current_unpaired_if_write_scope))
+               return;
+
+            /* Has been written in the same scope before it was read? */
+            if (ifelse_scope->type() == if_branch) {
+               if (current_unpaired_if_write_scope->id() == scope->id())
+                  return;
+            } else {
+               if (was_written_in_current_else_scope)
+                  return;
+            }
+         }
+
+         /* The temporary was read (conditionally) before it is written, hence
+          * it should survive a loop. This can be signaled like if it were
+          * conditionally written.
+          */
+         conditionality_in_loop_id = write_is_conditional;
+      }
+   }
+}
+
+void RegisterCompAccess::record_write(int line, ProgramScope *scope)
+{
+   last_write = line;
+
+   if (first_write < 0) {
+      first_write = line;
+      first_write_scope = scope;
+
+      /* If the first write we encounter is not in a conditional branch, or
+       * the conditional write is not within a loop, then this is to be
+       * considered an unconditional dominant write.
+       */
+      const ProgramScope *conditional = scope->enclosing_conditional();
+      if (!conditional || !conditional->innermost_loop()) {
+         conditionality_in_loop_id = write_is_unconditional;
+      }
+   }
+
+   /* The conditionality of the first write is already resolved. */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* If the nesting depth is larger than the supported level,
+    * then we assume conditional writes.
+    */
+   if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
+      conditionality_in_loop_id = write_is_conditional;
+      return;
+   }
+
+   /* If we are in an IF/ELSE scope within a loop and the loop has not
+    * been resolved already, then record this write.
+    */
+   const ProgramScope *ifelse_scope = scope->in_ifelse_scope();
+   if (ifelse_scope && ifelse_scope->innermost_loop() &&
+       ifelse_scope->innermost_loop()->id()  != conditionality_in_loop_id)
+      record_ifelse_write(*ifelse_scope);
+}
+
+void RegisterCompAccess::record_ifelse_write(const ProgramScope& scope)
+{
+   if (scope.type() == if_branch) {
+      /* The first write in an IF branch within a loop implies unresolved
+       * conditionality (if it was untouched or unconditional before).
+       */
+      conditionality_in_loop_id = conditionality_unresolved;
+      was_written_in_current_else_scope = false;
+      record_if_write(scope);
+   } else {
+      was_written_in_current_else_scope = true;
+      record_else_write(scope);
+   }
+}
+
+void RegisterCompAccess::record_if_write(const ProgramScope& scope)
+{
+   /* Don't record write if this IF scope if it ...
+    * - is not the first write in this IF scope,
+    * - has already been written in a parent IF scope.
+    * In both cases this write is a secondary write that doesn't contribute
+    * to resolve conditionality.
+    *
+    * Record the write if it
+    * - is the first one (obviously),
+    * - happens in an IF branch that is a child of the ELSE branch of the
+    *   last active IF/ELSE pair. In this case recording this write is used to
+    *   established whether the write is (un-)conditional in the scope enclosing
+    *   this outer IF/ELSE pair.
+    */
+   if (!current_unpaired_if_write_scope ||
+       (current_unpaired_if_write_scope->id() != scope.id() &&
+        scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope)))  {
+      if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
+      current_unpaired_if_write_scope = &scope;
+      next_ifelse_nesting_depth++;
+   }
+}
+
+void RegisterCompAccess::record_else_write(const ProgramScope& scope)
+{
+   int mask = 1 << (next_ifelse_nesting_depth - 1);
+
+   /* If the temporary was written in an IF branch on the same scope level
+    * and this branch is the sibling of this ELSE branch, then we have a
+    * pair of writes that makes write access to this temporary unconditional
+    * in the enclosing scope.
+    */
+
+   if ((if_scope_write_flags & mask) &&
+       (scope.id() == current_unpaired_if_write_scope->id())) {
+      --next_ifelse_nesting_depth;
+      if_scope_write_flags &= ~mask;
+
+      /* The following code deals with propagating unconditionality from
+          * inner levels of nested IF/ELSE to the outer levels like in
+          *
+          * 1: var t;
+          * 2: if (a) {        <- start scope A
+          * 3:    if (b)
+          * 4:         t = ...
+          * 5:    else
+          * 6:         t = ...
+          * 7: } else {        <- start scope B
+          * 8:    if (c)
+          * 9:         t = ...
+          * A:    else         <- start scope C
+          * B:         t = ...
+          * C: }
+          *
+          */
+
+      const ProgramScope *parent_ifelse = scope.parent()->in_ifelse_scope();
+
+      if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) {
+         /* We are at the end of scope C and already recorded a write
+             * within an IF scope (A), the sibling of the parent ELSE scope B,
+             * and it is not yet resolved. Mark that as the last relevant
+             * IF scope. Below the write will be resolved for the A/B
+             * scope pair.
+             */
+         current_unpaired_if_write_scope = parent_ifelse;
+      } else {
+         current_unpaired_if_write_scope = nullptr;
+      }
+      /* Promote the first write scope to the enclosing scope because
+     * the current IF/ELSE pair is now irrelevant for the analysis.
+     * This is also required to evaluate the minimum life time for t in
+     * {
+     *    var t;
+     *    if (a)
+     *      t = ...
+     *    else
+     *      t = ...
+     *    x = t;
+     *    ...
+     * }
+     */
+      first_write_scope = scope.parent();
+
+      /* If some parent is IF/ELSE and in a loop then propagate the
+          * write to that scope. Otherwise the write is unconditional
+          * because it happens in both corresponding IF/ELSE branches
+          * in this loop, and hence, record the loop id to signal the
+          * resolution.
+          */
+      if (parent_ifelse && parent_ifelse->is_in_loop()) {
+         record_ifelse_write(*parent_ifelse);
+      } else {
+         conditionality_in_loop_id = scope.innermost_loop()->id();
+      }
+   } else {
+      /* The temporary was not written in the IF branch corresponding
+      * to this ELSE branch, hence the write is conditional.
+      */
+      conditionality_in_loop_id = write_is_conditional;
+   }
+}
+
+bool RegisterCompAccess::conditional_ifelse_write_in_loop() const
+{
+   return conditionality_in_loop_id <= conditionality_unresolved;
+}
+
+void RegisterCompAccess::propagate_live_range_to_dominant_write_scope()
+{
+   first_write = first_write_scope->begin();
+   int lr = first_write_scope->end();
+
+   if (last_read < lr)
+      last_read = lr;
+}
+
+void RegisterCompAccess::update_required_live_range()
+{
+   bool keep_for_full_loop = false;
+
+   /* This register component is not used at all, or only read,
+    * mark it as unused and ignore it when renaming.
+    * glsl_to_tgsi_visitor::renumber_registers will take care of
+    * eliminating registers that are not written to.
+    */
+   if (last_write < 0) {
+      m_range.start = -1;
+      m_range.end = -1;
+      return;
+   }
+
+   /* Only written to, just make sure the register component is not
+    * reused in the range it is used to write to
+    */
+   if (!last_read_scope) {
+      m_range.start = first_write;
+      m_range.end = last_write + 1;
+      return;
+   }
+
+   assert(first_write_scope || m_range.start >= 0);
+
+   /* The register was pre-defines, so th first write scope is the outerpost scopw */
+   if (!first_write_scope) {
+      first_write_scope = first_read_scope;
+      while (first_write_scope->parent())
+         first_write_scope = first_write_scope->parent();
+   }
+
+   const ProgramScope *enclosing_scope_first_read = first_read_scope;
+   const ProgramScope *enclosing_scope_first_write = first_write_scope;
+
+   /* We read before writing in a loop
+    * hence the value must survive the loops
+    */
+   if ((first_read <= first_write) &&
+       first_read_scope->is_in_loop()) {
+      keep_for_full_loop = true;
+      enclosing_scope_first_read = first_read_scope->outermost_loop();
+   }
+
+   /* A conditional write within a (nested) loop must survive the outermost
+    * loop if the last read was not within the same scope.
+    */
+   const ProgramScope *conditional = enclosing_scope_first_write->enclosing_conditional();
+   if (conditional && !conditional->contains_range_of(*last_read_scope) &&
+       (conditional->is_switchcase_scope_in_loop() ||
+        conditional_ifelse_write_in_loop())) {
+      keep_for_full_loop = true;
+      enclosing_scope_first_write = conditional->outermost_loop();
+   }
+
+   /* Evaluate the scope that is shared by all: required first write scope,
+    * required first read before write scope, and last read scope.
+    */
+   const ProgramScope *enclosing_scope = enclosing_scope_first_read;
+   if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
+      enclosing_scope = enclosing_scope_first_write;
+
+   if (last_read_scope->contains_range_of(*enclosing_scope))
+      enclosing_scope = last_read_scope;
+
+   while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
+          !enclosing_scope->contains_range_of(*last_read_scope)) {
+      enclosing_scope = enclosing_scope->parent();
+      assert(enclosing_scope);
+   }
+
+   /* Propagate the last read scope to the target scope */
+   while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
+      /* If the read is in a loop and we have to move up the scope we need to
+       * extend the live range to the end of this current loop because at this
+       * point we don't know whether the component was written before
+       * un-conditionally in the same loop.
+       */
+      if (last_read_scope->is_loop())
+         last_read = last_read_scope->end();
+
+      last_read_scope = last_read_scope->parent();
+   }
+
+   /* If the variable has to be kept for the whole loop, and we
+    * are currently in a loop, then propagate the live range.
+    */
+   if (keep_for_full_loop && first_write_scope->is_loop())
+      propagate_live_range_to_dominant_write_scope();
+
+   /* Propagate the first_dominant_write scope to the target scope */
+   while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
+      /* Propagate live_range if there was a break in a loop and the write was
+       * after the break inside that loop. Note, that this is only needed if
+       * we move up in the scopes.
+       */
+      if (first_write_scope->loop_break_line() < first_write) {
+         keep_for_full_loop = true;
+         propagate_live_range_to_dominant_write_scope();
+      }
+
+      first_write_scope = first_write_scope->parent();
+
+      /* Propagate live_range if we are now in a loop */
+      if (keep_for_full_loop && first_write_scope->is_loop())
+         propagate_live_range_to_dominant_write_scope();
+   }
+
+   /* The last write past the last read is dead code, but we have to
+    * ensure that the component is not reused too early, hence extend the
+    * live_range past the last write.
+    */
+   if (last_write >= last_read)
+      last_read = last_write + 1;
+
+   /* Here we are at the same scope, all is resolved */
+   m_range.start = first_write;
+   m_range.end = last_read;
+}
+
+const int
+RegisterCompAccess::conditionality_untouched = std::numeric_limits<int>::max();
+
+const int
+RegisterCompAccess::write_is_unconditional = std::numeric_limits<int>::max() - 1;
+
+
+RegisterAccess::RegisterAccess(const std::array<size_t, 4>& sizes)
+{
+   for (int i = 0; i < 4; ++i)
+      m_access_record[i].resize(sizes[i]);
+}
+
+RegisterCompAccess& RegisterAccess::operator() (const Register& reg)
+{
+   assert(reg.chan() < 4);
+   assert(m_access_record[reg.chan()].size() > (size_t)reg.index());
+   return m_access_record[reg.chan()][reg.index()];
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
new file mode 100644
index 0000000..3a1fed5
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator_helpers.h
@@ -0,0 +1,162 @@
+#ifndef SFN_LIFERANGEEVALUATOR_HELPERS_H
+#define SFN_LIFERANGEEVALUATOR_HELPERS_H
+
+#include "sfn_valuefactory.h"
+
+namespace r600 {
+
+enum ProgramScopeType {
+   outer_scope,           /* Outer program scope */
+   loop_body,             /* Inside a loop */
+   if_branch,             /* Inside if branch */
+   else_branch,           /* Inside else branch */
+   switch_body,           /* Inside switch statement */
+   switch_case_branch,    /* Inside switch case statement */
+   switch_default_branch, /* Inside switch default statement */
+   undefined_scope
+};
+
+class ProgramScope {
+public:
+   ProgramScope();
+   ProgramScope(ProgramScope *parent, ProgramScopeType type, int id,
+                int depth, int begin);
+
+   ProgramScopeType type() const;
+   ProgramScope *parent() const;
+   int nesting_depth() const;
+   int id() const;
+   int end() const;
+   int begin() const;
+   int loop_break_line() const;
+
+   const ProgramScope *in_else_scope() const;
+   const ProgramScope *in_ifelse_scope() const;
+   const ProgramScope *in_parent_ifelse_scope() const;
+   const ProgramScope *innermost_loop() const;
+   const ProgramScope *outermost_loop() const;
+   const ProgramScope *enclosing_conditional() const;
+
+   bool is_loop() const;
+   bool is_in_loop() const;
+   bool is_switchcase_scope_in_loop() const;
+   bool is_conditional() const;
+   bool is_child_of(const ProgramScope *scope) const;
+   bool is_child_of_ifelse_id_sibling(const ProgramScope *scope) const;
+
+   bool break_is_for_switchcase() const;
+   bool contains_range_of(const ProgramScope& other) const;
+
+   void set_end(int end);
+   void set_loop_break_line(int line);
+
+private:
+   ProgramScopeType scope_type;
+   int scope_id;
+   int scope_nesting_depth;
+   int scope_begin;
+   int scope_end;
+   int break_loop_line;
+   ProgramScope *parent_scope;
+};
+
+/* Class to track the access to a component of a temporary register. */
+
+struct LiveRange;
+
+class RegisterCompAccess {
+public:
+   RegisterCompAccess();
+   RegisterCompAccess(LiveRange range);
+
+   void record_read(int line, ProgramScope *scope, LiveRangeEntry::EUse use);
+   void record_write(int line, ProgramScope *scope);
+
+   void update_required_live_range();
+
+   const auto& range() { return m_range;}
+
+   const auto& use_type() { return m_use_type; }
+private:
+   void propagate_live_range_to_dominant_write_scope();
+   bool conditional_ifelse_write_in_loop() const;
+
+   void record_ifelse_write(const ProgramScope& scope);
+   void record_if_write(const ProgramScope& scope);
+   void record_else_write(const ProgramScope& scope);
+
+   ProgramScope *last_read_scope;
+   ProgramScope *first_read_scope;
+   ProgramScope *first_write_scope;
+
+   int first_write;
+   int last_read;
+   int last_write;
+   int first_read;
+
+   /* This member variable tracks the current resolution of conditional writing
+    * to this temporary in IF/ELSE clauses.
+    *
+    * The initial value "conditionality_untouched" indicates that this
+    * temporary has not yet been written to within an if clause.
+    *
+    * A positive (other than "conditionality_untouched") number refers to the
+    * last loop id for which the write was resolved as unconditional. With each
+    * new loop this value will be overwitten by "conditionality_unresolved"
+    * on entering the first IF clause writing this temporary.
+    *
+    * The value "conditionality_unresolved" indicates that no resolution has
+    * been achieved so far. If the variable is set to this value at the end of
+    * the processing of the whole shader it also indicates a conditional write.
+    *
+    * The value "write_is_conditional" marks that the variable is written
+    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+    * least one loop.
+    */
+   int conditionality_in_loop_id;
+
+   /* Helper constants to make the tracking code more readable. */
+   static const int write_is_conditional = -1;
+   static const int conditionality_unresolved = 0;
+   static const int conditionality_untouched;
+   static const int write_is_unconditional;
+
+   /* A bit field tracking the nexting levels of if-else clauses where the
+    * temporary has (so far) been written to in the if branch, but not in the
+    * else branch.
+    */
+   unsigned int if_scope_write_flags;
+
+   int next_ifelse_nesting_depth;
+   static const int supported_ifelse_nesting_depth = 32;
+
+   /* Tracks the last if scope in which the temporary was written to
+    * without a write in the corresponding else branch. Is also used
+    * to track read-before-write in the according scope.
+    */
+   const ProgramScope *current_unpaired_if_write_scope;
+
+   /* Flag to resolve read-before-write in the else scope. */
+   bool was_written_in_current_else_scope;
+
+   LiveRange m_range;
+
+   std::bitset<LiveRangeEntry::use_unspecified> m_use_type;
+};
+
+class RegisterAccess {
+public:
+   using RegisterCompAccessVector = std::vector<RegisterCompAccess>;
+
+   RegisterAccess(const std::array<size_t, 4>& sizes);
+
+   RegisterCompAccess& operator() (const Register& reg);
+
+   auto& component(int i) { return m_access_record[i]; }
+
+private:
+   std::array<RegisterCompAccessVector, 4> m_access_record;
+};
+
+}
+#endif // SFN_LIFERANGEEVALUATOR_HELPERS_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp b/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
new file mode 100644
index 0000000..00e9c39
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.cpp
@@ -0,0 +1,86 @@
+#include "sfn_memorypool.h"
+
+#include <cassert>
+#include <iostream>
+
+namespace r600 {
+
+struct MemoryPoolImpl {
+public:
+   MemoryPoolImpl();
+   ~MemoryPoolImpl();
+
+   using MemoryBacking = ::std::pmr::monotonic_buffer_resource;
+
+   MemoryBacking *pool;
+};
+
+MemoryPool::MemoryPool() noexcept : impl(nullptr)
+{
+}
+
+MemoryPool& MemoryPool::instance()
+{
+    static thread_local MemoryPool me;
+    me.initialize();
+    return me;
+}
+
+void MemoryPool::free()
+{
+   delete impl;
+   impl = nullptr;
+}
+
+void MemoryPool::initialize()
+{
+   if (!impl)
+      impl = new MemoryPoolImpl();
+}
+
+void *MemoryPool::allocate(size_t size)
+{
+   return impl->pool->allocate(size);
+}
+
+void *MemoryPool::allocate(size_t size, size_t align)
+{
+   return impl->pool->allocate(size, align);
+}
+
+void MemoryPool::release_all()
+{
+   instance().free();
+}
+
+void init_pool()
+{
+    MemoryPool::instance();
+}
+
+void release_pool()
+{
+    MemoryPool::release_all();
+}
+
+void *Allocate::operator new(size_t size)
+{
+    return MemoryPool::instance().allocate(size);
+}
+
+void Allocate::operator delete (void *p, size_t size)
+{
+    // MemoryPool::instance().deallocate(p, size);
+}
+
+MemoryPoolImpl::MemoryPoolImpl()
+{
+   pool = new MemoryBacking();
+}
+
+MemoryPoolImpl::~MemoryPoolImpl()
+{   
+   delete pool;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_memorypool.h b/src/gallium/drivers/r600/sfn/sfn_memorypool.h
new file mode 100644
index 0000000..57d404d
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_memorypool.h
@@ -0,0 +1,69 @@
+#ifndef MEMORYPOOL_H
+#define MEMORYPOOL_H
+
+#include <cstdlib>
+#include <memory>
+#include <stack>
+
+#if __cplusplus >= 21703L
+#include <memory_resource>
+#define R600_POINTER_TYPE(X) X *
+#else
+#error Need C++17
+#endif
+
+namespace r600  {
+
+void init_pool();
+void release_pool();
+
+class Allocate
+{
+public:
+    void * operator new(size_t size);
+    void operator delete (void *p, size_t size);
+};
+
+class MemoryPool  {
+public:
+    static MemoryPool& instance();
+    static void release_all();
+
+    void free();
+    void initialize();
+
+    void *allocate(size_t size);
+    void *allocate(size_t size, size_t align);
+
+private:
+    MemoryPool() noexcept;
+
+    struct MemoryPoolImpl* impl;
+};
+
+template <typename T>
+struct Allocator {
+   using value_type = T;
+
+   Allocator() = default;
+   Allocator(const Allocator& other) = default;
+
+   template <typename U>
+   Allocator(const Allocator<U>& other) {(void)other;}
+
+   T *allocate(size_t n) {
+      return (T *)MemoryPool::instance().allocate(n * sizeof(T), alignof(T));
+   }
+
+   void deallocate(void *p, size_t n) {
+      (void)p; (void)n;
+      //MemoryPool::instance().deallocate(p, n * sizeof(T), alignof(T));
+   }
+
+   friend bool operator == (const Allocator<T>& lhs, const Allocator<T>& rhs) {
+      (void)lhs; (void)rhs; return true;}
+};
+
+}
+
+#endif // MEMORYPOOL_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index 46c10e4..fe5397e 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -30,19 +30,19 @@
 #include "../r600_pipe.h"
 #include "../r600_shader.h"
 
-#include "util/u_prim.h"
 
-#include "sfn_instruction_tex.h"
+#include "util/u_prim.h"
 
-#include "sfn_shader_vertex.h"
-#include "sfn_shader_fragment.h"
-#include "sfn_shader_geometry.h"
-#include "sfn_shader_compute.h"
-#include "sfn_shader_tcs.h"
-#include "sfn_shader_tess_eval.h"
+#include "sfn_shader.h"
+#include "sfn_assembler.h"
+#include "sfn_debug.h"
+#include "sfn_liverangeevaluator.h"
 #include "sfn_nir_lower_fs_out_to_vector.h"
-#include "sfn_ir_to_assembly.h"
 #include "sfn_nir_lower_alu.h"
+#include "sfn_nir_lower_tex.h"
+#include "sfn_optimizer.h"
+#include "sfn_ra.h"
+#include "sfn_scheduler.h"
 
 #include <vector>
 
@@ -78,264 +78,11 @@ bool NirLowerInstruction::run(nir_shader *shader)
                                         (void *)this);
 }
 
-
-ShaderFromNir::ShaderFromNir():sh(nullptr),
-   gfx_level(CLASS_UNKNOWN),
-   m_current_if_id(0),
-   m_current_loop_id(0),
-   scratch_size(0)
-{
-}
-
-bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
-                          r600_pipe_shader_selector *sel, r600_shader_key& key,
-                          struct r600_shader* gs_shader, enum amd_gfx_level _chip_class)
-{
-   sh = shader;
-   gfx_level = _chip_class;
-   assert(sh);
-
-   switch (shader->info.stage) {
-   case MESA_SHADER_VERTEX:
-      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
-      break;
-   case MESA_SHADER_TESS_CTRL:
-      sfn_log << SfnLog::trans << "Start TCS\n";
-      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_TESS_EVAL:
-      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
-      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, gfx_level));
-      break;
-   case MESA_SHADER_GEOMETRY:
-      sfn_log << SfnLog::trans << "Start GS\n";
-      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_FRAGMENT:
-      sfn_log << SfnLog::trans << "Start FS\n";
-      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, gfx_level));
-      break;
-   case MESA_SHADER_COMPUTE:
-      sfn_log << SfnLog::trans << "Start CS\n";
-      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, gfx_level));
-      break;
-   default:
-      return false;
-   }
-
-   sfn_log << SfnLog::trans << "Process declarations\n";
-   if (!process_declaration())
-      return false;
-
-   // at this point all functions should be inlined
-   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
-
-   sfn_log << SfnLog::trans << "Scan shader\n";
-
-   if (sfn_log.has_debug_flag(SfnLog::instr))
-      nir_print_shader(const_cast<nir_shader *>(shader), stderr);
-
-   nir_foreach_block(block, func->impl) {
-      nir_foreach_instr(instr, block) {
-         if (!impl->scan_instruction(instr)) {
-            fprintf(stderr, "Unhandled sysvalue access ");
-            nir_print_instr(instr, stderr);
-            fprintf(stderr, "\n");
-            return false;
-         }
-      }
-   }
-
-   sfn_log << SfnLog::trans << "Reserve registers\n";
-   if (!impl->allocate_reserved_registers()) {
-      return false;
-   }
-
-   ValuePool::array_list arrays;
-   sfn_log << SfnLog::trans << "Allocate local registers\n";
-   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
-      impl->allocate_local_register(*reg, arrays);
-   }
-
-   sfn_log << SfnLog::trans << "Emit shader start\n";
-   impl->allocate_arrays(arrays);
-
-   impl->emit_shader_start();
-
-   sfn_log << SfnLog::trans << "Process shader \n";
-   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
-      if (!process_cf_node(node))
-         return false;
-   }
-
-   // Add optimizations here
-   sfn_log << SfnLog::trans << "Finalize\n";
-   impl->finalize();
-
-   impl->get_array_info(pipe_shader->shader);
-
-   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
-      sfn_log << SfnLog::trans << "Merge registers\n";
-      impl->remap_registers();
-   }
-
-   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
-   return true;
-}
-
-Shader ShaderFromNir::shader() const
-{
-   return Shader{impl->m_output, impl->get_temp_registers()};
-}
-
-
-bool ShaderFromNir::process_cf_node(nir_cf_node *node)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "CF");
-   switch (node->type) {
-   case nir_cf_node_block:
-      return process_block(nir_cf_node_as_block(node));
-   case nir_cf_node_if:
-      return process_if(nir_cf_node_as_if(node));
-   case nir_cf_node_loop:
-      return process_loop(nir_cf_node_as_loop(node));
-   default:
-      return false;
-   }
-}
-
-bool ShaderFromNir::process_if(nir_if *if_stmt)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "IF");
-
-   if (!impl->emit_if_start(m_current_if_id, if_stmt))
-      return false;
-
-   int if_id = m_current_if_id++;
-   m_if_stack.push(if_id);
-
-   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
-         if (!process_cf_node(n)) return false;
-
-   if (!if_stmt->then_list.is_empty()) {
-      if (!impl->emit_else_start(if_id))
-         return false;
-
-      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
-            if (!process_cf_node(n)) return false;
-   }
-
-   if (!impl->emit_ifelse_end(if_id))
-      return false;
-
-   m_if_stack.pop();
-   return true;
-}
-
-bool ShaderFromNir::process_loop(nir_loop *node)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
-   int loop_id = m_current_loop_id++;
-
-   if (!impl->emit_loop_start(loop_id))
-      return false;
-
-   foreach_list_typed(nir_cf_node, n, node, &node->body)
-         if (!process_cf_node(n)) return false;
-
-   if (!impl->emit_loop_end(loop_id))
-      return false;
-
-   return true;
-}
-
-bool ShaderFromNir::process_block(nir_block *block)
-{
-   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
-   nir_foreach_instr(instr, block) {
-      int r = emit_instruction(instr);
-      if (!r) {
-         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
-                 << *instr << "\n";
-         return false;
-      }
-   }
-   return true;
-}
-
-
-ShaderFromNir::~ShaderFromNir()
-{
-}
-
-pipe_shader_type ShaderFromNir::processor_type() const
-{
-   return impl->m_processor_type;
-}
-
-
-bool ShaderFromNir::emit_instruction(nir_instr *instr)
-{
-   assert(impl);
-
-   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
-
-   switch (instr->type) {
-   case nir_instr_type_alu:
-      return impl->emit_alu_instruction(instr);
-   case nir_instr_type_deref:
-      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
-   case nir_instr_type_intrinsic:
-      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
-   case nir_instr_type_load_const: /* const values are loaded when needed */
-      return true;
-   case nir_instr_type_tex:
-      return impl->emit_tex_instruction(instr);
-   case nir_instr_type_jump:
-      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
-   default:
-      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
-      nir_print_instr(instr, stderr);
-      fprintf(stderr, "'\n");
-      return false;
-   case nir_instr_type_ssa_undef:
-      return impl->create_undef(nir_instr_as_ssa_undef(instr));
-      return true;
-   }
-}
-
-bool ShaderFromNir::process_declaration()
-{
-   impl->set_shader_info(sh);
-
-   if (!impl->scan_inputs_read(sh))
-      return false;
-
-   // scan declarations
-   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
-                                                 nir_var_mem_ubo |
-                                                 nir_var_mem_ssbo) {
-      if (!impl->process_uniforms(variable)) {
-         fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name);
-         return false;
-      }
-   }
-
-   return true;
-}
-
-const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
-{
-   assert(impl);
-   return impl->m_output;
-}
-
-
 AssemblyFromShader::~AssemblyFromShader()
 {
 }
 
-bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
+bool AssemblyFromShader::lower(const Shader& ir)
 {
    return do_lower(ir);
 }
@@ -557,7 +304,6 @@ r600_nir_lower_atomics(nir_shader *shader)
                                        nir_metadata_dominance,
                                        NULL);
 }
-using r600::r600_nir_lower_int_tg4;
 using r600::r600_lower_scratch_addresses;
 using r600::r600_lower_fs_out_to_vector;
 using r600::r600_lower_ubo_to_align16;
@@ -676,6 +422,7 @@ r600_lower_shared_io(nir_shader *nir)
 static nir_ssa_def *
 r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
 {
+   (void)_options;
    auto old_ir = nir_instr_as_intrinsic(instr);
    auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
    nir_ssa_dest_init(&load->instr, &load->dest,
@@ -693,6 +440,8 @@ r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
 
 bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
 {
+   (void)_options;
+
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
@@ -713,7 +462,7 @@ bool r600_lower_fs_pos_input(nir_shader *shader)
 };
 
 static bool
-optimize_once(nir_shader *shader, bool vectorize)
+optimize_once(nir_shader *shader)
 {
    bool progress = false;
    NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
@@ -722,9 +471,6 @@ optimize_once(nir_shader *shader, bool vectorize)
    NIR_PASS(progress, shader, nir_opt_algebraic);
    NIR_PASS(progress, shader, nir_opt_constant_folding);
    NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
-   if (vectorize)
-      NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
-
    NIR_PASS(progress, shader, nir_opt_remove_phis);
 
    if (nir_opt_trivial_continues(shader)) {
@@ -777,13 +523,9 @@ bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
    case nir_op_fdot2:
    case nir_op_fdot3:
    case nir_op_fdot4:
+      return nir_src_bit_size(alu->src[0].src) == 64;
    case nir_op_cube_r600:
       return false;
-   case nir_op_bany_fnequal2:
-   case nir_op_ball_fequal2:
-   case nir_op_bany_inequal2:
-   case nir_op_ball_iequal2:
-      return nir_src_bit_size(alu->src[0].src) != 64;
    default:
       return true;
    }
@@ -793,15 +535,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
                          struct r600_pipe_shader *pipeshader,
                          r600_shader_key *key)
 {
-   char filename[4000];
    struct r600_pipe_shader_selector *sel = pipeshader->selector;
 
-   bool lower_64bit = ((sel->nir->options->lower_int64_options ||
+   bool lower_64bit = (rctx->b.gfx_level < CAYMAN  &&
+                       (sel->nir->options->lower_int64_options ||
                         sel->nir->options->lower_doubles_options) &&
                        (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);
 
-   r600::ShaderFromNir convert;
-
    if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
       fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
       nir_print_shader(sel->nir, stderr);
@@ -813,10 +553,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
    /* Cayman seems very crashy about accessing images that don't exists or are
     * accessed out of range, this lowering seems to help (but it can also be
     * another problem */
-   if (sel->nir->info.num_images > 0 && rctx->b.gfx_level == CAYMAN)
-       NIR_PASS_V(sel->nir, r600_legalize_image_load_store);
 
-   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
    NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
    nir_lower_idiv_options idiv_options = {0};
    idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
@@ -828,7 +565,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
 
    if (lower_64bit)
       NIR_PASS_V(sel->nir, nir_lower_int64);
-   while(optimize_once(sel->nir, false));
+   while(optimize_once(sel->nir));
 
    NIR_PASS_V(sel->nir, r600_lower_shared_io);
    NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
@@ -839,8 +576,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
    lower_tex_options.lower_invalid_implicit_lod = true;
 
    NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
-   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
-   NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);
+   NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
+   NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);
 
    NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
 
@@ -851,30 +588,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
       NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
       NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
    }
+   nir_variable_mode io_modes = nir_var_uniform |
+                                nir_var_shader_in |
+                                nir_var_shader_out;
 
-   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
-
-   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
-      io_modes |= nir_var_shader_out;
-
-   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
-
-      /* Lower IO to temporaries late, because otherwise we get into trouble
-       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
-       * somewhere that results in the input alweas reading from the same temp
-       * regardless of interpolation when the lowering is done early */
-      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
-              true, true);
-
-      /* Since we're doing nir_lower_io_to_temporaries late, we need
-       * to lower all the copy_deref's introduced by
-       * lower_io_to_temporaries before calling nir_lower_io.
-       */
-      NIR_PASS_V(sel->nir, nir_split_var_copies);
-      NIR_PASS_V(sel->nir, nir_lower_var_copies);
-      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
-   }
-
+   NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
    NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
                  nir_lower_io_lower_64bit_to_32);
 
@@ -916,14 +634,27 @@ int r600_shader_from_nir(struct r600_context *rctx,
       NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode));
    }
 
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+
+   NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
+   NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
+   NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
+   NIR_PASS_V(sh, nir_lower_int64);
+
    NIR_PASS_V(sh, nir_lower_ubo_vec4);
+
+
    if (lower_64bit)
       NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
 
+   NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);
    /* Lower to scalar to let some optimization work out better */
-   while(optimize_once(sh, false));
+   while(optimize_once(sh));
 
-   NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
+   if (lower_64bit)
+      NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
 
    NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
    NIR_PASS_V(sh, nir_remove_dead_variables,  nir_var_shader_out, NULL);
@@ -934,7 +665,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
               40,
               r600_get_natural_size_align_bytes);
 
-   while (optimize_once(sh, true));
+   while (optimize_once(sh));
 
    NIR_PASS_V(sh, nir_lower_bool_to_int32);
    NIR_PASS_V(sh, r600_nir_lower_int_tg4);
@@ -945,8 +676,6 @@ int r600_shader_from_nir(struct r600_context *rctx,
 
    NIR_PASS_V(sh, nir_lower_locals_to_regs);
 
-   //NIR_PASS_V(sh, nir_opt_algebraic);
-   //NIR_PASS_V(sh, nir_copy_prop);
    NIR_PASS_V(sh, nir_lower_to_source_mods,
 	      (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
 					       nir_lower_64bit_source_mods));
@@ -974,33 +703,66 @@ int r600_shader_from_nir(struct r600_context *rctx,
       pipeshader->shader.cc_dist_mask = (1 <<  (sh->info.cull_distance_array_size +
                                                 sh->info.clip_distance_array_size)) - 1;
    }
-
-   struct r600_shader* gs_shader = nullptr;
+   struct r600_shader* gs_shader = nullptr;   
    if (rctx->gs_shader)
       gs_shader = &rctx->gs_shader->current->shader;
    r600_screen *rscreen = rctx->screen;
 
-   bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.gfx_level);
-   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
-      static int shnr = 0;
+   r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader,
+                                                           *key, rctx->isa->hw_class);
 
-      snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);
+   assert(shader);
+   if (!shader)
+      return -2;
 
-      if (access(filename, F_OK) == -1) {
-         FILE *f = fopen(filename, "w");
+   pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask();
+   pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count();
+   pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory);
 
-         if (f) {
-            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
-            nir_print_shader(sh, f);
-            fprintf(f, ")\";\n");
-            fclose(f);
-         }
+   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+      std::cerr << "Shader after conversion from nir\n";
+      shader->print(std::cerr);
+   }
+
+   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) {
+      optimize(*shader);
+
+      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+         std::cerr << "Shader after optimization\n";
+         shader->print(std::cerr);
       }
-      if (!r)
-         return -2;
    }
 
-   auto shader = convert.shader();
+   auto scheduled_shader = r600::schedule(shader);
+   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+      std::cerr << "Shader after scheduling\n";
+      shader->print(std::cerr);
+   }
+
+   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {
+
+      if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
+         r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
+         scheduled_shader->print(std::cerr);
+      }
+
+      r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
+      auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);
+
+      if (!r600::register_allocation(lrm)) {
+         R600_ERR("%s: Register allocation failed\n", __func__);
+         /* For now crash if the shader could not be benerated */
+         assert(0);
+         return -1;
+      } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
+                 r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
+         r600::sfn_log << "Shader after RA\n";
+         scheduled_shader->print(std::cerr);
+      }
+   }
+
+   scheduled_shader->get_shader_info(&pipeshader->shader);
+   pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0;
 
    r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family,
                       rscreen->has_compressed_msaa_texturing);
@@ -1012,9 +774,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
    pipeshader->shader.bc.type = pipeshader->shader.processor_type;
    pipeshader->shader.bc.isa = rctx->isa;
 
-   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
-   if (!afs.lower(shader.m_ir)) {
+   r600::Assembler afs(&pipeshader->shader, *key);
+   if (!afs.lower(scheduled_shader)) {
       R600_ERR("%s: Lowering to assembly failed\n", __func__);
+
+      scheduled_shader->print(std::cerr);
+      /* For now crash if the shader could not be benerated */
+      assert(0);
       return -1;
    }
 
@@ -1025,8 +791,5 @@ int r600_shader_from_nir(struct r600_context *rctx,
    } else {
       r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
    }
-   if (pipeshader->shader.bc.ngpr < 6)
-      pipeshader->shader.bc.ngpr = 6;
-
    return 0;
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h
index ee9ace6..0514cc3 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.h
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.h
@@ -31,7 +31,7 @@
 #include "nir_builder.h"
 
 #ifdef __cplusplus
-#include "sfn_shader_base.h"
+#include "sfn_shader.h"
 #include <vector>
 
 namespace r600 {
@@ -64,56 +64,16 @@ bool r600_nir_64_to_vec2(nir_shader *sh);
 
 bool r600_merge_vec2_stores(nir_shader *shader);
 
-class Shader {
-public:
-   std::vector<InstructionBlock>& m_ir;
-   ValueMap m_temp;
-};
-
-class ShaderFromNir {
-public:
-   ShaderFromNir();
-   ~ShaderFromNir();
-
-   unsigned ninputs() const;
-
-   bool lower(const nir_shader *shader, r600_pipe_shader *sh,
-              r600_pipe_shader_selector *sel, r600_shader_key &key,
-              r600_shader *gs_shader, enum amd_gfx_level gfx_level);
-
-   bool process_declaration();
-
-   pipe_shader_type processor_type() const;
-
-   bool emit_instruction(nir_instr *instr);
-
-   const std::vector<InstructionBlock> &shader_ir() const;
-
-   Shader shader() const;
-private:
-
-   bool process_block();
-   bool process_cf_node(nir_cf_node *node);
-   bool process_if(nir_if *node);
-   bool process_loop(nir_loop *node);
-   bool process_block(nir_block *node);
-
-   std::unique_ptr<ShaderFromNirProcessor> impl;
-   const nir_shader *sh;
-
-   enum amd_gfx_level gfx_level;
-   int m_current_if_id;
-   int m_current_loop_id;
-   std::stack<int> m_if_stack;
-   int scratch_size;
-};
+bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
+bool r600_lower_64bit_to_vec2(nir_shader *sh);
+bool r600_split_64bit_alu_and_phi(nir_shader *sh);
 
 class AssemblyFromShader {
 public:
    virtual ~AssemblyFromShader();
-   bool lower(const std::vector<InstructionBlock> &ir);
+   bool lower(const Shader& s);
 private:
-   virtual bool do_lower(const std::vector<InstructionBlock>& ir)  = 0 ;
+   virtual bool do_lower(const Shader& s)  = 0 ;
 };
 
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp
index 4929cbc..4211d1f 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_legalize_image_load_store.cpp
@@ -32,7 +32,8 @@
 
 
 static nir_ssa_def *
-r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_options)
+r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr,
+                                    UNUSED void *_options)
 {
    b->cursor = nir_before_instr(instr);
    auto ir = nir_instr_as_intrinsic(instr);
@@ -143,7 +144,8 @@ r600_legalize_image_load_store_impl(nir_builder *b, nir_instr *instr, void *_opt
 }
 
 static bool
-r600_legalize_image_load_store_filter(const nir_instr *instr, const void *_options)
+r600_legalize_image_load_store_filter(const nir_instr *instr,
+                                      UNUSED const void *_options)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
index 88e0085..ba68cd7 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -108,6 +108,193 @@ private:
 };
 
 
+class LowerLoad64Uniform : public NirLowerInstruction {
+   bool filter(const nir_instr *instr) const override;
+   nir_ssa_def *lower(nir_instr *instr) override;
+};
+
+bool LowerLoad64Uniform::filter(const nir_instr *instr) const
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_uniform &&
+       intr->intrinsic != nir_intrinsic_load_ubo &&
+       intr->intrinsic != nir_intrinsic_load_ubo_vec4)
+      return false;
+
+   return nir_dest_bit_size(intr->dest) == 64;
+}
+
+
+nir_ssa_def *LowerLoad64Uniform::lower(nir_instr *instr)
+{
+   auto intr = nir_instr_as_intrinsic(instr);
+   int old_components = nir_dest_num_components(intr->dest);
+   assert(old_components <= 2);
+   assert(intr->dest.is_ssa);
+   intr->dest.ssa.num_components *= 2;
+   intr->dest.ssa.bit_size = 32;
+   intr->num_components *= 2;
+
+   if (intr->intrinsic ==nir_intrinsic_load_ubo ||
+       intr->intrinsic ==nir_intrinsic_load_ubo_vec4)
+      nir_intrinsic_set_component(intr, 2 * nir_intrinsic_component(intr));
+
+   nir_ssa_def *result_vec[2] = {nullptr, nullptr};
+
+   for (int i = 0; i < old_components; ++i) {
+      result_vec[i] = nir_pack_64_2x32_split(b,
+                                             nir_channel(b, &intr->dest.ssa, 2 * i),
+                                             nir_channel(b, &intr->dest.ssa, 2 * i + 1));
+   }
+   if (old_components == 1)
+      return result_vec[0];
+
+   return nir_vec2(b, result_vec[0], result_vec[1]);
+}
+
+bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh)
+{
+   return LowerLoad64Uniform().run(sh);
+}
+
+class LowerSplit64op : public NirLowerInstruction {
+   bool filter(const nir_instr *instr) const override {
+      switch (instr->type) {
+      case nir_instr_type_alu: {
+         auto alu = nir_instr_as_alu(instr);
+         switch (alu->op) {
+         case nir_op_bcsel:
+            return nir_dest_bit_size(alu->dest.dest) == 64;
+         case nir_op_f2b1:
+         case nir_op_f2i32:
+         case nir_op_f2u32:
+         case nir_op_f2i64:
+         case nir_op_f2u64:
+         case nir_op_u2f64:
+         case nir_op_i2f64:
+            return nir_src_bit_size(alu->src[0].src) == 64;
+         default:
+            return false;
+         }
+      }
+      case nir_instr_type_phi: {
+         auto phi = nir_instr_as_phi(instr);
+         return nir_dest_num_components(phi->dest) == 64;
+      }
+      default:
+         return false;
+      }
+   }
+
+   nir_ssa_def *lower(nir_instr *instr) override {
+
+      switch (instr->type) {
+      case nir_instr_type_alu: {
+         auto alu = nir_instr_as_alu(instr);
+         switch (alu->op) {
+
+         case nir_op_bcsel: {
+            auto lo = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
+                  nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 1)),
+                  nir_unpack_64_2x32_split_x(b, nir_ssa_for_alu_src(b, alu, 2)));
+            auto hi = nir_bcsel(b, nir_ssa_for_src(b, alu->src[0].src, 1),
+                  nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 1)),
+                  nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2)));
+            return nir_pack_64_2x32_split(b, lo, hi);
+         }
+         case nir_op_f2b1: {
+            auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest));
+            return nir_fneu(b, nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask),
+                  nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64));
+         }
+         case nir_op_f2i32: {
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto abs_src = nir_fabs(b, src);
+            auto value = nir_f2u32(b, abs_src);
+            return nir_bcsel(b, gt0, value, nir_ineg(b, value));
+         }
+         case nir_op_f2u32: {
+            /* fp32 doesn't hold suffient bits to represent the full range of
+             * u32, therefore we have to split the values, and because f2f32
+             * rounds, we have to remove the fractional part in the hi bits
+             * For values > UINT_MAX the result is undefined */
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto highval = nir_fmul_imm(b, src, 1.0/65536.0);
+            auto fract = nir_ffract(b, highval);
+            auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
+            auto lowval = nir_fmul_imm(b, fract, 65536.0);
+            auto low = nir_f2u32(b, nir_f2f32(b, lowval));
+            return nir_bcsel(b, gt0, nir_ior(b, nir_ishl_imm(b, high, 16), low),
+                             nir_imm_int(b, 0));
+         }
+         case nir_op_f2i64: {
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto abs_src = nir_fabs(b, src);
+            auto value = nir_f2u64(b, abs_src);
+            return nir_bcsel(b, gt0, value, nir_isub(b, nir_imm_zero(b, 1, 64), value));
+         }
+         case nir_op_f2u64: {
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
+            auto highval = nir_fmul_imm(b, src, 1.0/(65536.0 * 65536.0));
+            auto fract = nir_ffract(b, highval);
+            auto high = nir_f2u32(b, nir_fsub(b, highval, fract));
+            auto low = nir_f2u32(b, nir_fmul_imm(b, fract, 65536.0 * 65536.0));
+            return nir_bcsel(b, gt0, nir_pack_64_2x32_split(b, low, high),
+                             nir_imm_zero(b, 1, 64));
+         }
+         case nir_op_u2f64: {
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto low = nir_unpack_64_2x32_split_x(b, src);
+            auto high = nir_unpack_64_2x32_split_y(b, src);
+            auto flow = nir_u2f64(b, low);
+            auto fhigh = nir_u2f64(b, high);
+            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+         }
+         case nir_op_i2f64: {
+            auto src = nir_ssa_for_alu_src(b, alu, 0);
+            auto low = nir_unpack_64_2x32_split_x(b, src);
+            auto high = nir_unpack_64_2x32_split_y(b, src);
+            auto flow = nir_u2f64(b, low);
+            auto fhigh = nir_i2f64(b, high);
+            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+         }
+         default:
+            unreachable("trying to lower instruction that was not in filter");
+         }
+      }
+      case nir_instr_type_phi: {
+         auto phi = nir_instr_as_phi(instr);
+         auto phi_lo = nir_phi_instr_create(b->shader);
+         auto phi_hi = nir_phi_instr_create(b->shader);
+         nir_ssa_dest_init(&phi_lo->instr, &phi_lo->dest, phi->dest.ssa.num_components * 2, 32, "");
+         nir_ssa_dest_init(&phi_hi->instr, &phi_hi->dest, phi->dest.ssa.num_components * 2, 32, "");
+         nir_foreach_phi_src(s, phi) {
+            auto lo = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
+            auto hi = nir_unpack_32_2x16_split_x(b, nir_ssa_for_src(b, s->src, 1));
+            nir_phi_instr_add_src(phi_lo, s->pred, nir_src_for_ssa(lo));
+            nir_phi_instr_add_src(phi_hi, s->pred, nir_src_for_ssa(hi));
+         }
+         return nir_pack_64_2x32_split(b, &phi_lo->dest.ssa, &phi_hi->dest.ssa);
+      }
+      default:
+         unreachable("Trying to lower instruction that was not in filter");
+      }
+   }
+};
+
+bool r600_split_64bit_alu_and_phi(nir_shader *sh)
+{
+   return LowerSplit64op().run(sh);
+}
+
+
 bool
 LowerSplit64BitVar::filter(const nir_instr *instr) const
 {
@@ -271,7 +458,7 @@ LowerSplit64BitVar::split_store_deref_array(nir_intrinsic_instr *intr, nir_deref
 }
 
 nir_ssa_def *
-LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, nir_deref_instr *deref)
+LowerSplit64BitVar::split_store_deref_var(nir_intrinsic_instr *intr, UNUSED nir_deref_instr *deref)
 {
    auto old_var = nir_intrinsic_get_var(intr, 0);
    unsigned old_components = old_var->type->without_array()->components();
@@ -556,8 +743,6 @@ LowerSplit64BitVar::lower(nir_instr *instr)
    }
    case  nir_instr_type_alu: {
       auto alu = nir_instr_as_alu(instr);
-      nir_print_instr(instr, stderr);
-      fprintf(stderr, "\n");
       switch (alu->op) {
       case nir_op_bany_fnequal3:
          return split_reduction3(alu, nir_op_bany_fnequal2, nir_op_fneu, nir_op_ior);
@@ -845,7 +1030,7 @@ static bool store_64bit_intr(nir_src *src, void *state)
    return !*s;
 }
 
-static bool double2vec2(nir_src *src, void *state)
+static bool double2vec2(nir_src *src, UNUSED void *state)
 {
    if (nir_src_bit_size(*src) != 64)
       return true;
@@ -1058,6 +1243,206 @@ bool r600_merge_vec2_stores(nir_shader *shader)
    return merger.combine();
 }
 
+static bool
+r600_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
+{
+   b->cursor = nir_after_instr(&instr->instr);
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_ssbo:
+      break;
+   default:
+      return false;
+   }
+
+   if (instr->num_components <= 2)
+      return false;
+
+   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
+   if (has_dest) {
+      if (nir_dest_bit_size(instr->dest) != 64)
+         return false;
+   } else  {
+      if (nir_src_bit_size(instr->src[0]) != 64)
+          return false;
+   }
+
+   nir_intrinsic_instr *first =
+      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
+   nir_intrinsic_instr *second =
+      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_store_ssbo:
+      break;
+
+   default: {
+      nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
+      semantics.location++;
+      semantics.num_slots--;
+      nir_intrinsic_set_io_semantics(second, semantics);
+
+      nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
+      break;
+   }
+   }
+
+   first->num_components = 2;
+   second->num_components -= 2;
+   if (has_dest) {
+      first->dest.ssa.num_components = 2;
+      second->dest.ssa.num_components -= 2;
+   }
+
+   nir_builder_instr_insert(b, &first->instr);
+   nir_builder_instr_insert(b, &second->instr);
+
+   if (has_dest) {
+      /* Merge the two loads' results back into a vector. */
+      nir_ssa_scalar channels[4] = {
+         nir_get_ssa_scalar(&first->dest.ssa, 0),
+         nir_get_ssa_scalar(&first->dest.ssa, 1),
+         nir_get_ssa_scalar(&second->dest.ssa, 0),
+         nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0),
+      };
+      nir_ssa_def *new_ir = nir_vec_scalars(b, channels, instr->num_components);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_ir);
+   } else {
+      /* Split the src value across the two stores. */
+      b->cursor = nir_before_instr(&instr->instr);
+
+      nir_ssa_def *src0 = instr->src[0].ssa;
+      nir_ssa_scalar channels[4] = { 0 };
+      for (int i = 0; i < instr->num_components; i++)
+         channels[i] = nir_get_ssa_scalar(src0, i);
+
+      nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
+      nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
+
+      nir_instr_rewrite_src(&first->instr, &first->src[0],
+                            nir_src_for_ssa(nir_vec_scalars(b, channels, 2)));
+      nir_instr_rewrite_src(&second->instr, &second->src[0],
+                            nir_src_for_ssa(nir_vec_scalars(b, &channels[2],
+                                                           second->num_components)));
+   }
+
+   int offset_src = -1;
+   uint32_t offset_amount = 16;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_load_ubo:
+      offset_src = 1;
+      break;
+   case nir_intrinsic_load_ubo_vec4:
+   case nir_intrinsic_load_uniform:
+      offset_src = 0;
+      offset_amount = 1;
+      break;
+   case nir_intrinsic_store_ssbo:
+      offset_src = 2;
+      break;
+   default:
+      break;
+   }
+   if (offset_src != -1) {
+      b->cursor = nir_before_instr(&second->instr);
+      nir_ssa_def *second_offset =
+         nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
+      nir_instr_rewrite_src(&second->instr, &second->src[offset_src],
+                            nir_src_for_ssa(second_offset));
+   }
+
+   /* DCE stores we generated with no writemask (nothing else does this
+    * currently).
+    */
+   if (!has_dest) {
+      if (nir_intrinsic_write_mask(first) == 0)
+         nir_instr_remove(&first->instr);
+      if (nir_intrinsic_write_mask(second) == 0)
+         nir_instr_remove(&second->instr);
+   }
+
+   nir_instr_remove(&instr->instr);
+
+   return true;
+}
+
+static bool
+r600_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
+{
+   int num_components = instr->def.num_components;
+
+   if (instr->def.bit_size != 64 || num_components <= 2)
+      return false;
+
+   b->cursor = nir_before_instr(&instr->instr);
+
+   nir_load_const_instr *first =
+      nir_load_const_instr_create(b->shader, 2, 64);
+   nir_load_const_instr *second =
+      nir_load_const_instr_create(b->shader, num_components - 2, 64);
+
+   first->value[0] = instr->value[0];
+   first->value[1] = instr->value[1];
+   second->value[0] = instr->value[2];
+   if (num_components == 4)
+      second->value[1] = instr->value[3];
+
+   nir_builder_instr_insert(b, &first->instr);
+   nir_builder_instr_insert(b, &second->instr);
+
+   nir_ssa_def *channels[4] = {
+      nir_channel(b, &first->def, 0),
+      nir_channel(b, &first->def, 1),
+      nir_channel(b, &second->def, 0),
+      num_components == 4 ? nir_channel(b, &second->def, 1) : NULL,
+   };
+   nir_ssa_def *new_ir = nir_vec(b, channels, num_components);
+   nir_ssa_def_rewrite_uses(&instr->def, new_ir);
+   nir_instr_remove(&instr->instr);
+
+   return true;
+}
+
+static bool
+r600_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+   switch (instr->type) {
+   case nir_instr_type_load_const:
+      return r600_lower_64bit_load_const(b, nir_instr_as_load_const(instr));
+
+   case nir_instr_type_intrinsic:
+      return r600_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
+   default:
+      return false;
+   }
+}
+
+bool
+r600_lower_64bit_to_vec2(nir_shader *s)
+{
+   return nir_shader_instructions_pass(s,
+                                       r600_lower_64bit_to_vec2_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       NULL);
+}
+
+
 } // end namespace r600
 
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
index 892f006..4a4cc0a 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
@@ -95,13 +95,14 @@ emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_i
 
    auto idx2 = nir_src_as_const_value(op->src[1]);
    if (!idx2 || idx2->u32 != 0)
-      offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
+      offset = nir_iadd(b, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)), offset);
 
    return nir_iadd(b, addr, offset);
 }
 
 static nir_ssa_def *
-emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
+emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op,
+                  UNUSED nir_variable_mode mode, int src_offset)
 {
 
    nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
@@ -552,7 +553,7 @@ r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options
 }
 
 static nir_ssa_def *
-r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options)
+r600_lower_tess_coord_impl(nir_builder *b, UNUSED nir_instr *instr, void *_options)
 {
    pipe_prim_type prim_type = *(pipe_prim_type *)_options;
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp
similarity index 65%
rename from src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
rename to src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp
index 8fc5469..197bff4 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp
@@ -1,142 +1,9 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_instruction_tex.h"
+#include "sfn_nir_lower_tex.h"
+
+#include "nir.h"
 #include "nir_builder.h"
 #include "nir_builtin_builder.h"
 
-namespace r600 {
-
-TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
-                               unsigned sid, unsigned rid, PValue sampler_offset):
-   Instruction(tex),
-   m_opcode(op),
-   m_dst(dest),
-   m_src(src),
-   m_sampler_id(sid),
-   m_resource_id(rid),
-   m_flags(0),
-   m_inst_mode(0),
-   m_dest_swizzle{0,1,2,3},
-   m_sampler_offset(sampler_offset)
-
-{
-   memset(m_offset, 0, sizeof (m_offset));
-
-   add_remappable_src_value(&m_src);
-   add_remappable_src_value(&m_sampler_offset);
-   add_remappable_dst_value(&m_dst);
-}
-
-void TexInstruction::set_gather_comp(int cmp)
-{
-   m_inst_mode = cmp;
-}
-
-void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
-{
-   // I wonder whether we can actually end up here ...
-   for (auto c: candidates) {
-      if (*c == *m_src.reg_i(c->chan()))
-         m_src.set_reg_i(c->chan(), new_value);
-      if (*c == *m_dst.reg_i(c->chan()))
-         m_dst.set_reg_i(c->chan(), new_value);
-   }
-}
-
-void TexInstruction::set_offset(unsigned index, int32_t val)
-{
-   assert(index < 3);
-   m_offset[index] = val;
-}
-
-int TexInstruction::get_offset(unsigned index) const
-{
-   assert(index < 3);
-   return (m_offset[index] << 1 & 0x1f);
-}
-
-bool TexInstruction::is_equal_to(const Instruction& rhs) const
-{
-   assert(rhs.type() == tex);
-   const auto& r = static_cast<const TexInstruction&>(rhs);
-   return (m_opcode == r.m_opcode &&
-           m_dst == r.m_dst &&
-           m_src == r.m_src &&
-           m_sampler_id == r.m_sampler_id &&
-           m_resource_id == r.m_resource_id);
-}
-
-void TexInstruction::do_print(std::ostream& os) const
-{
-   const char *map_swz = "xyzw01?_";
-   os << opname(m_opcode) << " R" << m_dst.sel() << ".";
-   for (int i = 0; i < 4; ++i)
-      os << map_swz[m_dest_swizzle[i]];
-
-   os << " " << m_src
-      << " RESID:"  << m_resource_id << " SAMPLER:"
-      << m_sampler_id;
-}
-
-const char *TexInstruction::opname(Opcode op)
-{
-   switch (op) {
-   case ld: return "LD";
-   case get_resinfo: return "GET_TEXTURE_RESINFO";
-   case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
-   case get_tex_lod: return "GET_LOD";
-   case get_gradient_h: return "GET_GRADIENTS_H";
-   case get_gradient_v: return "GET_GRADIENTS_V";
-   case set_offsets: return "SET_TEXTURE_OFFSETS";
-   case keep_gradients: return "KEEP_GRADIENTS";
-   case set_gradient_h: return "SET_GRADIENTS_H";
-   case set_gradient_v: return "SET_GRADIENTS_V";
-   case sample: return "SAMPLE";
-   case sample_l: return "SAMPLE_L";
-   case sample_lb: return "SAMPLE_LB";
-   case sample_lz: return "SAMPLE_LZ";
-   case sample_g: return "SAMPLE_G";
-   case sample_g_lb: return "SAMPLE_G_L";
-   case gather4: return "GATHER4";
-   case gather4_o: return "GATHER4_O";
-   case sample_c: return "SAMPLE_C";
-   case sample_c_l: return "SAMPLE_C_L";
-   case sample_c_lb: return "SAMPLE_C_LB";
-   case sample_c_lz: return "SAMPLE_C_LZ";
-   case sample_c_g: return "SAMPLE_C_G";
-   case sample_c_g_lb: return "SAMPLE_C_G_L";
-   case gather4_c: return "GATHER4_C";
-   case gather4_c_o: return "OP_GATHER4_C_O";
-   }
-   return "ERROR";
-}
-
-
-
 static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
 {
    b->cursor = nir_before_instr(&tex->instr);
@@ -273,14 +140,14 @@ bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
    nir_ssa_def *lambda_exp =  nir_fexp2(b, lod);
    nir_ssa_def *scale = NULL;
 
-   if  (tex->is_array) {
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
+         scale = nir_frcp(b, nir_channels(b, size, 1));
+         scale = nir_swizzle(b, scale, swizzle, 3);
+   } else if  (tex->is_array) {
       int cmp_mask = (1 << (size->num_components - 1)) - 1;
       scale = nir_frcp(b, nir_channels(b, size,
                                        (nir_component_mask_t)cmp_mask));
-   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
-      scale = nir_frcp(b, nir_channels(b, size, 1));
-      scale = nir_swizzle(b, scale, swizzle, 3);
    }
 
    nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
@@ -408,7 +275,3 @@ r600_nir_lower_cube_to_2darray(nir_shader *shader)
                                         r600_nir_lower_cube_to_2darray_filer,
                                         r600_nir_lower_cube_to_2darray_impl, nullptr);
 }
-
-
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
new file mode 100644
index 0000000..0601d65
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.h
@@ -0,0 +1,10 @@
+#ifndef SFN_NIR_LOWER_TEX_H
+#define SFN_NIR_LOWER_TEX_H
+
+struct nir_shader;
+
+bool r600_nir_lower_int_tg4(nir_shader *nir);
+bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
+bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
+
+#endif // LALA_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
new file mode 100644
index 0000000..393bc67
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
@@ -0,0 +1,627 @@
+#include "sfn_optimizer.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_tex.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_lds.h"
+#include "sfn_peephole.h"
+#include "sfn_debug.h"
+
+#include <sstream>
+
+namespace r600 {
+
+bool optimize(Shader& shader)
+{
+   bool progress;
+
+   sfn_log << SfnLog::opt  << "Shader before optimization\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   do {
+      progress = false;
+      progress |= copy_propagation_fwd(shader);
+      progress |= dead_code_elimination(shader);
+      progress |= copy_propagation_backward(shader);
+      progress |= dead_code_elimination(shader);
+      progress |= simplify_source_vectors(shader);
+      progress |= peephole(shader);
+      progress |= dead_code_elimination(shader);
+   } while (progress);
+
+   return progress;
+}
+
+class DCEVisitor : public InstrVisitor {
+public:
+   DCEVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr  *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;};
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+
+   void visit(ControlFlowInstr *instr) override {(void)instr;};
+   void visit(IfInstr *instr) override {(void)instr;};
+   void visit(WriteScratchInstr *instr) override {(void)instr;};
+   void visit(StreamOutInstr *instr) override {(void)instr;};
+   void visit(MemRingOutInstr *instr) override {(void)instr;};
+   void visit(EmitVertexInstr *instr) override {(void)instr;};
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override;
+   void visit(RatInstr *instr) override {(void)instr;};
+
+
+   bool progress;
+};
+
+bool dead_code_elimination(Shader& shader)
+{
+   DCEVisitor dce;
+
+   do {
+
+      sfn_log << SfnLog::opt << "start dce run\n";
+
+      dce.progress = false;
+      for (auto& b : shader.func())
+         b->accept(dce);
+
+      sfn_log << SfnLog::opt << "finished dce run\n\n";
+
+   }  while (dce.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after DCE\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return dce.progress;
+}
+
+DCEVisitor::DCEVisitor():progress(false)
+{
+}
+
+void DCEVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
+
+   if (instr->has_instr_flag(Instr::dead))
+      return;
+
+   if (instr->dest() && instr->dest()->has_uses()) {
+      sfn_log << SfnLog::opt << " dest used\n";
+      return;
+   }
+
+   switch (instr->opcode()) {
+   case op2_kille:
+   case op2_killne:
+   case op2_kille_int:
+   case op2_killne_int:
+   case op2_killge:
+   case op2_killge_int:
+   case op2_killge_uint:
+   case op2_killgt:
+   case op2_killgt_int:
+   case op2_killgt_uint:
+   case op0_group_barrier:
+      sfn_log << SfnLog::opt << " never kill\n";
+      return;
+   default:
+      ;
+   }
+
+   bool dead = instr->set_dead();
+   sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
+   progress |= dead;
+}
+
+void DCEVisitor::visit(LDSReadInstr *instr)
+{
+   sfn_log << SfnLog::opt << "visit " << *instr << "\n";
+   progress |= instr->remove_unused_components();
+}
+
+void DCEVisitor::visit(AluGroup *instr)
+{
+   /* Groups are created because the instructions are used together
+    * so don't try to eliminate code there */
+   (void)instr;
+}
+
+void DCEVisitor::visit(TexInstr *instr)
+{
+   auto& dest = instr->dst();
+
+   bool has_uses = false;
+   RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
+   for (int i = 0; i < 4; ++i) {
+      if (!dest[i]->has_uses())
+         swz[i] = 7;
+      else
+         has_uses |= true;
+   }
+   instr->set_dest_swizzle(swz);
+
+   if (has_uses)
+      return;
+
+   progress |= instr->set_dead();
+}
+
+void DCEVisitor::visit(FetchInstr *instr)
+{
+   auto& dest = instr->dst();
+
+   bool has_uses = false;
+   RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
+   for (int i = 0; i < 4; ++i) {
+      if (!dest[i]->has_uses())
+         swz[i] = 7;
+      else
+         has_uses |= true;
+   }
+   instr->set_dest_swizzle(swz);
+
+   if (has_uses)
+      return;
+
+   sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
+
+   progress |= instr->set_dead();
+}
+
+void DCEVisitor::visit(Block *block)
+{
+   auto i = block->begin();
+   auto e = block->end();
+   while (i != e) {
+      auto n = i++;
+      if (!(*n)->keep()) {
+         (*n)->accept(*this);
+         if ((*n)->is_dead()) {
+            block->erase(n);
+         }
+      }
+   }
+}
+
+void visit(ControlFlowInstr *instr)
+{
+   (void)instr;
+}
+
+void visit(IfInstr *instr)
+{
+   (void)instr;
+}
+
+class CopyPropFwdVisitor : public InstrVisitor {
+public:
+   CopyPropFwdVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(IfInstr *instr) override {(void)instr;}
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   // TODO: these two should use copy propagation
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+
+   bool progress;
+};
+
+
+class CopyPropBackVisitor : public InstrVisitor {
+public:
+   CopyPropBackVisitor();
+
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(IfInstr *instr) override {(void)instr;}
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   bool progress;
+};
+
+bool copy_propagation_fwd(Shader& shader)
+{
+   auto& root = shader.func();
+   CopyPropFwdVisitor copy_prop;
+
+   do {
+      copy_prop.progress = false;
+      for (auto b : root)
+         b->accept(copy_prop);
+   }  while (copy_prop.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after Copy Prop forward\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+
+   return copy_prop.progress;
+}
+
+bool copy_propagation_backward(Shader& shader)
+{
+   CopyPropBackVisitor copy_prop;
+
+   do {
+      copy_prop.progress = false;
+      for (auto b: shader.func())
+         b->accept(copy_prop);
+   }  while (copy_prop.progress);
+
+   sfn_log << SfnLog::opt  << "Shader after Copy Prop backwards\n";
+   if (sfn_log.has_debug_flag(SfnLog::opt)) {
+      std::stringstream ss;
+      shader.print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return copy_prop.progress;
+}
+
+CopyPropFwdVisitor::CopyPropFwdVisitor():
+   progress(false)
+{}
+
+void CopyPropFwdVisitor::visit(AluInstr *instr)
+{
+   sfn_log << SfnLog::opt << "CopyPropFwdVisitor:["
+           << instr->block_id() << ":" << instr->index() << "] " << *instr
+           << " dset=" << instr->dest() << " ";
+
+
+
+   if (instr->dest()) {
+      sfn_log << SfnLog::opt << "has uses; "
+              << instr->dest()->uses().size();
+   }
+
+   sfn_log << SfnLog::opt << "\n";
+
+   if (!instr->can_propagate_src()) {
+      return;
+   }
+
+   auto src = instr->psrc(0);
+   auto dest = instr->dest();
+
+   for (auto& i : instr->dest()->uses()) {
+      /* SSA can always be propagated, registers only in the same block
+       * and only if they are not assigned to more than once */
+      if (dest->is_ssa() ||
+          (instr->block_id() == i->block_id() &&
+           instr->index() < i->index() &&
+           dest->uses().size() == 1)) {
+         sfn_log << SfnLog::opt << "   Try replace in "
+                 << i->block_id() << ":" << i->index()
+                 << *i<< "\n";
+         progress |= i->replace_source(dest, src);
+      }
+   }
+   if (instr->dest()) {
+      sfn_log << SfnLog::opt << "has uses; "
+              << instr->dest()->uses().size();
+   }
+   sfn_log << SfnLog::opt << "  done\n";
+}
+
+
+void CopyPropFwdVisitor::visit(AluGroup *instr)
+{
+   (void)instr;
+}
+
+void CopyPropFwdVisitor::visit(TexInstr *instr)
+{
+   (void)instr;
+}
+
+void CopyPropFwdVisitor::visit(FetchInstr *instr)
+{
+   (void)instr;
+}
+
+void CopyPropFwdVisitor::visit(Block *instr)
+{
+   for (auto& i: *instr)
+      i->accept(*this);
+}
+
+CopyPropBackVisitor::CopyPropBackVisitor():
+   progress(false)
+{
+
+}
+
+void CopyPropBackVisitor::visit(AluInstr *instr)
+{
+   bool local_progress = false;
+
+   sfn_log << SfnLog::opt << "CopyPropBackVisitor:["
+           << instr->block_id() << ":" << instr->index() << "] " << *instr << "\n";
+
+
+   if (!instr->can_propagate_dest()) {
+      return;
+   }
+
+   auto src_reg = instr->psrc(0)->as_register();
+   if (!src_reg) {
+      return;
+   }
+
+   if (src_reg->uses().size() > 1)
+      return;
+
+   auto dest = instr->dest();
+   if (!dest ||
+       !instr->has_alu_flag(alu_write)) {
+      return;
+   }
+
+   if (!dest->is_ssa() && dest->parents().size() > 1)
+      return;
+
+  for (auto& i: src_reg->parents()) {
+     sfn_log << SfnLog::opt << "Try replace dest in "
+             << i->block_id() << ":" << i->index()
+             << *i<< "\n";
+
+     if (i->replace_dest(dest, instr))  {
+        dest->del_parent(instr);
+        dest->add_parent(i);
+        for (auto d : instr->dependend_instr()) {
+           d->add_required_instr(i);
+        }
+        local_progress = true;
+     }
+  }
+
+  if (local_progress)
+     instr->set_dead();
+
+  progress |= local_progress;
+}
+
+void CopyPropBackVisitor::visit(AluGroup *instr)
+{
+   for (auto& i: *instr) {
+      if (i)
+         i->accept(*this);
+   }
+}
+
+void CopyPropBackVisitor::visit(TexInstr *instr)
+{
+   (void)instr;
+}
+
+void CopyPropBackVisitor::visit(FetchInstr *instr)
+{
+   (void)instr;
+}
+
+void CopyPropBackVisitor::visit(Block *instr)
+{
+   for (auto i = instr->rbegin(); i != instr->rend(); ++i)
+      if (!(*i)->is_dead())
+         (*i)->accept(*this);
+}
+
+class SimplifySourceVecVisitor : public InstrVisitor {
+public:
+   SimplifySourceVecVisitor():progress(false) {}
+
+   void visit(AluInstr *instr) override{(void)instr;}
+   void visit(AluGroup *instr) override{(void)instr;}
+   void visit(TexInstr *instr) override;
+   void visit(ExportInstr *instr) override;
+   void visit(FetchInstr *instr) override;
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override;
+   void visit(IfInstr *instr) override;
+   void visit(WriteScratchInstr *instr) override;
+   void visit(StreamOutInstr *instr) override;
+   void visit(MemRingOutInstr *instr) override;
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   void replace_src(Instr *instr, RegisterVec4& reg4);
+
+   bool progress;
+};
+
+bool simplify_source_vectors(Shader& sh)
+{
+   SimplifySourceVecVisitor visitor;
+
+   for (auto b: sh.func())
+      b->accept(visitor);
+
+   return visitor.progress;
+}
+
+void SimplifySourceVecVisitor::visit(TexInstr *instr)
+{
+   if (instr->opcode() != TexInstr::get_resinfo) {
+      replace_src(instr, instr->src());
+   }
+}
+
+void SimplifySourceVecVisitor::visit(WriteScratchInstr *instr)
+{
+   (void) instr;
+}
+
+class ReplaceConstSource : public AluInstrVisitor {
+public:
+   ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
+       old_use(old_use_), vreg(vreg_), index(i),success(false) {}
+
+   using AluInstrVisitor::visit;
+
+   void visit(AluInstr *alu) override;
+
+   Instr *old_use;
+   RegisterVec4& vreg;
+   int index;
+   bool success;
+};
+
+void SimplifySourceVecVisitor::visit(ExportInstr *instr)
+{
+   replace_src(instr, instr->value());
+}
+
+void SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
+{
+   for (int i = 0; i < 4; ++i) {
+      auto s = reg4[i];
+
+      if (s->chan() > 3)
+         continue;
+
+      if (!s->is_ssa())
+         continue;
+
+      /* Cayman trans ops have more then one parent for
+       * one dest */
+      if (s->parents().size() != 1)
+         continue;
+
+      auto& op = *s->parents().begin();
+
+      ReplaceConstSource visitor(instr, reg4, i);
+
+      op->accept(visitor);
+
+      progress |= visitor.success;
+   }
+}
+
+void SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
+{
+   (void)instr;
+}
+
+void SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
+{
+   (void)instr;
+}
+
+void ReplaceConstSource::visit(AluInstr *alu)
+{
+   if (alu->opcode() != op1_mov)
+      return;
+
+   if (alu->has_alu_flag(alu_src0_abs) ||
+       alu->has_alu_flag(alu_src0_neg))
+      return;
+
+   auto src = alu->psrc(0);
+   assert(src);
+
+   int override_chan = -1;
+
+   auto ic = src->as_inline_const();
+   if (ic) {
+      if (ic->sel() == ALU_SRC_0)
+         override_chan = 4;
+
+      if (ic->sel() == ALU_SRC_1)
+         override_chan = 5;
+   }
+
+   auto literal = src->as_literal();
+   if (literal) {
+
+      if (literal->value() == 0)
+         override_chan = 4;
+
+      if (literal->value() == 0x3F800000)
+         override_chan = 5;
+   }
+
+   if (override_chan >= 0) {
+      vreg[index]->del_use(old_use);
+      auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
+      vreg.set_value(index, reg);
+      success = true;
+   }
+}
+
+void SimplifySourceVecVisitor::visit(FetchInstr *instr)
+{
+   (void) instr;
+}
+
+void SimplifySourceVecVisitor::visit(Block *instr)
+{
+   for (auto i = instr->rbegin(); i != instr->rend(); ++i)
+      if (!(*i)->is_dead())
+         (*i)->accept(*this);
+}
+
+void SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
+{
+   (void) instr;
+}
+
+void SimplifySourceVecVisitor::visit(IfInstr *instr)
+{
+   (void) instr;
+}
+
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.h b/src/gallium/drivers/r600/sfn/sfn_optimizer.h
new file mode 100644
index 0000000..a1b3a13
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.h
@@ -0,0 +1,17 @@
+#ifndef OPTIMIZER_H
+#define OPTIMIZER_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+bool dead_code_elimination(Shader& shader);
+bool copy_propagation_fwd(Shader& shader);
+bool copy_propagation_backward(Shader& shader);
+bool simplify_source_vectors(Shader& sh);
+
+bool optimize(Shader& shader);
+
+}
+
+#endif // OPTIMIZER_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp
deleted file mode 100644
index dbffcfa..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_optimizers.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "sfn_optimizers.h"
-#include "sfn_instruction_block.h"
-
-namespace r600 {
-
-std::vector<PInstruction>
-flatten_shader(const std::vector<InstructionBlock> &ir)
-{
-
-}
-
-}
\ No newline at end of file
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizers.h b/src/gallium/drivers/r600/sfn/sfn_optimizers.h
deleted file mode 100644
index d17d32b..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_optimizers.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef SFN_OPTIMIZERS_H
-#define SFN_OPTIMIZERS_H
-
-#include "sfn_instruction_base.h"
-
-namespace r600 {
-
-std::vector<PInstruction>
-flatten_alu_ops(const std::vector<InstructionBlock> &ir);
-
-
-}
-
-#endif // SFN_OPTIMIZERS_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_peephole.cpp b/src/gallium/drivers/r600/sfn/sfn_peephole.cpp
new file mode 100644
index 0000000..8eebec0
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_peephole.cpp
@@ -0,0 +1,212 @@
+#include "sfn_peephole.h"
+
+namespace r600 {
+
+
+class PeepholeVisitor : public InstrVisitor {
+public:
+   void visit(AluInstr *instr) override;
+   void visit(AluGroup *instr) override;
+   void visit(TexInstr *instr) override {(void)instr;};
+   void visit(ExportInstr *instr) override {(void)instr;}
+   void visit(FetchInstr *instr) override {(void)instr;}
+   void visit(Block *instr) override;
+   void visit(ControlFlowInstr *instr) override {(void)instr;}
+   void visit(IfInstr *instr) override;
+   void visit(WriteScratchInstr *instr) override {(void)instr;}
+   void visit(StreamOutInstr *instr) override {(void)instr;}
+   void visit(MemRingOutInstr *instr) override {(void)instr;}
+   void visit(EmitVertexInstr *instr) override {(void)instr;}
+   void visit(GDSInstr *instr) override {(void)instr;};
+   void visit(WriteTFInstr *instr) override {(void)instr;};
+   void visit(LDSAtomicInstr *instr) override {(void)instr;};
+   void visit(LDSReadInstr *instr) override {(void)instr;};
+   void visit(RatInstr *instr) override {(void)instr;};
+
+   bool src_is_zero(PVirtualValue value);
+   bool src_is_one(PVirtualValue value);
+
+   void convert_to_mov(AluInstr *alu, int src_idx);
+
+
+   bool progress{false};
+};
+
+
+bool peephole(Shader& sh)
+{
+   PeepholeVisitor peephole;
+   for(auto b : sh.func())
+      b->accept(peephole);
+   return peephole.progress;
+}
+
+void PeepholeVisitor::visit(AluInstr *instr)
+{
+   switch (instr->opcode()) {
+   case op2_add:
+   case op2_add_int:
+      if (src_is_zero(instr->psrc(0)))
+         convert_to_mov(instr, 1);
+      else if (src_is_zero(instr->psrc(1)))
+         convert_to_mov(instr, 0);
+      break;
+   case op2_mul:
+   case op2_mul_ieee:
+      if (src_is_one(instr->psrc(0)))
+         convert_to_mov(instr, 1);
+      else if (src_is_one(instr->psrc(1)))
+         convert_to_mov(instr, 0);
+      break;
+   case op3_muladd:
+   case op3_muladd_ieee:
+      if (src_is_zero(instr->psrc(0)) ||
+          src_is_zero(instr->psrc(1)))
+         convert_to_mov(instr, 2);
+      break;
+   default:
+      ;
+   }
+}
+
+bool PeepholeVisitor::src_is_zero(PVirtualValue value)
+{
+   if (value->as_inline_const() &&
+       value->as_inline_const()->sel() == ALU_SRC_0)
+      return true;
+
+   if (value->as_literal() &&
+       value->as_literal()->value() == 0)
+      return true;
+
+   return false;
+}
+
+bool PeepholeVisitor::src_is_one(PVirtualValue value)
+{
+   if (value->as_inline_const() &&
+       value->as_inline_const()->sel() == ALU_SRC_1)
+      return true;
+
+   if (value->as_literal() &&
+       value->as_literal()->value() == 0x3f800000)
+      return true;
+
+   return false;
+}
+
+void PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
+{
+   AluInstr::SrcValues new_src{alu->psrc(src_idx)};
+   alu->set_sources(new_src);
+   alu->set_op(op1_mov);
+   progress = true;
+}
+
+
+void PeepholeVisitor::visit(AluGroup *instr)
+{
+
+}
+
+void PeepholeVisitor::visit(Block *instr)
+{
+   for (auto& i: *instr)
+      i->accept(*this);
+}
+
+class ReplaceIfPredicate : public AluInstrVisitor {
+public:
+   ReplaceIfPredicate(AluInstr *pred):
+      m_pred(pred) {}
+
+   using AluInstrVisitor::visit;
+
+   void visit(AluInstr *alu) override;
+
+   AluInstr *m_pred;
+   bool success{false};
+};
+
+void PeepholeVisitor::visit(IfInstr *instr)
+{
+   auto pred = instr->predicate();
+
+   auto& src1 = pred->src(1);
+   if (src1.as_inline_const() &&
+       src1.as_inline_const()->sel() == ALU_SRC_0) {
+      auto src0 = pred->src(0).as_register();
+      if (src0 && src0->is_ssa()) {
+         assert(!src0->parents().empty());
+         auto parent = *src0->parents().begin();
+
+         ReplaceIfPredicate visitor(pred);
+         parent->accept(visitor);
+         progress |= visitor.success;
+      }
+   }
+}
+
+static EAluOp pred_from_op(EAluOp pred_op, EAluOp op)
+{
+   switch (pred_op) {
+   case op2_pred_setne_int:
+      switch (op) {
+      /*
+       case op2_setge_dx10 : return op2_pred_setge_int;
+       case op2_setgt_dx10 : return op2_pred_setgt_int;
+       case op2_sete_dx10 : return op2_prede_int;
+       case op2_setne_dx10 : return op2_pred_setne_int;
+      */
+      case op2_setge_int : return op2_pred_setge_int;
+      case op2_setgt_int : return op2_pred_setgt_int;
+      case op2_setge_uint : return op2_pred_setge_uint;
+      case op2_setgt_uint : return op2_pred_setgt_uint;
+      case op2_sete_int : return op2_prede_int;
+      case op2_setne_int : return op2_pred_setne_int;
+      default:
+         return op0_nop;
+      }
+   case op2_prede_int:
+      switch (op) {
+      case op2_sete_int : return op2_pred_setne_int;
+      case op2_setne_int : return op2_prede_int;
+      default:
+         return op0_nop;
+      }
+   default:
+      return op0_nop;
+   }
+}
+
+void ReplaceIfPredicate::visit(AluInstr *alu)
+{
+   auto new_op = pred_from_op(m_pred->opcode(), alu->opcode());
+
+   if (new_op == op0_nop)
+      return;
+
+   /* Have to figure out how to pass the dependency correctly */
+   /*for (auto& s : alu->sources()) {
+      if (s->as_register() && s->as_register()->addr())
+         return;
+   }*/
+
+   m_pred->set_op(new_op);
+   m_pred->set_sources(alu->sources());
+
+   if (alu->has_alu_flag(alu_src0_abs))
+      m_pred->set_alu_flag(alu_src0_abs);
+   if (alu->has_alu_flag(alu_src1_abs))
+      m_pred->set_alu_flag(alu_src1_abs);
+
+   if (alu->has_alu_flag(alu_src0_neg))
+      m_pred->set_alu_flag(alu_src0_neg);
+
+   if (alu->has_alu_flag(alu_src1_neg))
+      m_pred->set_alu_flag(alu_src1_neg);
+
+   success = true;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_peephole.h b/src/gallium/drivers/r600/sfn/sfn_peephole.h
new file mode 100644
index 0000000..1384fc0
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_peephole.h
@@ -0,0 +1,13 @@
+#ifndef PEEPHOLE_H
+#define PEEPHOLE_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+bool peephole(Shader& sh);
+
+}
+
+
+#endif // PEEPHOLE_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_ra.cpp b/src/gallium/drivers/r600/sfn/sfn_ra.cpp
new file mode 100644
index 0000000..375362d
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_ra.cpp
@@ -0,0 +1,268 @@
+#include "sfn_debug.h"
+#include "sfn_ra.h"
+
+#include <cassert>
+#include <queue>
+
+namespace r600 {
+
+void ComponentInterference::prepare_row(int row)
+{
+   m_rows.resize(row + 1);
+
+}
+
+void ComponentInterference::add(size_t idx1, size_t idx2)
+{
+   assert(idx1 > idx2);
+   assert(m_rows.size() > idx1);
+   m_rows[idx1].push_back(idx2);
+   m_rows[idx2].push_back(idx1);
+}
+
+
+Interference::Interference(LiveRangeMap& map):
+   m_map(map)
+{
+   initialize();
+}
+
+void Interference::initialize()
+{
+   for(int i = 0; i < 4; ++i) {
+      initialize(m_components_maps[i], m_map.component(i));
+   }
+}
+
+void Interference::initialize(ComponentInterference& comp_interference,
+                              LiveRangeMap::ChannelLiveRange& clr)
+{
+   for (size_t row = 0; row < clr.size(); ++row) {
+      auto& row_entry = clr[row];
+      comp_interference.prepare_row(row);
+      for (size_t col = 0; col < row; ++col) {
+         auto& col_entry = clr[col];
+         if (row_entry.m_end >= col_entry.m_start &&
+             row_entry.m_start <= col_entry.m_end)
+            comp_interference.add(row, col);
+      }
+   }
+}
+
+struct Group {
+   int priority;
+   std::array<PRegister, 4> channels;
+};
+
+static inline bool operator < (const Group& lhs, const Group& rhs)
+{
+   return lhs.priority < rhs.priority;
+}
+
+using GroupRegisters = std::priority_queue<Group>;
+
+static bool
+group_allocation (LiveRangeMap& lrm, const Interference&  interference, GroupRegisters& groups)
+{
+   int color = 0;
+   // allocate grouped registers
+   while (!groups.empty()) {
+      auto group = groups.top();
+      groups.pop();
+
+      int start_comp = 0;
+      while (!group.channels[start_comp])
+         ++start_comp;
+
+      sfn_log << SfnLog::merge << "Color group with " << *group.channels[start_comp] << "\n";
+
+      // don't restart registers for exports, we may be able tp merge the
+      // export calls, is fthe registers are consecutive
+      if (group.priority > 0)
+         color = 0;
+
+      while (color < 124) {
+         /* Find the coloring for the first channel */
+         bool color_in_use = false;
+         int comp = start_comp;
+
+         auto& adjecency = interference.row(start_comp, group.channels[comp]->index());
+         auto& regs = lrm.component(comp);
+
+         sfn_log << SfnLog::merge << "Try color "<< color;
+
+         for (auto adj : adjecency) {
+            if (regs[adj].m_color == color) {
+               color_in_use = true;
+               sfn_log << SfnLog::merge << " in use\n";
+               break;
+            }
+         }
+
+         if (color_in_use) {
+            ++color;
+            continue;
+         }
+
+         /* First channel color found, check whether it can be used for all channels */
+         while (comp < 4) {
+            sfn_log << SfnLog::merge << " interference: ";
+            if (group.channels[comp]) {
+               auto& component_life_ranges = lrm.component(comp);
+               auto& adjecencies = interference.row(comp, group.channels[comp]->index());
+
+               for (auto adj_index : adjecencies) {
+                  sfn_log << SfnLog::merge << *component_life_ranges[adj_index].m_register << " ";
+                  if (component_life_ranges[adj_index].m_color == color) {
+                     color_in_use = true;
+                     sfn_log << SfnLog::merge << "used";
+                     break;
+                  }
+               }
+
+               if (color_in_use)
+                  break;
+            }
+            ++comp;
+         }
+
+         /* We couldn't allocate all channels with this color, so try next */
+         if (color_in_use) {
+            ++color;
+            sfn_log << SfnLog::merge << "\n";
+            continue;
+         }
+         sfn_log << SfnLog::merge << " success\n";
+
+         /* Coloring successful */
+         for (auto reg : group.channels) {
+            if (reg) {
+               auto& vregs = lrm.component(reg->chan());
+               auto& vreg_cmp = vregs[reg->index()];
+               assert(vreg_cmp.m_start != -1 || vreg_cmp.m_end != -1);
+               vreg_cmp.m_color = color;
+            }
+         }
+         break;
+      }
+
+      if (color == 124)
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+scalar_allocation (LiveRangeMap& lrm, const Interference&  interference)
+{
+   for (int comp = 0; comp < 4; ++comp) {
+      auto& live_ranges = lrm.component(comp);
+      for (auto& r : live_ranges) {
+         if (r.m_color != -1)
+            continue;
+
+         if (r.m_start == -1 &&
+             r.m_end == -1)
+            continue;
+
+         sfn_log << SfnLog::merge << "Color " << *r.m_register << "\n";
+
+         auto& adjecency = interference.row(comp, r.m_register->index());
+
+         int color = 0;
+
+         while (color < 124) {
+            bool color_in_use = false;
+            for (auto adj : adjecency) {
+               if (live_ranges[adj].m_color == color) {
+                  color_in_use = true;
+                  break;
+               }
+            }
+
+            if (color_in_use) {
+               ++color;
+               continue;
+            }
+
+            r.m_color = color;
+            break;
+         }
+         if (color == 124)
+            return false;
+      }
+   }
+   return true;
+}
+
+bool register_allocation(LiveRangeMap& lrm)
+{
+   Interference interference(lrm);
+
+   std::map<int, Group> groups;
+
+   // setup fixed colors and group relationships
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = lrm.component(i);
+      for (auto& entry : comp) {
+         sfn_log << SfnLog::merge << "Prepare RA for "
+                 << *entry.m_register
+                 << " [" << entry.m_start << ", " << entry.m_end << "]\n";
+         auto pin = entry.m_register->pin();
+         if (entry.m_start == -1 && entry.m_end == -1) {
+            if (pin == pin_group || pin == pin_chgr)
+               entry.m_register->set_chan(7);
+            continue;
+         }
+
+         auto sel = entry.m_register->sel();
+         /* fully pinned registers contain system values with the
+          * definite register index, and array values are allocated
+          * right after the system registers, so just reuse the IDs (for now)  */
+         if (pin == pin_fully || pin == pin_array) {
+            /* Must set all array element entries */
+            sfn_log << SfnLog::merge << "Pin color " << sel << " to " << *entry.m_register << "\n";
+            entry.m_color = sel;
+         } else if (pin == pin_group || pin == pin_chgr) {
+            /* Groups must all have the same sel() value, because they are used
+             * as vec4 registers */
+            auto igroup = groups.find(sel);
+            if (igroup != groups.end()) {
+               igroup->second.channels[i] = entry.m_register;
+               assert(comp[entry.m_register->index()].m_register->index() == entry.m_register->index());
+            } else {
+               int priority = entry.m_use.test(LiveRangeEntry::use_export) ? - entry.m_end : entry.m_start;
+               Group group{priority, {nullptr, nullptr, nullptr, nullptr}};
+               group.channels[i] = entry.m_register;
+               assert(comp[group.channels[i]->index()].m_register->index() == entry.m_register->index());
+               groups[sel] = group;
+            }
+         }
+      }
+   }
+
+   GroupRegisters groups_sorted;
+   for (auto& [sel, group] : groups)
+      groups_sorted.push(group);
+
+   if (!group_allocation (lrm, interference, groups_sorted))
+      return false;
+
+   if (!scalar_allocation(lrm, interference))
+      return false;
+
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = lrm.component(i);
+      for (auto& entry : comp) {
+         sfn_log << SfnLog::merge << "Set " << *entry.m_register << " to ";
+         entry.m_register->set_sel(entry.m_color);
+         entry.m_register->set_pin(pin_none);
+         sfn_log << SfnLog::merge << *entry.m_register << "\n";
+      }
+   }
+
+   return true;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_ra.h b/src/gallium/drivers/r600/sfn/sfn_ra.h
new file mode 100644
index 0000000..b40b611
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_ra.h
@@ -0,0 +1,51 @@
+#ifndef INTERFERENCE_H
+#define INTERFERENCE_H
+
+#include "sfn_valuefactory.h"
+
+#include <vector>
+
+namespace r600 {
+
+class ComponentInterference
+{
+public:
+
+   using Row = std::vector<int>;
+
+   void prepare_row(int row);
+
+   void add(size_t idx1, size_t idx2);
+
+   auto row(int idx) const -> const Row& {
+      assert((size_t)idx < m_rows.size()); return m_rows[idx];}
+
+private:
+
+   std::vector<Row> m_rows;
+};
+
+class Interference {
+public:
+   Interference(LiveRangeMap& map);
+
+   const auto& row(int comp, int index) const {
+      assert(comp < 4);
+      return m_components_maps[comp].row(index);
+   }
+
+private:
+   void initialize();
+   void initialize(ComponentInterference& comp, LiveRangeMap::ChannelLiveRange& clr);
+
+   LiveRangeMap& m_map;
+   std::array<ComponentInterference, 4> m_components_maps;
+
+
+};
+
+bool register_allocation(LiveRangeMap& lrm);
+
+}
+
+#endif // INTERFERENCE_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
new file mode 100644
index 0000000..d180f25
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
@@ -0,0 +1,890 @@
+#include "sfn_scheduler.h"
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_controlflow.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_mem.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_tex.h"
+#include "sfn_debug.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace r600 {
+
+class CollectInstructions : public InstrVisitor {
+
+public:
+   CollectInstructions(ValueFactory& vf):
+      m_value_factory(vf)  {}
+
+   void visit(AluInstr *instr) override {
+      if (instr->has_alu_flag(alu_is_trans))
+         alu_trans.push_back(instr);
+      else {
+         if (instr->alu_slots() == 1)
+            alu_vec.push_back(instr);
+         else
+            alu_groups.push_back(instr->split(m_value_factory));
+      }
+   }
+   void visit(AluGroup *instr) override {
+      alu_groups.push_back(instr);
+   }
+   void visit(TexInstr *instr) override {
+      tex.push_back(instr);
+   }
+   void visit(ExportInstr *instr) override {
+      exports.push_back(instr);
+   }
+   void visit(FetchInstr *instr)  override {
+      fetches.push_back(instr);
+   }
+   void visit(Block *instr) override {
+      for (auto& i: *instr)
+         i->accept(*this);
+   }
+
+   void visit(ControlFlowInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(IfInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(EmitVertexInstr *instr) override {
+      assert(!m_cf_instr);
+      m_cf_instr = instr;
+   }
+
+   void visit(WriteScratchInstr *instr) override {
+      mem_write_instr.push_back(instr);
+   }
+
+   void visit(StreamOutInstr *instr) override {
+      mem_write_instr.push_back(instr);
+   }
+
+   void visit(MemRingOutInstr *instr) override {
+      mem_ring_writes.push_back(instr);
+   }
+
+   void visit(GDSInstr *instr) override {
+      gds_op.push_back(instr);
+   }
+
+   void visit(WriteTFInstr *instr) override {
+      write_tf.push_back(instr);
+   }
+
+   void visit(LDSReadInstr *instr) override {
+      std::vector<AluInstr*> buffer;
+      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
+      for (auto& i: buffer) {
+         i->accept(*this);
+      }      
+   }
+
+   void visit(LDSAtomicInstr *instr) override {
+      std::vector<AluInstr*> buffer;
+      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
+      for (auto& i: buffer) {
+         i->accept(*this);
+      }
+   }
+
+   void visit(RatInstr *instr) override {
+      rat_instr.push_back(instr);
+   }
+
+
+   std::list<AluInstr *> alu_trans;
+   std::list<AluInstr *> alu_vec;
+   std::list<TexInstr *> tex;
+   std::list<AluGroup *> alu_groups;
+   std::list<ExportInstr *> exports;
+   std::list<FetchInstr *> fetches;
+   std::list<WriteOutInstr *> mem_write_instr;
+   std::list<MemRingOutInstr *> mem_ring_writes;
+   std::list<GDSInstr *> gds_op;
+   std::list<WriteTFInstr *> write_tf;
+   std::list<RatInstr *> rat_instr;
+
+   Instr *m_cf_instr{nullptr};
+   ValueFactory& m_value_factory;
+
+   AluInstr *m_last_lds_instr{nullptr};
+};
+
+class BlockSheduler {
+public:
+   BlockSheduler();
+   void run(Shader *shader);
+
+   void finalize();
+
+private:
+
+   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);
+
+   bool collect_ready(CollectInstructions &available);
+
+   template <typename T>
+   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
+
+   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);
+
+   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
+   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
+
+   template <typename I>
+   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
+
+   template <typename I>
+   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
+
+   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
+   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);
+
+   bool schedule_alu_to_group_vec(AluGroup *group);
+   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);
+
+   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);
+
+   template <typename I>
+   bool schedule(std::list<I *>& ready_list);
+
+   template <typename I>
+   bool schedule_block(std::list<I *>& ready_list);
+
+   std::list<AluInstr *> alu_vec_ready;
+   std::list<AluInstr *> alu_trans_ready;
+   std::list<AluGroup *> alu_groups_ready;
+   std::list<TexInstr *> tex_ready;
+   std::list<ExportInstr *> exports_ready;
+   std::list<FetchInstr *> fetches_ready;
+   std::list<WriteOutInstr *> memops_ready;
+   std::list<MemRingOutInstr *> mem_ring_writes_ready;
+   std::list<GDSInstr *> gds_ready;
+   std::list<WriteTFInstr *> write_tf_ready;
+   std::list<RatInstr *> rat_instr_ready;
+
+   enum {
+      sched_alu,
+      sched_tex,
+      sched_fetch,
+      sched_free,
+      sched_mem_ring,
+      sched_gds,
+      sched_write_tf,
+      sched_rat,
+   } current_shed;
+
+   ExportInstr *m_last_pos;
+   ExportInstr *m_last_pixel;
+   ExportInstr *m_last_param;
+
+   Block *m_current_block;
+
+   int m_lds_addr_count{0};
+   int m_alu_groups_schduled{0};
+
+};
+
+Shader *schedule(Shader *original)
+{
+   AluGroup::set_chipclass(original->chip_class());
+
+   sfn_log << SfnLog::schedule << "Original shader\n";
+   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
+      std::stringstream ss;
+      original->print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   // TODO later it might be necessary to clone the shader
+   // to be able to re-start scheduling
+
+   auto scheduled_shader = original;
+   BlockSheduler s;
+   s.run(scheduled_shader);
+   s.finalize();
+
+   sfn_log << SfnLog::schedule << "Scheduled shader\n";
+   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
+      std::stringstream ss;
+      scheduled_shader->print(ss);
+      sfn_log << ss.str() << "\n\n";
+   }
+
+   return scheduled_shader;
+}
+
+BlockSheduler::BlockSheduler():
+   current_shed(sched_alu),
+   m_last_pos(nullptr),
+   m_last_pixel(nullptr),
+   m_last_param(nullptr),
+   m_current_block(nullptr)
+{
+}
+
+void BlockSheduler::run( Shader *shader)
+{
+   Shader::ShaderBlocks scheduled_blocks;
+
+   for (auto& block : shader->func()) {
+      sfn_log << SfnLog::schedule  << "Process block " << block->id() <<"\n";
+      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
+         std::stringstream ss;
+         block->print(ss);
+         sfn_log << ss.str() << "\n";
+      }
+      schedule_block(*block, scheduled_blocks, shader->value_factory());
+   }
+
+   shader->reset_function(scheduled_blocks);
+}
+
+void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
+{
+
+   assert(in_block.id() >= 0);
+
+
+   current_shed = sched_fetch;
+   auto last_shed = sched_fetch;
+
+   CollectInstructions cir(vf);
+   in_block.accept(cir);
+
+   bool have_instr = collect_ready(cir);
+
+   m_current_block = new Block(in_block.nesting_depth(), in_block.id());
+
+   assert(m_current_block->id() >= 0);
+
+   while (have_instr) {
+
+      sfn_log << SfnLog::schedule << "Have ready instructions\n";
+
+      if (alu_vec_ready.size())
+         sfn_log << SfnLog::schedule << "  ALU V:" << alu_vec_ready.size() << "\n";
+
+      if (alu_trans_ready.size())
+         sfn_log << SfnLog::schedule <<  "  ALU T:" << alu_trans_ready.size() << "\n";
+
+      if (alu_groups_ready.size())
+         sfn_log << SfnLog::schedule << "  ALU G:" << alu_groups_ready.size() << "\n";
+
+      if (exports_ready.size())
+         sfn_log << SfnLog::schedule << "  EXP:" << exports_ready.size()
+                 << "\n";
+      if (tex_ready.size())
+         sfn_log << SfnLog::schedule << "  TEX:" << tex_ready.size()
+                 << "\n";
+      if (fetches_ready.size())
+         sfn_log << SfnLog::schedule << "  FETCH:" << fetches_ready.size()
+                 << "\n";
+      if (mem_ring_writes_ready.size())
+         sfn_log << SfnLog::schedule << "  MEM_RING:" << mem_ring_writes_ready.size()
+                 << "\n";
+      if (memops_ready.size())
+         sfn_log << SfnLog::schedule << "  MEM_OPS:" << mem_ring_writes_ready.size()
+                 << "\n";
+
+      if (!m_current_block->lds_group_active()) {
+         if (last_shed != sched_free && memops_ready.size() > 8)
+            current_shed = sched_free;
+         else if (mem_ring_writes_ready.size() > 5)
+            current_shed = sched_mem_ring;
+         else if (rat_instr_ready.size() > 3)
+            current_shed = sched_rat;
+         else if (gds_ready.size() > 3)
+            current_shed = sched_gds;
+         else if (tex_ready.size() > 3)
+            current_shed = sched_tex;         
+      }
+
+      switch (current_shed) {
+      case sched_alu:
+         if (!schedule_alu(out_blocks)) {
+            assert(!m_current_block->lds_group_active());
+            current_shed = sched_tex;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_tex:
+         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
+            current_shed = sched_fetch;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_fetch:
+         if (!fetches_ready.empty()) {
+            schedule_vtx(out_blocks);
+            last_shed = current_shed;
+         }
+         current_shed = sched_gds;
+         continue;
+      case sched_gds:
+         if (!gds_ready.empty()) {
+            schedule_gds(out_blocks, gds_ready);
+            last_shed = current_shed;
+         }
+         current_shed = sched_mem_ring;
+         continue;
+      case sched_mem_ring:
+         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
+            current_shed = sched_write_tf;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_write_tf:
+         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
+            current_shed = sched_rat;
+            continue;
+         }
+         last_shed = current_shed;
+         break;
+      case sched_rat:
+         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
+             current_shed = sched_free;
+             continue;
+          }
+         last_shed = current_shed;
+         break;
+      case sched_free:
+         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
+            current_shed = sched_alu;
+            break;
+         }
+         last_shed = current_shed;
+      }
+
+      have_instr = collect_ready(cir);
+   }
+
+   /* Emit exports always at end of a block */
+   while (collect_ready_type(exports_ready, cir.exports))
+      schedule_exports(out_blocks, exports_ready);
+
+   bool fail = false;
+
+   if (!cir.alu_groups.empty()) {
+      std::cerr << "Unscheduled ALU groups:\n";
+      for (auto& a : cir.alu_groups) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+
+   if (!cir.alu_vec.empty()){
+      std::cerr << "Unscheduled ALU vec ops:\n";
+      for (auto& a : cir.alu_vec) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+
+   if (!cir.alu_trans.empty()){
+      std::cerr << "Unscheduled ALU trans ops:\n";
+      for (auto& a : cir.alu_trans) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+   if (!cir.mem_write_instr.empty()){
+      std::cerr << "Unscheduled MEM ops:\n";
+      for (auto& a : cir.mem_write_instr) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+
+   if (!cir.fetches.empty()){
+      std::cerr << "Unscheduled Fetch ops:\n";
+      for (auto& a : cir.fetches) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+
+   if (!cir.tex.empty()){
+      std::cerr << "Unscheduled Tex ops:\n";
+      for (auto& a : cir.tex) {
+          std::cerr << "   " << *a << "\n";
+      }
+      fail = true;
+   }
+
+   assert(cir.tex.empty());
+   assert(cir.exports.empty());
+   assert(cir.fetches.empty());
+   assert(cir.alu_vec.empty());
+   assert(cir.mem_write_instr.empty());
+   assert(cir.mem_ring_writes.empty());
+
+   assert (!fail);
+
+   if (cir.m_cf_instr) {
+      // Assert that if condition is ready
+      m_current_block->push_back(cir.m_cf_instr);
+      cir.m_cf_instr->set_scheduled();
+   }
+
+   out_blocks.push_back(m_current_block);
+}
+
+void BlockSheduler::finalize()
+{
+   if (m_last_pos)
+      m_last_pos->set_is_last_export(true);
+   if (m_last_pixel)
+      m_last_pixel->set_is_last_export(true);
+   if (m_last_param)
+      m_last_param->set_is_last_export(true);
+}
+
+bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
+{
+   bool success = false;
+   AluGroup *group = nullptr;
+
+   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();
+
+   bool has_lds_ready = !alu_vec_ready.empty() &&
+                        (*alu_vec_ready.begin())->has_lds_access();
+
+   /* Schedule groups first. unless we have a pending LDS instuction
+    * We don't want the LDS instructions to be too far apart because the
+    * fetch + read from queue has to be in the same ALU CF block */
+   if (!alu_groups_ready.empty() && !has_lds_ready) {
+      group = *alu_groups_ready.begin();
+      alu_groups_ready.erase(alu_groups_ready.begin());
+      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
+      success = true;
+   } else {
+      if (has_alu_ready) {
+         group = new AluGroup();
+         sfn_log << SfnLog::schedule << "START new ALU group\n";
+      }
+   }
+
+   if (group) {
+      int free_slots = group->free_slots();
+
+      if (free_slots && has_alu_ready) {
+         if (!alu_vec_ready.empty())
+            success |= schedule_alu_to_group_vec(group);
+
+         /* Apparently one can't schedule a t-slot if there is already
+          * and LDS instruction scheduled.
+          * TODO: check whether this is only relevant for actual LDS instructions
+          * or also for instructions that read from the LDS return value queue */
+
+         if (free_slots & 0x10 && !has_lds_ready) {
+            sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
+            if (!alu_trans_ready.empty())
+               success |= schedule_alu_to_group_trans(group, alu_trans_ready);
+            if (!alu_vec_ready.empty())
+               success |= schedule_alu_to_group_trans(group, alu_vec_ready);
+         }
+      }
+
+      sfn_log << SfnLog::schedule << "Finalize ALU group\n";
+      group->set_scheduled();
+      group->fix_last_flag();
+      group->set_nesting_depth(m_current_block->nesting_depth());
+
+
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+         m_alu_groups_schduled = 0;
+      }
+
+      /* Pessimistic hack: If we have started an LDS group,
+       * make sure 8 instructions groups still fit into the CF
+       * TODO: take care of Address slot emission
+       * TODO: maybe do this CF split only in the assembler
+       */
+      /*if (group->slots() > m_current_block->remaining_slots() ||
+          (group->has_lds_group_start() &&
+           m_current_block->remaining_slots() < 7 * 8)) {
+         //assert(!m_current_block->lds_group_active());
+         start_new_block(out_blocks, Block::alu);
+      }*/
+
+      if (!m_current_block->try_reserve_kcache(*group)) {
+         assert(!m_current_block->lds_group_active());
+         start_new_block(out_blocks, Block::alu);
+         m_current_block->set_instr_flag(Instr::force_cf);
+      }
+
+      assert(m_current_block->try_reserve_kcache(*group));
+
+      if (group->has_lds_group_start())
+         m_current_block->lds_group_start(*group->begin());
+
+      m_current_block->push_back(group);
+      if (group->has_lds_group_end())
+         m_current_block->lds_group_end();
+   }
+
+   if (success)
+      ++m_alu_groups_schduled;
+
+   return success;
+}
+
+bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
+{
+   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() ==  0) {
+      start_new_block(out_blocks, Block::tex);
+      m_current_block->set_instr_flag(Instr::force_cf);
+   }
+
+
+   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
+      auto ii = tex_ready.begin();
+      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
+
+      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+         start_new_block(out_blocks, Block::tex);
+
+      for (auto prep : (*ii)->prepare_instr()) {
+         prep->set_scheduled();
+         m_current_block->push_back(prep);
+      }
+
+      (*ii)->set_scheduled();
+      m_current_block->push_back(*ii);
+      tex_ready.erase(ii);
+      return true;
+   }
+   return false;
+}
+
+bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
+{
+   if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
+      start_new_block(out_blocks, Block::vtx);
+      m_current_block->set_instr_flag(Instr::force_cf);
+   }
+   return schedule_block(fetches_ready);
+}
+
+template <typename I>
+bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
+{
+   bool was_full = m_current_block->remaining_slots() == 0;
+   if (m_current_block->type() != Block::gds || was_full) {
+      start_new_block(out_blocks, Block::gds);
+      if (was_full)
+         m_current_block->set_instr_flag(Instr::force_cf);
+   }
+   return schedule_block(ready_list);
+}
+
+
+void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
+{
+   if (!m_current_block->empty()) {
+      sfn_log << SfnLog::schedule << "Start new block\n";
+      assert(!m_current_block->lds_group_active());
+      out_blocks.push_back(m_current_block);
+      m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
+   }
+   m_current_block->set_type(type);
+}
+
+template <typename I>
+bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
+{
+   if (ready_list.empty())
+      return false;
+   if (m_current_block->type() != Block::cf)
+      start_new_block(out_blocks, Block::cf);
+   return schedule(ready_list);
+}
+
+
+bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
+{
+   assert(group);
+   assert(!alu_vec_ready.empty());
+
+   bool success =  false;
+   auto i = alu_vec_ready.begin();
+   auto e = alu_vec_ready.end();
+   while (i != e) {
+      sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
+      if (group->add_vec_instructions(*i)) {
+         auto old_i = i;
+         ++i;
+         if ((*old_i)->has_alu_flag(alu_is_lds)) {
+            --m_lds_addr_count;
+         }
+
+         alu_vec_ready.erase(old_i);
+         success = true;
+         sfn_log << SfnLog::schedule << " success\n";
+      } else {
+         ++i;
+         sfn_log << SfnLog::schedule << " failed\n";
+      }
+   }
+   return success;
+}
+
+bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
+{
+   assert(group);
+
+   bool success =  false;
+   auto i = readylist.begin();
+   auto e = readylist.end();
+   while (i != e) {
+      sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
+      if (group->add_trans_instructions(*i)) {
+         auto old_i = i;
+         ++i;
+         readylist.erase(old_i);
+         success = true;
+         sfn_log << SfnLog::schedule << " sucess\n";
+         break;
+      } else {
+         ++i;
+         sfn_log << SfnLog::schedule << " failed\n";
+      }
+   }
+   return success;
+}
+
+template <typename I>
+bool BlockSheduler::schedule(std::list<I *>& ready_list)
+{
+   if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
+      auto ii = ready_list.begin();
+      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
+      (*ii)->set_scheduled();
+      m_current_block->push_back(*ii);
+      ready_list.erase(ii);
+      return true;
+   }
+   return false;
+}
+
+template <typename I>
+bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
+{
+   bool success = false;
+   while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
+      auto ii = ready_list.begin();
+      sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
+              << m_current_block->remaining_slots() << "\n";
+      (*ii)->set_scheduled();
+      m_current_block->push_back(*ii);
+      ready_list.erase(ii);
+      success = true;
+   }
+   return success;
+}
+
+
+bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
+{
+   if (m_current_block->type() != Block::cf)
+      start_new_block(out_blocks, Block::cf);
+
+   if (!ready_list.empty()) {
+      auto ii = ready_list.begin();
+      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
+      (*ii)->set_scheduled();
+      m_current_block->push_back(*ii);
+      switch ((*ii)->export_type()) {
+      case ExportInstr::pos: m_last_pos = *ii; break;
+      case ExportInstr::param: m_last_param = *ii; break;
+      case ExportInstr::pixel: m_last_pixel = *ii; break;
+      }
+      (*ii)->set_is_last_export(false);
+      ready_list.erase(ii);
+      return true;
+   }
+   return false;
+}
+
+bool BlockSheduler::collect_ready(CollectInstructions &available)
+{
+   sfn_log << SfnLog::schedule << "Ready instructions\n";
+   bool result = false;
+   result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
+   result |= collect_ready_type(alu_trans_ready, available.alu_trans);
+   result |= collect_ready_type(alu_groups_ready, available.alu_groups);
+   result |= collect_ready_type(gds_ready, available.gds_op);
+   result |= collect_ready_type(tex_ready, available.tex);
+   result |= collect_ready_type(fetches_ready, available.fetches);
+   result |= collect_ready_type(memops_ready, available.mem_write_instr);
+   result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
+   result |= collect_ready_type(write_tf_ready, available.write_tf);
+   result |= collect_ready_type(rat_instr_ready, available.rat_instr);
+
+   sfn_log << SfnLog::schedule << "\n";
+   return result;
+}
+
+bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
+{
+   auto i = available.begin();
+   auto e = available.end();
+
+   for (auto alu : ready) {
+      alu->add_priority(100 * alu->register_priority());
+   }
+
+   int max_check = 0;
+   while (i != e && max_check++ < 32) {
+      if (ready.size() < 32 && (*i)->ready()) {
+
+         int priority = 0;
+         /* LDS fetches that use static offsets are usually ready ery fast,
+          * so that they would get schedules early, and this leaves the problem
+          * that we allocate too many registers with just constant values,
+          * and this will make problems wih RA. So limit the number of LDS
+          * address registers.
+          */
+         if ((*i)->has_alu_flag(alu_lds_address)) {
+            if (m_lds_addr_count > 64) {
+               ++i;
+               continue;
+            } else {
+               ++m_lds_addr_count;
+            }
+         }
+
+         /* LDS instructions are scheduled with high priority.
+          * instractions that can go into the t slot and don't have
+          * indirect access are put in last, so that they don't block
+          * vec-only instructions when scheduling to the vector slots
+          * for everything else we look at the register use */
+
+         if ((*i)->has_lds_access())
+             priority = 100000;
+         else if (AluGroup::has_t()) {
+            auto opinfo = alu_ops.find((*i)->opcode());
+            assert(opinfo != alu_ops.end());
+            if (opinfo->second.can_channel(AluOp::t) && !(*i)->indirect_addr().first)
+               priority = -1;
+         }
+
+         priority += 100 * (*i)->register_priority();
+
+         (*i)->add_priority(priority);
+         ready.push_back(*i);
+
+         auto old_i = i;
+         ++i;
+         available.erase(old_i);
+      } else
+         ++i;
+   }
+
+   for (auto& i: ready)
+      sfn_log << SfnLog::schedule << "V:  " << *i << "\n";
+
+   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
+                 return lhs->priority() > rhs->priority();});
+
+   for (auto& i: ready)
+      sfn_log << SfnLog::schedule << "V (S):  " << *i << "\n";
+
+   return !ready.empty();
+}
+
+template <typename T>
+struct type_char {
+
+};
+
+
+template <>
+struct type_char<AluInstr> {
+   static constexpr const char value = 'A';
+};
+
+template <>
+struct type_char<AluGroup>  {
+   static constexpr const char value = 'G';
+};
+
+template <>
+struct type_char<ExportInstr>  {
+   static constexpr const char value = 'E';
+};
+
+template <>
+struct type_char<TexInstr>  {
+   static constexpr const char value = 'T';
+};
+
+template <>
+struct type_char<FetchInstr>  {
+   static constexpr const char value = 'F';
+};
+
+template <>
+struct type_char<WriteOutInstr>  {
+   static constexpr const char value = 'M';
+};
+
+template <>
+struct type_char<MemRingOutInstr>  {
+   static constexpr const char value = 'R';
+};
+
+template <>
+struct type_char<WriteTFInstr>  {
+   static constexpr const char value = 'X';
+};
+
+template <>
+struct type_char<GDSInstr>  {
+   static constexpr const char value = 'S';
+};
+
+template <>
+struct type_char<RatInstr>  {
+   static constexpr const char value = 'I';
+};
+
+
+template <typename T>
+bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
+{
+   auto i = available.begin();
+   auto e = available.end();
+
+   while (i != e) {
+      if ((*i)->ready()) {
+         ready.push_back(*i);
+         auto old_i = i;
+         ++i;
+         available.erase(old_i);
+      } else
+         ++i;
+   }
+
+   for (auto& i: ready)
+      sfn_log << SfnLog::schedule << type_char<T>::value << ";  " << *i << "\n";
+
+   return !ready.empty();
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.h b/src/gallium/drivers/r600/sfn/sfn_scheduler.h
new file mode 100644
index 0000000..63ba7d9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.h
@@ -0,0 +1,13 @@
+#ifndef SHEDULER_H
+#define SHEDULER_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+
+Shader *schedule(Shader *original);
+
+}
+
+#endif // SHEDULER_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
new file mode 100644
index 0000000..ca7bcd9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
@@ -0,0 +1,1379 @@
+#include "sfn_shader.h"
+#include "sfn_instr.h"
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_export.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_lds.h"
+#include "sfn_instr_mem.h"
+
+#include "sfn_instr_controlflow.h"
+#include "sfn_liverangeevaluator.h"
+
+#include "sfn_shader_cs.h"
+#include "sfn_shader_fs.h"
+#include "sfn_shader_vs.h"
+#include "sfn_shader_gs.h"
+#include "sfn_shader_tess.h"
+
+#include "sfn_debug.h"
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+#include "nir.h"
+
+#include <numeric>
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+
+std::pair<unsigned, unsigned>
+r600_get_varying_semantic(unsigned varying_location)
+{
+   std::pair<unsigned, unsigned> result;
+   tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
+                                true, &result.first, &result.second);
+
+   if (result.first == TGSI_SEMANTIC_GENERIC) {
+      result.second += 9;
+   } else if (result.first == TGSI_SEMANTIC_PCOORD) {
+      result.second = 8;
+   }
+   return result;
+}
+
+void ShaderIO::set_sid(int sid)
+{
+   m_sid = sid;
+   switch (m_name) {
+   case TGSI_SEMANTIC_POSITION:
+   case TGSI_SEMANTIC_PSIZE:
+   case TGSI_SEMANTIC_EDGEFLAG:
+   case TGSI_SEMANTIC_FACE:
+   case TGSI_SEMANTIC_SAMPLEMASK:
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      m_spi_sid = 0;
+   break;
+   case TGSI_SEMANTIC_GENERIC:
+   case TGSI_SEMANTIC_TEXCOORD:
+   case TGSI_SEMANTIC_PCOORD:
+      m_spi_sid = m_sid + 1;
+   break;
+   default:
+      /* For non-generic params - pack name and sid into 8 bits */
+      m_spi_sid = (0x80 | (m_name << 3) | m_sid) + 1;
+   }
+}
+
+void ShaderIO::override_spi_sid(int spi)
+{
+   m_spi_sid = spi;
+}
+
+void ShaderIO::print(std::ostream& os) const
+{
+   os << m_type << " LOC:" << m_location << " NAME:" << m_name;
+   do_print(os);
+
+   if (m_sid > 0) {
+      os << " SID:" << m_sid << " SPI_SID:" << m_spi_sid;
+   }
+}
+
+
+ShaderIO::ShaderIO(const char *type, int loc, int name):
+   m_type(type),
+   m_location(loc),
+   m_name(name)
+{
+}
+
+ShaderOutput::ShaderOutput():
+   ShaderIO("OUTPUT", -1, -1)
+{
+}
+
+ShaderOutput::ShaderOutput(int location, int name, int writemask):
+   ShaderIO("OUTPUT", location, name),
+   m_writemask(writemask)
+{
+
+}
+
+void ShaderOutput::do_print(std::ostream& os) const
+{
+   os << " MASK:" << m_writemask;
+}
+
+
+ShaderInput::ShaderInput(int location, int name):
+   ShaderIO("INPUT", location, name)
+{
+}
+
+ShaderInput::ShaderInput():
+   ShaderInput(-1, -1)
+{
+}
+
+
+void ShaderInput::do_print(std::ostream& os) const
+{
+   if (m_interpolator)
+      os << " INTERP:" << m_interpolator;
+   if (m_interpolate_loc)
+      os << " ILOC:" << m_interpolate_loc;
+   if (m_uses_interpolate_at_centroid)
+      os << " USE_CENTROID";
+}
+
+void ShaderInput::set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid)
+{
+   m_interpolator = interp;
+   m_interpolate_loc = interp_loc;
+   m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
+}
+
+void ShaderInput::set_uses_interpolate_at_centroid()
+{
+   m_uses_interpolate_at_centroid = true;
+}
+
+Shader::Shader(const char *type_id):
+   m_current_block(nullptr),
+   m_type_id(type_id),
+   m_chip_class(ISA_CC_R600),
+   m_next_block(0)
+{
+   m_instr_factory = new InstrFactory();
+   m_chain_instr.this_shader = this;
+   start_new_block(0);
+}
+
+void Shader::set_input_gpr(int driver_lcation, int gpr)
+{
+   auto i = m_inputs.find(driver_lcation);
+   assert(i != m_inputs.end());
+   i->second.set_gpr(gpr);
+}
+
+bool Shader::add_info_from_string(std::istream& is)
+{
+   std::string type;
+   is >> type;
+
+   if (type == "CHIPCLASS")
+      return read_chipclass(is);
+   if (type == "OUTPUT")
+      return read_output(is);
+   if (type == "INPUT")
+      return read_input(is);
+   if (type == "PROP")
+      return read_prop(is);
+   if (type == "SYSVALUES")
+      return allocate_registers_from_string(is, pin_fully);
+   if (type == "REGISTERS")
+      return allocate_registers_from_string(is, pin_free);
+   if (type == "ARRAYS")
+      return allocate_arrays_from_string(is);
+
+
+   return false;
+}
+
+void Shader::emit_instruction_from_string(const std::string& s)
+{
+
+   sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
+   if (s == "BLOCK_START") {
+      if (!m_current_block->empty()) {
+         start_new_block(m_current_block->nesting_offset());
+         sfn_log << SfnLog::instr << "   Emit start block\n";
+      }
+      return;
+   }
+
+   if (s == "BLOCK_END") {
+      return;
+   }
+
+   auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth());
+   if (ir) {
+      emit_instruction(ir);
+      if (ir->end_block())
+         start_new_block(ir->nesting_offset());
+      sfn_log << SfnLog::instr << "   " << *ir << "\n";
+   }
+}
+
+bool Shader::read_output(std::istream& is)
+{
+   string value;
+   is >> value;
+   int pos = int_from_string_with_prefix(value, "LOC:");
+   is >> value;
+   int name = int_from_string_with_prefix(value, "NAME:");
+   is >> value;
+   int mask = int_from_string_with_prefix(value, "MASK:");
+   ShaderOutput output(pos, name, mask);
+
+   value.clear();
+   is >> value;
+   if (!value.empty()) {
+      int sid = int_from_string_with_prefix(value, "SID:");
+      output.set_sid(sid);
+      is >> value;
+      int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
+      assert(spi_sid == output.spi_sid());
+   }
+
+   add_output(output);
+   return true;
+}
+
+
+bool Shader::read_input(std::istream& is)
+{
+   string value;
+   is >> value;
+   int pos = int_from_string_with_prefix(value, "LOC:");
+   is >> value;
+   int name = int_from_string_with_prefix(value, "NAME:");
+
+   value.clear();
+
+   ShaderInput input(pos, name);
+
+   int interp = 0;
+   int interp_loc = 0;
+   bool use_centroid = false;
+
+   is >> value;
+   while (!value.empty()) {
+      if (value.substr(0, 4) == "SID:") {
+         int sid = int_from_string_with_prefix(value, "SID:");
+         input.set_sid(sid);
+      } else if (value.substr(0, 8) == "SPI_SID:") {
+         int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
+         assert(spi_sid == input.spi_sid());
+      } else if (value.substr(0, 7) == "INTERP:") {
+         interp = int_from_string_with_prefix(value, "INTERP:");
+      } else if (value.substr(0, 5) == "ILOC:") {
+         interp_loc = int_from_string_with_prefix(value, "ILOC:");
+      } else if (value == "USE_CENTROID") {
+         use_centroid = true;
+      } else {
+         std::cerr << "Unknown parse value '" << value << "'";
+         assert(!value.c_str());
+      }
+      value.clear();
+      is >> value;
+   }
+
+   input.set_interpolator(interp, interp_loc, use_centroid);
+
+   add_input(input);
+   return true;
+}
+
+bool Shader::allocate_registers_from_string(std::istream& is, Pin pin)
+{
+   std::string line;
+   if (!std::getline(is, line))
+      return false;
+
+   std::istringstream iline(line);
+
+   while (!iline.eof())  {
+      string reg_str;
+      iline >> reg_str;
+
+      if (reg_str.empty())
+         break;
+
+      if (strchr(reg_str.c_str(), '@')) {
+         value_factory().dest_from_string(reg_str);
+      } else {
+         RegisterVec4::Swizzle swz = {0,1,2,3};
+         auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
+         for (int i = 0; i < 4; ++i) {
+            if (swz[i] < 4 && pin == pin_fully) {
+               regs[i]->pin_live_range(true, false);
+            }
+         }
+      }
+   }
+   return true;
+}
+
+bool Shader::allocate_arrays_from_string(std::istream& is)
+{
+   std::string line;
+   if (!std::getline(is, line))
+      return false;
+
+   std::istringstream iline(line);
+
+   while (!iline.eof())  {
+      string reg_str;
+      iline >> reg_str;
+
+      if (reg_str.empty())
+         break;
+
+      value_factory().array_from_string(reg_str);
+   }
+   return true;
+}
+
+bool Shader::read_chipclass(std::istream& is)
+{
+   string name;
+   is >> name;
+   if (name == "R600")
+      m_chip_class = ISA_CC_R600;
+   else if (name == "R700")
+      m_chip_class = ISA_CC_R700;
+   else if (name == "EVERGREEN")
+      m_chip_class = ISA_CC_EVERGREEN;
+   else if (name == "CAYMAN")
+      m_chip_class = ISA_CC_CAYMAN;
+   else
+      return false;
+   return true;
+}
+
+void Shader::allocate_reserved_registers()
+{
+   m_instr_factory->value_factory().set_virtual_register_base(0);
+   auto reserved_registers_end = do_allocate_reserved_registers();
+   m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
+   if (!m_atomics.empty()) {
+      m_atomic_update = value_factory().temp_register();
+      auto alu = new AluInstr(op1_mov, m_atomic_update,
+                              value_factory().one_i(),
+                              AluInstr::last_write);
+      alu->set_alu_flag(alu_no_schedule_bias);
+      emit_instruction(alu);
+   }
+
+   if(m_flags.test(sh_needs_sbo_ret_address)) {
+      m_rat_return_address = value_factory().temp_register(0);
+      auto temp0 = value_factory().temp_register(0);
+      auto temp1 = value_factory().temp_register(1);
+      auto temp2 = value_factory().temp_register(2);
+
+      auto group = new AluGroup();
+      group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
+      group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
+      emit_instruction(group);
+      emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0),
+                                          value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), {alu_write, alu_last_instr}));
+      emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address,
+                                    temp2, value_factory().literal(0x40), temp0,
+                                    {alu_write, alu_last_instr}));
+   }
+}
+
+Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info,
+                                   struct r600_shader* gs_shader,
+                                   r600_shader_key& key, r600_chip_class chip_class)
+{
+   Shader *shader = nullptr;
+
+   switch (nir->info.stage) {
+   case MESA_SHADER_FRAGMENT:
+      shader = new FragmentShader(key);
+   break;
+   case MESA_SHADER_VERTEX:
+      shader = new VertexShader(so_info, gs_shader, key);
+   break;
+   case MESA_SHADER_GEOMETRY:
+      shader = new GeometryShader(key);
+   break;
+   case MESA_SHADER_TESS_CTRL:
+      shader = new TCSShader(key);
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      shader = new TESShader(so_info, gs_shader, key);
+      break;
+   case MESA_SHADER_COMPUTE:
+      shader = new ComputeShader(key);
+      break;
+   default:
+      return nullptr;
+   }
+
+   shader->set_info(nir);
+
+   shader->set_chip_class(chip_class);
+   if (!shader->process(nir))
+      return nullptr;
+
+   return shader;
+}
+
+void Shader::set_info(nir_shader *nir)
+{
+   m_scratch_size = nir->scratch_size;
+}
+
+ValueFactory& Shader::value_factory()
+{
+   return m_instr_factory->value_factory();
+}
+
+
+bool Shader::process(nir_shader *nir)
+{
+   m_ssbo_image_offset = nir->info.num_images;
+
+   nir_foreach_uniform_variable(var, nir)
+         scan_uniforms(var);
+
+   // at this point all functions should be inlined
+   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));
+
+   if (!scan_shader(func))
+      return false;
+
+   allocate_reserved_registers();
+
+   allocate_local_registers(&func->impl->registers);
+
+   sfn_log << SfnLog::trans << "Process shader \n";
+   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
+      if (!process_cf_node(node))
+         return false;
+   }
+
+   finalize();
+
+   return true;
+}
+
+void Shader::allocate_local_registers(const exec_list *registers)
+{
+   if (value_factory().allocate_registers(registers))
+      m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
+}
+
+bool Shader::scan_shader(const nir_function *func)
+{
+
+   nir_foreach_block(block, func->impl) {
+      nir_foreach_instr(instr, block) {
+         if (!scan_instruction(instr)) {
+            fprintf(stderr, "Unhandled sysvalue access ");
+            nir_print_instr(instr, stderr);
+            fprintf(stderr, "\n");
+            return false;
+         }
+      }
+   }
+
+   int lds_pos = 0;
+   for (auto& [index, input] : m_inputs) {
+      if (input.need_lds_pos())
+         input.set_lds_pos(lds_pos++);
+   }
+
+   int param_id = 0;
+   for (auto& [index, out] : m_outputs) {
+      if (out.is_param())
+         out.set_pos(param_id++);
+   }
+
+   return true;
+}
+
+bool Shader::scan_uniforms(nir_variable *uniform)
+{
+   if (uniform->type->contains_atomic()) {
+      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
+      m_nhwatomic += natomics;
+
+      if (uniform->type->is_array())
+         m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
+
+      m_flags.set(sh_uses_atomics);
+
+      r600_shader_atomic atom = {0};
+
+      atom.buffer_id = uniform->data.binding;
+      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
+
+      atom.start = uniform->data.offset >> 2;
+      atom.end = atom.start + natomics - 1;
+
+      if (m_atomic_base_map.find(uniform->data.binding) ==
+          m_atomic_base_map.end())
+         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
+
+      m_next_hwatomic_loc += natomics;
+
+      m_atomic_file_count += atom.end  - atom.start + 1;
+
+      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
+              << m_atomic_file_count << "\n";
+
+      m_atomics.push_back(atom);
+   }
+
+   auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
+   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
+      m_flags.set(sh_uses_images);
+      if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
+         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
+   }
+
+   return true;
+}
+
+
+bool Shader::scan_instruction(nir_instr *instr)
+{
+   if (do_scan_instruction(instr))
+      return true;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return true;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+
+   // handle unhandled instructions
+   switch (intr->intrinsic) {
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_imax:
+      m_flags.set(sh_needs_sbo_ret_address);
+      FALLTHROUGH;
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_store_ssbo:
+      m_flags.set(sh_writes_memory);
+      m_flags.set(sh_uses_images);
+      break;
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_group_memory_barrier:
+      m_chain_instr.prepare_mem_barrier = true;
+   default:
+      ;
+   }
+   return true;
+}
+
+bool Shader::process_cf_node(nir_cf_node *node)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "CF");
+
+   switch (node->type) {
+   case nir_cf_node_block:
+   return process_block(nir_cf_node_as_block(node));
+   case nir_cf_node_if:
+   return process_if(nir_cf_node_as_if(node));
+   case nir_cf_node_loop:
+   return process_loop(nir_cf_node_as_loop(node));
+   default:
+   return false;
+   }
+
+}
+
+static bool
+child_block_empty (const exec_list& list)
+{
+   if (list.is_empty())
+      return true;
+
+   bool result = true;
+
+   foreach_list_typed(nir_cf_node, n, node, &list) {
+
+      if (n->type == nir_cf_node_block) {
+         if (!nir_cf_node_as_block(n)->instr_list.is_empty())
+            return false;
+      }
+      if (n->type == nir_cf_node_if)
+         return false;
+   }
+   return result;
+}
+
+bool Shader::process_if(nir_if *if_stmt)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "IF");
+
+   if (!emit_if_start(if_stmt))
+      return false;
+
+   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) {
+      SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
+         if (!process_cf_node(n))
+            return false;
+   }
+
+   if (!child_block_empty(if_stmt->else_list)) {
+      assert(emit_control_flow(ControlFlowInstr::cf_else));
+
+      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
+            if (!process_cf_node(n)) return false;
+   }
+
+   if (!emit_control_flow(ControlFlowInstr::cf_endif))
+      return false;
+
+   return true;
+}
+
+bool Shader::emit_if_start(nir_if *if_stmt)
+{
+   auto value = value_factory().src(if_stmt->condition, 0);
+   AluInstr *pred = new AluInstr(op2_pred_setne_int, value_factory().temp_register(),
+                                 value, value_factory().zero(), AluInstr::last);
+   pred->set_alu_flag(alu_update_exec);
+   pred->set_alu_flag(alu_update_pred);
+   pred->set_cf_type(cf_alu_push_before);
+
+   IfInstr *ir = new IfInstr(pred);
+   emit_instruction(ir);
+   start_new_block(1);
+   return true;
+}
+
+bool Shader::emit_control_flow(ControlFlowInstr::CFType type)
+{
+   auto ir = new ControlFlowInstr(type);
+   emit_instruction(ir);
+   int depth = 0;
+   switch (type) {
+   case ControlFlowInstr::cf_loop_begin:
+      m_loops.push_back(ir);
+      m_nloops++;
+      depth = 1;
+   break;
+   case ControlFlowInstr::cf_loop_end:
+      m_loops.pop_back();
+      FALLTHROUGH;
+   case ControlFlowInstr::cf_endif:
+      depth = -1;
+   break;
+   default:
+      ;
+   }
+
+   start_new_block(depth);
+   return true;
+}
+
+bool Shader::process_loop(nir_loop *node)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
+   if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
+      return false;
+
+   foreach_list_typed(nir_cf_node, n, node, &node->body)
+         if (!process_cf_node(n)) return false;
+
+   if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
+      return false;
+
+   return true;
+}
+
+bool Shader::process_block(nir_block *block)
+{
+   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
+
+   nir_foreach_instr(instr, block) {
+      sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
+      bool r = process_instr(instr);
+      if (!r) {
+         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
+                 << *instr << "\n";
+         return false;
+      }
+   }
+   return true;
+}
+
+bool Shader::process_instr(nir_instr *instr)
+{
+   return m_instr_factory->from_nir(instr, *this);
+}
+
+bool Shader::process_intrinsic(nir_intrinsic_instr *intr)
+{
+   if (process_stage_intrinsic(intr))
+      return true;
+
+   if (GDSInstr::emit_atomic_counter(intr, *this)) {
+      set_flag(sh_writes_memory);
+      return true;
+   }
+
+   if (RatInstr::emit(intr, *this))
+      return true;
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output: return store_output(intr);
+   case nir_intrinsic_load_input: return load_input(intr);
+   case nir_intrinsic_load_uniform: return load_uniform(intr);
+   case nir_intrinsic_load_ubo_vec4: return load_ubo(intr);
+   case nir_intrinsic_store_scratch: return emit_store_scratch(intr);
+   case nir_intrinsic_load_scratch: return emit_load_scratch(intr);
+   case nir_intrinsic_store_local_shared_r600: return emit_local_store(intr);
+   case nir_intrinsic_load_local_shared_r600: return emit_local_load(intr);
+   case nir_intrinsic_load_tcs_in_param_base_r600: return emit_load_tcs_param_base(intr, 0);
+   case nir_intrinsic_load_tcs_out_param_base_r600: return emit_load_tcs_param_base(intr, 16);
+      // We only emit the group barrier, barriers across work groups
+      // are not yet implemented
+   case nir_intrinsic_control_barrier:
+   case nir_intrinsic_memory_barrier_tcs_patch:
+   case nir_intrinsic_memory_barrier_shared:
+      return emit_barrier(intr);
+   case nir_intrinsic_memory_barrier_atomic_counter:
+      return true;
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier:
+      return emit_wait_ack();
+
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
+      return emit_atomic_local_shared(intr);
+   case nir_intrinsic_shader_clock:
+      return emit_shader_clock(intr);
+
+   default:
+   return false;
+   }
+}
+
+static ESDOp
+lds_op_from_intrinsic(nir_intrinsic_op op, bool ret) {
+   switch (op) {
+   case nir_intrinsic_shared_atomic_add:
+      return ret ? LDS_ADD_RET : LDS_ADD;
+   case nir_intrinsic_shared_atomic_and:
+      return ret ? LDS_AND_RET : LDS_AND;
+   case nir_intrinsic_shared_atomic_or:
+      return ret ? LDS_OR_RET : LDS_OR;
+   case nir_intrinsic_shared_atomic_imax:
+      return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
+   case nir_intrinsic_shared_atomic_umax:
+      return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
+   case nir_intrinsic_shared_atomic_imin:
+      return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
+   case nir_intrinsic_shared_atomic_umin:
+      return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
+   case nir_intrinsic_shared_atomic_xor:
+      return ret ? LDS_XOR_RET : LDS_XOR;
+   case nir_intrinsic_shared_atomic_exchange:
+      return LDS_XCHG_RET;
+   case nir_intrinsic_shared_atomic_comp_swap:
+      return LDS_CMP_XCHG_RET;
+   default:
+      unreachable("Unsupported shared atomic opcode");
+   }
+}
+
+PRegister Shader::emit_load_to_register(PVirtualValue src)
+{
+   assert(src);
+   PRegister dest = src->as_register();
+
+   if (!dest) {
+      dest = value_factory().temp_register();
+      emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
+   }
+   return dest;
+}
+
+bool Shader::emit_atomic_local_shared(nir_intrinsic_instr* instr)
+{
+   bool uses_retval = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
+
+   auto& vf = value_factory();
+
+   auto dest_value = uses_retval ? vf.dest(instr->dest, 0, pin_free) : nullptr;
+
+   auto op = lds_op_from_intrinsic(instr->intrinsic, uses_retval);
+
+   auto address = vf.src(instr->src[0], 0);
+
+   AluInstr::SrcValues src;
+   src.push_back(vf.src(instr->src[1], 0));
+
+   if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap))
+      src.push_back(vf.src(instr->src[2], 0));
+   emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
+   return true;
+}
+
+auto Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>
+{
+   auto& vf = value_factory();
+
+   PRegister uav_id{nullptr};
+   int offset = 0;
+
+   auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
+   if (uav_id_const) {
+      offset += uav_id_const->u32;
+   } else {
+      auto uav_id_val = vf.src(instr->src[src_id], 0);
+      if (uav_id_val->as_register()) {
+         uav_id = uav_id_val->as_register();
+      } else {
+         uav_id = vf.temp_register();
+         emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val,
+                                 AluInstr::last_write));
+      }
+   }
+   return std::make_pair(offset, uav_id);
+}
+
+
+bool Shader::emit_store_scratch(nir_intrinsic_instr *intr)
+{
+   auto& vf = m_instr_factory->value_factory();
+
+   int writemask = nir_intrinsic_write_mask(intr);
+
+
+   RegisterVec4::Swizzle swz = {7,7,7,7};
+
+   for (unsigned i = 0; i < intr->num_components; ++i)
+      swz[i] = (1 << i) & writemask ? i : 7;
+
+   auto value = vf.temp_vec4(pin_group, swz);
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < intr->num_components; ++i) {
+      if (value[i]->chan() < 4) {
+         ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
+         ir->set_alu_flag(alu_no_schedule_bias);
+         emit_instruction(ir);
+      }
+   }
+   if (!ir)
+      return true;
+
+   ir->set_alu_flag(alu_last_instr);
+
+   auto address = vf.src(intr->src[1], 0);
+
+
+   int align = nir_intrinsic_align_mul(intr);
+   int align_offset = nir_intrinsic_align_offset(intr);
+
+   WriteScratchInstr *ws_ir = nullptr;
+
+   int offset = -1;
+   if (address->as_literal()) {
+      offset = address->as_literal()->value();
+   } else if (address->as_inline_const()) {
+      auto il = address->as_inline_const();
+      if (il->sel() == ALU_SRC_0)
+         offset = 0;
+      else if (il->sel() == ALU_SRC_1_INT)
+         offset = 1;
+   }
+
+   if (offset >= 0) {
+      ws_ir = new WriteScratchInstr(value, offset, align, align_offset, writemask);
+   } else {
+      auto addr_temp  = vf.temp_register(0);      
+      auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
+      load_addr->set_alu_flag(alu_no_schedule_bias);
+      emit_instruction(load_addr);
+
+      ws_ir = new WriteScratchInstr(value, addr_temp, align, align_offset, writemask, m_scratch_size);
+   }
+   emit_instruction(ws_ir);
+
+   m_flags.set(sh_needs_scratch_space);
+   return true;
+}
+
+bool Shader::emit_load_scratch(nir_intrinsic_instr *intr)
+{
+   auto addr = value_factory().src(intr->src[0], 0);
+
+   RegisterVec4::Swizzle dest_swz = {7,7,7,7};
+
+   for (unsigned i = 0; i < intr->num_components; ++i)
+      dest_swz[i] = i;
+
+   auto dest = value_factory().dest_vec4(intr->dest, pin_group);
+
+   auto ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
+   emit_instruction(ir);
+
+   chain_scratch_read(ir);
+
+   m_flags.set(sh_needs_scratch_space);
+
+   return true;
+
+}
+
+bool Shader::emit_local_store(nir_intrinsic_instr *instr)
+{
+   unsigned write_mask = nir_intrinsic_write_mask(instr);
+
+   auto address = value_factory().src(instr->src[1], 0);
+   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
+   write_mask |= write_mask >> 2;
+
+   if ((write_mask & 3) != 3) {
+      if (write_mask == 2)
+         swizzle_base += 1;
+      auto value = value_factory().src(instr->src[0], swizzle_base);
+      emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
+   } else {
+      auto value = value_factory().src(instr->src[0], swizzle_base);
+      auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
+      emit_instruction(new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
+   }
+   return true;
+}
+
+bool Shader::emit_local_load(nir_intrinsic_instr* instr)
+{
+   auto address = value_factory().src_vec(instr->src[0], instr->num_components);
+   auto dest_value = value_factory().dest_vec(instr->dest, instr->num_components);
+   emit_instruction(new LDSReadInstr(dest_value, address));
+   return true;
+}
+
+void Shader::chain_scratch_read(Instr *instr)
+{
+   m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
+}
+
+void Shader::chain_ssbo_read(Instr *instr)
+{
+   m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
+}
+
+bool Shader::emit_wait_ack()
+{
+   start_new_block(0);
+   emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
+   start_new_block(0);
+   return true;
+}
+
+void Shader::InstructionChain::visit(WriteScratchInstr *instr)
+{
+   apply(instr, &last_scratch_instr);
+}
+
+void Shader::InstructionChain::visit(GDSInstr *instr)
+{
+   apply(instr, &last_gds_instr);
+   for (auto& loop : this_shader->m_loops) {
+      loop->set_instr_flag(Instr::vpm);
+   }
+}
+
+void Shader::InstructionChain::visit(RatInstr *instr)
+{
+   apply(instr, &last_ssbo_instr);
+   for (auto& loop : this_shader->m_loops) {
+      loop->set_instr_flag(Instr::vpm);
+   }
+
+   if (prepare_mem_barrier)
+      instr->set_ack();
+}
+
+void Shader::InstructionChain::apply(Instr *current, Instr **last) {
+   if (*last)
+      current->add_required_instr(*last);
+   *last = current;
+}
+
+void Shader::emit_instruction(PInst instr)
+{
+   sfn_log << SfnLog::instr << "   " << *instr << "\n";
+   instr->accept(m_chain_instr);
+   m_current_block->push_back(instr);
+}
+
+bool Shader::load_uniform(nir_intrinsic_instr *intr)
+{
+   auto literal = nir_src_as_const_value(intr->src[0]);
+
+   if (literal) {
+      AluInstr *ir = nullptr;
+      auto pin = intr->dest.is_ssa && nir_dest_num_components(intr->dest) == 1 ?
+               pin_free : pin_none;
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+
+         sfn_log << SfnLog::io << "uniform "
+                 << intr->dest.ssa.index << " const["<< i << "]: "<< intr->const_index[i] << "\n";
+
+         auto uniform = value_factory().uniform(intr, i);
+         ir = new AluInstr(op1_mov, value_factory().dest(intr->dest, i, pin),
+                           uniform, {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+      return true;
+   } else {
+      auto addr = value_factory().src(intr->src[0], 0);
+      return load_uniform_indirect(intr, addr, 16 * nir_intrinsic_base(intr), 0);
+   }
+}
+
+bool Shader::load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr,
+                                   int offset , int buffer_id)
+{
+   auto addr_reg = addr->as_register();
+   if (!addr) {
+      auto tmp = value_factory().temp_register();
+      emit_instruction(new AluInstr(op1_mov, tmp, addr, AluInstr::last_write));
+      addr = tmp;
+   }
+
+   RegisterVec4 dest = value_factory().dest_vec4(intr->dest, pin_group);
+
+   auto ir = new LoadFromBuffer(dest, {0,1,2,3}, addr_reg, offset, buffer_id,
+                                nullptr, fmt_32_32_32_32_float);
+   emit_instruction(ir);
+   m_flags.set(sh_indirect_const_file);
+   return true;
+}
+
+bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
+{
+   auto src = value_factory().temp_register();
+   emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(),
+                                 AluInstr::last_write));
+
+   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+   auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, src, offset,
+                                   R600_LDS_INFO_CONST_BUFFER, nullptr,
+                                   fmt_32_32_32_32);
+
+   fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
+   emit_instruction(fetch);
+
+   return true;
+}
+
+bool Shader::emit_shader_clock(nir_intrinsic_instr* instr)
+{
+   auto& vf = value_factory();
+   auto group = new AluGroup();
+   group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 0, pin_chan),
+                                       vf.inline_const(ALU_SRC_TIME_LO, 0), AluInstr::write));
+   group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 1, pin_chan),
+                                       vf.inline_const(ALU_SRC_TIME_HI, 0), AluInstr::last_write));
+   emit_instruction(group);
+   return true;
+}
+
+
+bool Shader::emit_barrier(nir_intrinsic_instr* intr)
+{
+   (void)intr;
+   /* Put barrier into it's own block, so that optimizers and the
+    * scheduler don't move code */
+   start_new_block(0);
+   auto op = new AluInstr(op0_group_barrier, 0);
+   op->set_alu_flag(alu_last_instr);
+   emit_instruction(op);
+   start_new_block(0);
+   return true;
+}
+
+bool Shader::load_ubo(nir_intrinsic_instr *instr)
+{
+   auto bufid = nir_src_as_const_value(instr->src[0]);
+   auto buf_offset = nir_src_as_const_value(instr->src[1]);
+
+   if (!buf_offset) {
+      /* TODO: if bufid is constant then this can also be solved by using the CF indes
+       * on the ALU block, and this would probably make sense when there are more then one
+       * loads with the same buffer ID. */
+
+      auto addr = value_factory().src(instr->src[1], 0)->as_register();
+      RegisterVec4::Swizzle dest_swz {7,7,7,7};
+      auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         dest_swz[i] = i + nir_intrinsic_component(instr);
+      }
+
+      LoadFromBuffer *ir;
+      if (bufid) {
+         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1 + bufid->u32,
+                                 nullptr, fmt_32_32_32_32_float);
+      } else {
+         auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
+         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1, buffer_id,
+                                 fmt_32_32_32_32_float);
+      }
+      emit_instruction(ir);
+      return true;
+   }
+
+   /* direct load using the constant cache */
+   if (bufid) {
+      int buf_cmp = nir_intrinsic_component(instr);
+
+      AluInstr *ir = nullptr;
+      auto pin = instr->dest.is_ssa && nir_dest_num_components(instr->dest) == 1 ?
+                    pin_free : pin_none;
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+
+         sfn_log << SfnLog::io << "UBO[" << bufid << "] "
+                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
+
+         auto uniform = value_factory().uniform(512 +  buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
+         ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin),
+                           uniform, {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+      return true;
+   } else {
+      int buf_cmp = nir_intrinsic_component(instr);
+      AluInstr *ir = nullptr;
+      auto kc_id = value_factory().src(instr->src[0], 0);
+
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         int cmp = buf_cmp + i;
+         auto u = new UniformValue(512 +  buf_offset->u32, cmp, kc_id);
+         auto dest = value_factory().dest(instr->dest, i, pin_none);
+         ir = new AluInstr(op1_mov,  dest, u, AluInstr::write);
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+      m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
+      return true;
+   }
+}
+
+void Shader::start_new_block(int depth)
+{
+   int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
+   m_current_block = new Block(depth + depth_offset, m_next_block++);
+   m_root.push_back(m_current_block);
+}
+
+bool Shader::emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin)
+{
+   auto dst = value_factory().dest(dest, chan, pin);
+   emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
+   return true;
+}
+
+void Shader::print(std::ostream& os) const
+{
+   print_header(os);
+
+   for (auto& [dummy, i]: m_inputs) {
+      i.print(os);
+      os << "\n";
+   }
+
+   for (auto& [dummy, o]: m_outputs) {
+      o.print(os);
+      os << "\n";
+   }
+
+   os << "SHADER\n";
+   for (auto& b : m_root)
+      b->print(os);
+}
+
+const char *chip_class_names[] = {
+   "R600",
+   "R700",
+   "EVERGREEN",
+   "CAYMAN"
+};
+
+void Shader::print_header(std::ostream& os) const
+{
+   assert(m_chip_class <= ISA_CC_CAYMAN);
+   os << m_type_id << "\n";
+   os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
+   print_properties(os);
+}
+
+void Shader::print_properties(std::ostream& os) const
+{
+   do_print_properties(os);
+}
+
+bool Shader::equal_to(const Shader& other) const
+{
+   if (m_root.size() != other.m_root.size())
+      return false;
+   return std::inner_product(m_root.begin(), m_root.end(),
+                             other.m_root.begin(),
+                             true,
+                             [](bool lhs, bool rhs){ return lhs & rhs;},
+                             [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
+                                return lhs->is_equal_to(*rhs);
+                             });
+}
+
+void Shader::get_shader_info(r600_shader *sh_info)
+{
+   sh_info->ninput = m_inputs.size();
+   int lds_pos = 0;
+   int output_array_array_loc = 0;
+   for (auto& [index, info] : m_inputs) {
+      r600_shader_io& io = sh_info->input[output_array_array_loc++];
+
+      io.sid = info.sid();
+      io.gpr = info.gpr();
+      io.spi_sid = info.spi_sid();
+      io.ij_index = info.ij_index();
+      io.name = info.name();
+      io.interpolate = info.interpolator();
+      io.interpolate_location = info.interpolate_loc();
+      if (info.need_lds_pos())
+         io.lds_pos = lds_pos++;
+      else
+         io.lds_pos = 0;
+
+      io.ring_offset = info.ring_offset();
+      io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();
+
+      sfn_log << SfnLog::io << "Emit Input [" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
+      assert(io.spi_sid >= 0);
+   }
+
+   sh_info->nlds = lds_pos;
+   sh_info->noutput = m_outputs.size();
+   sh_info->num_loops = m_nloops;
+
+   for (auto& [index, info] : m_outputs) {
+      r600_shader_io& io = sh_info->output[index];
+      io.sid = info.sid();
+      io.gpr = info.gpr();
+      io.spi_sid = info.spi_sid();
+      io.name = info.name();
+      io.write_mask = info.writemask();
+
+      sfn_log << SfnLog::io << "Emit output[" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
+      assert(io.spi_sid >= 0);
+   }
+
+   sh_info->nhwatomic = m_nhwatomic;
+   sh_info->atomic_base = m_atomic_base;
+   sh_info->nhwatomic_ranges = m_atomics.size();
+   for (unsigned i = 0; i < m_atomics.size(); ++i)
+      sh_info->atomics[i] = m_atomics[i];
+
+   if (m_flags.test(sh_indirect_const_file))
+         sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;
+
+   if (m_flags.test(sh_indirect_atomic))
+      sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
+
+   sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);
+
+   value_factory().get_shader_info(sh_info);
+
+   sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
+   sh_info->uses_images = m_flags.test(sh_uses_images);
+   sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
+   sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
+   sh_info->indirect_files = m_indirect_files;
+   do_get_shader_info(sh_info);
+}
+
+PRegister Shader::atomic_update()
+{
+   assert(m_atomic_update);
+   return m_atomic_update;
+}
+
+int Shader::remap_atomic_base(int base)
+{
+   return m_atomic_base_map[base];
+}
+
+void Shader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->uses_atomics = m_nhwatomic > 0;
+}
+
+
+const ShaderInput& Shader::input(int base) const
+{
+   auto io = m_inputs.find(base);
+   assert(io != m_inputs.end());
+   return io->second;
+}
+
+const ShaderOutput& Shader::output(int base) const
+{
+   auto io = m_outputs.find(base);
+   assert(io != m_outputs.end());
+   return io->second;
+}
+
+LiveRangeMap Shader::prepare_live_range_map()
+{
+   return m_instr_factory->value_factory().prepare_live_range_map();
+
+}
+
+void Shader::reset_function(ShaderBlocks& new_root)
+{
+   std::swap(m_root, new_root);
+}
+
+void Shader::finalize()
+{
+   do_finalize();
+}
+
+void Shader::do_finalize()
+{
+
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h
new file mode 100644
index 0000000..daf9484
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.h
@@ -0,0 +1,365 @@
+#ifndef SHADER_H
+#define SHADER_H
+
+#include "sfn_instr.h"
+#include "sfn_instrfactory.h"
+#include "sfn_instr_controlflow.h"
+#include "gallium/drivers/r600/r600_shader.h"
+#include "sfn_liverangeevaluator.h"
+
+#include <bitset>
+#include <memory>
+#include <stack>
+#include <vector>
+
+struct nir_shader;
+struct nir_cf_node;
+struct nir_if;
+struct nir_block;
+struct nir_instr;
+
+namespace r600 {
+
+class ShaderIO {
+public:
+   void set_sid(int sid);
+   void override_spi_sid(int spi_sid);
+   void print(std::ostream& os) const;
+
+   int spi_sid() const { return m_spi_sid;}
+   unsigned sid() const { return m_sid;}
+
+   int location() const {return m_location;}
+   unsigned name() const { return m_name;}
+
+   int pos() const { return m_pos;}
+   void set_pos(int pos) {m_pos = pos;}
+
+   bool is_param() const { return m_is_param;}
+   void set_is_param(bool val) { m_is_param = val;}
+
+   void set_gpr(int gpr) {m_gpr = gpr;}
+   int gpr() const {return m_gpr;}
+
+protected:
+   ShaderIO(const char *type, int loc, int name);
+
+private:
+
+   virtual void do_print(std::ostream& os) const = 0;
+
+   const char *m_type;
+   int m_location{-1};
+   int m_name{-1};
+   int m_sid{0};
+   int m_spi_sid{0};
+   int m_pos{0};
+   int m_is_param{false};
+   int m_gpr{0};
+};
+
+class ShaderOutput : public ShaderIO {
+public:
+   ShaderOutput();
+   ShaderOutput(int location, int name, int writemask);
+
+   int writemask() const { return m_writemask;}
+
+private:
+   void do_print(std::ostream& os) const override;
+
+   int m_writemask{0};
+};
+
+
+class ShaderInput : public ShaderIO  {
+public:
+   ShaderInput();
+   ShaderInput(int location, int name);
+   void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid);
+   void set_uses_interpolate_at_centroid();
+   void set_need_lds_pos() { m_need_lds_pos = true;}
+   int ij_index() const { return m_ij_index;}
+
+   int interpolator() const{return m_interpolator;}
+   int interpolate_loc() const {return m_interpolate_loc;}
+   bool need_lds_pos() const {return m_need_lds_pos;}
+   int lds_pos() const {return m_lds_pos;}
+   void set_lds_pos(int pos) {m_lds_pos = pos;}
+
+   int ring_offset() const {return m_ring_offset;}
+   void set_ring_offset(int offs) {m_ring_offset = offs;}
+   bool uses_interpolate_at_centroid() const {return m_uses_interpolate_at_centroid;}
+
+private:
+   void do_print(std::ostream& os) const override;
+
+   int m_interpolator{0};
+   int m_interpolate_loc{0};
+   int m_ij_index{0};
+   bool m_uses_interpolate_at_centroid{false};
+   bool m_need_lds_pos{false};
+   int m_lds_pos{0};
+   int m_ring_offset{0};
+};
+
+class Shader : public Allocate {
+public:
+   using InputIterator = std::map<int, ShaderInput>::iterator;
+   using OutputIterator = std::map<int, ShaderOutput>::iterator;
+
+   using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>;
+
+   Shader(const Shader& orig) = delete;
+
+   virtual ~Shader() {}
+
+   bool add_info_from_string(std::istream& is);
+
+   static Shader *translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info, r600_shader *gs_shader,
+                                     r600_shader_key& key, r600_chip_class chip_class);
+
+   bool process(nir_shader *nir);
+
+   bool process_cf_node(nir_cf_node *node);
+   bool process_if(nir_if *node);
+   bool process_loop(nir_loop *node);
+   bool process_block(nir_block *node);
+   bool process_instr(nir_instr *instr);
+   void emit_instruction(PInst instr);
+   bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
+
+   void print(std::ostream& os ) const;
+   void print_header(std::ostream& os ) const;
+
+   bool process_intrinsic(nir_intrinsic_instr *intr);
+
+   virtual bool load_input(nir_intrinsic_instr *intr) = 0;
+   virtual bool store_output(nir_intrinsic_instr *intr) = 0;
+
+   bool load_uniform(nir_intrinsic_instr *intr);
+   bool load_ubo(nir_intrinsic_instr *intr);
+
+   ValueFactory& value_factory();
+
+   void add_output(const ShaderOutput& output) {
+      m_outputs[output.location()] = output;
+   }
+
+   void add_input(const ShaderInput& input) {
+      m_inputs[input.location()] = input;
+   }
+
+   void set_input_gpr(int driver_lcation, int gpr);
+
+   InputIterator find_input(int location) { return m_inputs.find(location);}
+
+   InputIterator input_not_found() {return m_inputs.end();}
+
+   OutputIterator find_output(int location);
+   OutputIterator output_not_found() {return m_outputs.end();}
+
+   ShaderBlocks& func() { return m_root; }
+   void reset_function(ShaderBlocks& new_root);
+
+   void emit_instruction_from_string(const std::string &s);
+
+   void set_info(nir_shader *nir);
+   void get_shader_info(r600_shader *sh_info);
+
+   r600_chip_class chip_class() const {return m_chip_class;};
+   void set_chip_class(r600_chip_class cls) {m_chip_class = cls;};
+
+   void start_new_block(int nesting_depth);
+
+   const ShaderOutput& output(int base) const;
+
+   LiveRangeMap prepare_live_range_map();
+
+   void set_last_txd(Instr *txd){m_last_txd = txd;}
+   Instr *last_txd(){return m_last_txd;}
+
+   // Needed for keeping the memory access in order
+   void chain_scratch_read(Instr *instr);
+   void chain_ssbo_read(Instr *instr);
+
+   virtual uint32_t enabled_stream_buffers_mask() const {return 0;};
+
+   size_t noutputs() const { return m_outputs.size();}
+   size_t ninputs() const { return m_inputs.size();}
+
+   enum Flags {
+      sh_indirect_const_file,
+      sh_needs_scratch_space,
+      sh_needs_sbo_ret_address,
+      sh_uses_atomics,
+      sh_uses_images,
+      sh_uses_tex_buffer,
+      sh_writes_memory,
+      sh_txs_cube_array_comp,
+      sh_indirect_atomic,
+      sh_mem_barrier,
+      sh_flags_count
+   };
+
+   void set_flag(Flags f) {m_flags.set(f);}
+   bool has_flag(Flags f) const {return m_flags.test(f);}
+
+   int atomic_file_count() const { return m_atomic_file_count; }
+
+   PRegister atomic_update();
+   int remap_atomic_base(int base);
+   auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>;
+   int ssbo_image_offset() const {return m_ssbo_image_offset;}
+   PRegister rat_return_address() {assert(m_rat_return_address); return m_rat_return_address;}
+
+   PRegister emit_load_to_register(PVirtualValue src);
+
+protected:
+   enum ESlots {
+      es_face,
+      es_instanceid,
+      es_invocation_id,
+      es_patch_id,
+      es_pos,
+      es_rel_patch_id,
+      es_sample_mask_in,
+      es_sample_id,
+      es_sample_pos,
+      es_tess_factor_base,
+      es_vertexid,
+      es_tess_coord,
+      es_primitive_id,
+      es_helper_invocation,
+      es_last
+   };
+
+   std::bitset<es_last> m_sv_values;
+
+   Shader(const char *type_id);
+
+   const ShaderInput& input(int base) const;
+
+   bool emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin = pin_free);
+
+private:
+   virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0;
+
+   bool allocate_registers_from_string(std::istream& is, Pin pin);
+   bool allocate_arrays_from_string(std::istream& is);
+
+   bool read_chipclass(std::istream& is);
+
+   bool load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr, int offset , int buffer_id);
+
+   bool scan_shader(const nir_function *impl);
+   bool scan_uniforms(nir_variable *uniform);
+   void allocate_reserved_registers();
+
+   void allocate_local_registers(const exec_list *registers);
+
+   virtual int do_allocate_reserved_registers() = 0;
+
+   bool scan_instruction(nir_instr *instr);
+   virtual bool do_scan_instruction(nir_instr *instr) = 0;
+
+   void print_properties(std::ostream& os) const;
+   virtual void do_print_properties(std::ostream& os) const = 0;
+
+   bool read_output(std::istream& is);
+   bool read_input(std::istream& is);
+   virtual bool read_prop(std::istream& is) = 0;
+
+   bool emit_if_start(nir_if *if_stmt);
+   bool emit_control_flow(ControlFlowInstr::CFType type);
+   bool emit_store_scratch(nir_intrinsic_instr *intr);
+   bool emit_load_scratch(nir_intrinsic_instr *intr);
+   bool emit_local_store(nir_intrinsic_instr *intr);
+   bool emit_local_load(nir_intrinsic_instr* instr);
+   bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
+   bool emit_barrier(nir_intrinsic_instr* intr);
+   bool emit_shader_clock(nir_intrinsic_instr* instr);
+   bool emit_wait_ack();
+
+   bool equal_to(const Shader& other) const;
+   void finalize();
+   virtual void do_finalize();
+
+   virtual void do_get_shader_info(r600_shader *sh_info);
+
+   ShaderBlocks m_root;
+   Block::Pointer m_current_block;
+
+   InstrFactory *m_instr_factory;
+   const char *m_type_id;
+
+   template <typename T>
+   using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>;
+
+   IOMap<ShaderOutput> m_outputs;
+   IOMap<ShaderInput> m_inputs;
+   r600_chip_class m_chip_class;
+
+   int m_scratch_size;
+   int m_next_block;
+   bool m_indirect_const_file{false};
+
+   Instr *m_last_txd {nullptr};
+
+   uint32_t m_indirect_files{0};      
+   std::bitset<sh_flags_count> m_flags;
+   uint32_t nhwatomic_ranges{0};
+   std::vector<r600_shader_atomic> m_atomics;
+
+   uint32_t m_nhwatomic{0};
+   uint32_t m_atomic_base{0};
+   uint32_t m_next_hwatomic_loc{0};
+   std::unordered_map<int, int> m_atomic_base_map;
+   uint32_t m_atomic_file_count{0};
+   PRegister m_atomic_update{nullptr};
+   PRegister m_rat_return_address{nullptr};
+
+   int32_t m_ssbo_image_offset{0};
+   uint32_t m_nloops{0};
+
+   class InstructionChain : public InstrVisitor {
+   public:
+      void visit(AluInstr  *instr) override {(void) instr;}
+      void visit(AluGroup *instr) override {(void) instr;}
+      void visit(TexInstr *instr) override {(void) instr;}
+      void visit(ExportInstr *instr) override {(void) instr;}
+      void visit(FetchInstr *instr) override {(void) instr;}
+      void visit(Block *instr) override {(void) instr;}
+      void visit(ControlFlowInstr *instr) override {(void) instr;}
+      void visit(IfInstr *instr) override {(void) instr;}
+      void visit(StreamOutInstr *instr) override {(void) instr;}
+      void visit(MemRingOutInstr *instr) override {(void) instr;}
+      void visit(EmitVertexInstr *instr) override {(void) instr;}
+      void visit(WriteTFInstr *instr) override {(void) instr;}
+      void visit(LDSAtomicInstr *instr) override {(void) instr;}
+      void visit(LDSReadInstr *instr) override {(void) instr;}
+
+      void visit(WriteScratchInstr *instr) override;
+      void visit(GDSInstr *instr) override;
+      void visit(RatInstr *instr) override;
+
+      void apply(Instr *current, Instr **last);
+
+      Shader *this_shader{nullptr};
+      Instr *last_scratch_instr{nullptr};
+      Instr *last_gds_instr{nullptr};
+      Instr *last_ssbo_instr{nullptr};
+      bool prepare_mem_barrier{false};
+   };
+
+   InstructionChain m_chain_instr;
+   std::vector<Instr *> m_loops;
+};
+
+
+std::pair<unsigned, unsigned>
+r600_get_varying_semantic(unsigned varying_location);
+
+}
+
+#endif // SHADER_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
deleted file mode 100644
index a25b04b..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ /dev/null
@@ -1,1188 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "../r600_pipe.h"
-#include "../r600_shader.h"
-#include "sfn_shader_vertex.h"
-
-#include "sfn_shader_compute.h"
-#include "sfn_shader_fragment.h"
-#include "sfn_shader_geometry.h"
-#include "sfn_liverange.h"
-#include "sfn_ir_to_assembly.h"
-#include "sfn_nir.h"
-#include "sfn_instruction_misc.h"
-#include "sfn_instruction_fetch.h"
-#include "sfn_instruction_lds.h"
-
-#include <iostream>
-
-#define ENABLE_DEBUG 1
-
-#ifdef ENABLE_DEBUG
-#define DEBUG_SFN(X)  \
-   do {\
-      X; \
-   } while (0)
-#else
-#define DEBUG_SFN(X)
-#endif
-
-namespace r600 {
-
-using namespace std;
-
-
-ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
-                                               r600_pipe_shader_selector& sel,
-                                               r600_shader &sh_info, int scratch_size,
-                                               enum amd_gfx_level gfx_level,
-                                               int atomic_base):
-   m_processor_type(ptype),
-   m_nesting_depth(0),
-   m_block_number(0),
-   m_export_output(0, -1),
-   m_sh_info(sh_info),
-   m_chip_class(gfx_level),
-   m_tex_instr(*this),
-   m_alu_instr(*this),
-   m_ssbo_instr(*this),
-   m_pending_else(nullptr),
-   m_scratch_size(scratch_size),
-   m_next_hwatomic_loc(0),
-   m_sel(sel),
-   m_atomic_base(atomic_base),
-   m_image_count(0),
-   last_emitted_alu(nullptr)
-{
-   m_sh_info.processor_type = ptype;
-
-}
-
-
-ShaderFromNirProcessor::~ShaderFromNirProcessor()
-{
-}
-
-bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
-{
-   switch (instr->type) {
-   case nir_instr_type_tex: {
-      nir_tex_instr *t = nir_instr_as_tex(instr);
-      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
-         sh_info().uses_tex_buffers = true;
-      if (t->op == nir_texop_txs &&
-          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-          t->is_array)
-         sh_info().has_txq_cube_array_z_comp = true;
-      break;
-   }
-   case nir_instr_type_intrinsic: {
-      auto *i = nir_instr_as_intrinsic(instr);
-      switch (i->intrinsic) {
-      case nir_intrinsic_ssbo_atomic_add:
-      case nir_intrinsic_image_atomic_add:
-      case nir_intrinsic_ssbo_atomic_and:
-      case nir_intrinsic_image_atomic_and:
-      case nir_intrinsic_ssbo_atomic_or:
-      case nir_intrinsic_image_atomic_or:
-      case nir_intrinsic_ssbo_atomic_imin:
-      case nir_intrinsic_image_atomic_imin:
-      case nir_intrinsic_ssbo_atomic_imax:
-      case nir_intrinsic_image_atomic_imax:
-      case nir_intrinsic_ssbo_atomic_umin:
-      case nir_intrinsic_image_atomic_umin:
-      case nir_intrinsic_ssbo_atomic_umax:
-      case nir_intrinsic_image_atomic_umax:
-      case nir_intrinsic_ssbo_atomic_xor:
-      case nir_intrinsic_image_atomic_xor:
-      case nir_intrinsic_ssbo_atomic_exchange:
-      case nir_intrinsic_image_atomic_exchange:
-      case nir_intrinsic_image_atomic_comp_swap:
-      case nir_intrinsic_ssbo_atomic_comp_swap:
-         m_sel.info.writes_memory = 1;
-         FALLTHROUGH;
-      case nir_intrinsic_image_load:
-         m_ssbo_instr.set_require_rat_return_address();
-         break;
-      case nir_intrinsic_image_size: {
-         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
-             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
-            sh_info().has_txq_cube_array_z_comp = true;
-      }
-
-
-
-      default:
-         ;
-      }
-
-
-   }
-   default:
-      ;
-   }
-
-   return scan_sysvalue_access(instr);
-}
-
-enum amd_gfx_level ShaderFromNirProcessor::get_chip_class(void) const
-{
-  return m_chip_class;
-}
-
-bool ShaderFromNirProcessor::allocate_reserved_registers()
-{
-   bool retval = do_allocate_reserved_registers();
-   m_ssbo_instr.load_rat_return_address();
-   if (sh_info().uses_atomics)
-      m_ssbo_instr.load_atomic_inc_limits();
-   m_ssbo_instr.set_ssbo_offset(m_image_count);
-   return retval;
-}
-
-static void remap_shader_info(r600_shader& sh_info,
-                              std::vector<rename_reg_pair>& map,
-                              UNUSED ValueMap& values)
-{
-   for (unsigned i = 0; i < sh_info.num_arrays; ++i) {
-      auto new_index = map[sh_info.arrays[i].gpr_start];
-      if (new_index.valid)
-         sh_info.arrays[i].gpr_start = new_index.new_reg;
-      map[sh_info.arrays[i].gpr_start].used = true;
-   }
-
-   for (unsigned i = 0; i < sh_info.ninput; ++i) {
-      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
-              << " of map.size()\n";
-
-      assert(sh_info.input[i].gpr < map.size());
-      auto new_index = map[sh_info.input[i].gpr];
-      if (new_index.valid)
-         sh_info.input[i].gpr = new_index.new_reg;
-      map[sh_info.input[i].gpr].used = true;
-   }
-
-   for (unsigned i = 0; i < sh_info.noutput; ++i) {
-      assert(sh_info.output[i].gpr < map.size());
-      auto new_index = map[sh_info.output[i].gpr];
-      if (new_index.valid)
-         sh_info.output[i].gpr = new_index.new_reg;
-      map[sh_info.output[i].gpr].used = true;
-   }
-}
-
-void ShaderFromNirProcessor::remap_registers()
-{
-   // register renumbering
-   auto rc = register_count();
-   if (!rc)
-      return;
-
-   std::vector<register_live_range> register_live_ranges(rc);
-
-   auto temp_register_map = get_temp_registers();
-
-   Shader sh{m_output, temp_register_map};
-   LiverangeEvaluator().run(sh, register_live_ranges);
-   auto register_map = get_temp_registers_remapping(register_live_ranges);
-
-   sfn_log << SfnLog::merge << "=========Mapping===========\n";
-   for (size_t  i = 0; i < register_map.size(); ++i)
-      if (register_map[i].valid)
-         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";
-
-   ValueRemapper vmap0(register_map, temp_register_map);
-   for (auto& block: m_output)
-      block.remap_registers(vmap0);
-
-   remap_shader_info(m_sh_info, register_map, temp_register_map);
-
-   /* Mark inputs as used registers, these registers should no be remapped */
-   for (auto& v: sh.m_temp) {
-      if (v.second->type() == Value::gpr) {
-         const auto& g = static_cast<const GPRValue&>(*v.second);
-         if (g.is_input())
-            register_map[g.sel()].used = true;
-      }
-   }
-
-   int new_index = 0;
-   for (auto& i : register_map) {
-      i.valid = i.used;
-      if (i.used)
-         i.new_reg = new_index++;
-   }
-
-   ValueRemapper vmap1(register_map, temp_register_map);
-   for (auto& ir: m_output)
-      ir.remap_registers(vmap1);
-
-   remap_shader_info(m_sh_info, register_map, temp_register_map);
-}
-
-bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
-{
-   // m_uniform_type_map
-   m_uniform_type_map[uniform->data.location] = uniform->type;
-
-   if (uniform->type->contains_atomic()) {
-      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
-      sh_info().nhwatomic += natomics;
-
-      if (uniform->type->is_array())
-         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
-
-      sh_info().uses_atomics = 1;
-
-      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
-      ++sh_info().nhwatomic_ranges;
-      atom.buffer_id = uniform->data.binding;
-      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
-
-      atom.start = uniform->data.offset >> 2;
-      atom.end = atom.start + natomics - 1;
-
-      if (m_atomic_base_map.find(uniform->data.binding) ==
-          m_atomic_base_map.end())
-         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
-
-      m_next_hwatomic_loc += natomics;
-
-      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end  - atom.start + 1;
-
-      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
-              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
-   }
-
-   auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
-   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
-      sh_info().uses_images = 1;
-      if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
-         sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
-   }
-
-   return true;
-}
-
-void ShaderFromNirProcessor::set_shader_info(const nir_shader *sh)
-{
-   m_image_count = sh->info.num_images;
-   do_set_shader_info(sh);
-}
-
-void ShaderFromNirProcessor::do_set_shader_info(const nir_shader *sh)
-{
-   (void)sh;
-}
-
-bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh)
-{
-   return true;
-}
-
-void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
-{
-   auto& dest = instr->dest;
-   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
-   assert(util_bitcount(instr->modes) == 1);
-   m_var_mode[instr->var] = instr->modes;
-   m_var_derefs[index] = instr->var;
-
-   sfn_log << SfnLog::io << "Add var deref:" << index
-           << " with DDL:" << instr->var->data.driver_location << "\n";
-}
-
-void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
-{
-   switch (io.name) {
-   case TGSI_SEMANTIC_POSITION:
-   case TGSI_SEMANTIC_PSIZE:
-   case TGSI_SEMANTIC_EDGEFLAG:
-   case TGSI_SEMANTIC_FACE:
-   case TGSI_SEMANTIC_SAMPLEMASK:
-   case TGSI_SEMANTIC_CLIPVERTEX:
-      io.spi_sid = 0;
-      break;
-   case TGSI_SEMANTIC_GENERIC:
-   case TGSI_SEMANTIC_TEXCOORD:
-   case TGSI_SEMANTIC_PCOORD:
-      io.spi_sid = io.sid + 1;
-      break;
-   default:
-      /* For non-generic params - pack name and sid into 8 bits */
-      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
-   }   
-}
-
-const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
-{
-   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
-
-   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
-
-   auto v = m_var_derefs.find(index);
-   if (v != m_var_derefs.end())
-      return v->second;
-
-     fprintf(stderr, "R600: could not find deref with index %d\n", index);
-
-     return nullptr;
-
-   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
-   return  nir_deref_instr_get_variable(deref); */
-}
-
-bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
-{
-   return m_tex_instr.emit(instr);
-}
-
-void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
-{
-   if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
-      for (unsigned i = 0; i < ir->n_sources(); ++i) {
-         auto& s = ir->src(i);
-         if (s.type() == Value::kconst) {
-            auto& c = static_cast<UniformValue&>(s);
-            if (c.addr()) {
-               last_emitted_alu->set_flag(alu_last_instr);
-               break;
-            }
-         }
-      }
-   }
-   last_emitted_alu = ir;
-   emit_instruction_internal(ir);
-}
-
-
-void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
-{
-
-   emit_instruction_internal(ir);
-   last_emitted_alu = nullptr;
-}
-
-void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
-{
-   if (m_pending_else) {
-      append_block(-1);
-      m_output.back().emit(PInstruction(m_pending_else));
-      append_block(1);
-      m_pending_else = nullptr;
-   }
-
-   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
-   if (m_output.empty())
-      append_block(0);
-
-   m_output.back().emit(Instruction::Pointer(ir));
-}
-
-void ShaderFromNirProcessor::emit_shader_start()
-{
-   /* placeholder, may become an abstract method */
-   m_ssbo_instr.set_ssbo_offset(m_image_count);
-}
-
-bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
-{
-   switch (instr->type) {
-   case nir_jump_break: {
-      auto b = new LoopBreakInstruction();
-      emit_instruction(b);
-      return true;
-   }
-   case nir_jump_continue: {
-      auto  b = new LoopContInstruction();
-      emit_instruction(b);
-      return true;
-   }
-   default: {
-      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
-      sfn_log << SfnLog::err << "Jump instrunction " << *i <<  " not supported\n";
-      return false;
-   }
-   }
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
-{
-   return m_alu_instr.emit(instr);
-}
-
-bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
-{
-   return false;
-}
-
-bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
-{
-   LoopBeginInstruction *loop = new LoopBeginInstruction();
-   emit_instruction(loop);
-   m_loop_begin_block_map[loop_id] = loop;
-   append_block(1);
-   return true;
-}
-bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
-{
-   auto start = m_loop_begin_block_map.find(loop_id);
-   if (start == m_loop_begin_block_map.end()) {
-      sfn_log << SfnLog::err  << "End loop: Loop start for "
-              << loop_id << "  not found\n";
-      return false;
-   }
-   m_nesting_depth--;
-   m_block_number++;
-   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
-   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
-   emit_instruction(loop);
-
-   m_loop_begin_block_map.erase(start);
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
-{
-
-   auto value = from_nir(if_stmt->condition, 0, 0);
-   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
-                                             value, Value::zero, EmitInstruction::last);
-   pred->set_flag(alu_update_exec);
-   pred->set_flag(alu_update_pred);
-   pred->set_cf_type(cf_alu_push_before);
-
-   append_block(1);
-
-   IfInstruction *ir = new IfInstruction(pred);
-   emit_instruction(ir);
-   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
-   m_if_block_start_map[if_id] = ir;
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_else_start(int if_id)
-{
-   auto iif = m_if_block_start_map.find(if_id);
-   if (iif == m_if_block_start_map.end()) {
-      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
-      return false;
-   }
-
-   if (iif->second->type() != Instruction::cond_if) {
-      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
-      return false;
-   }
-   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
-   ElseInstruction *ir = new ElseInstruction(if_instr);
-   m_if_block_start_map[if_id] = ir;
-   m_pending_else = ir;
-
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
-{
-   auto ifelse = m_if_block_start_map.find(if_id);
-   if (ifelse == m_if_block_start_map.end()) {
-      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
-      return false;
-   }
-
-   if (ifelse->second->type() != Instruction::cond_if &&
-       ifelse->second->type() != Instruction::cond_else) {
-      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
-      return false;
-   }
-   /* Clear pending else, if the else branch was empty, non will be emitted */
-
-   m_pending_else = nullptr;
-
-   append_block(-1);
-   IfElseEndInstruction *ir = new IfElseEndInstruction();
-   emit_instruction(ir);
-
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
-{
-   PValue src = get_temp_register();
-   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));
-
-   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
-   emit_instruction(new FetchTCSIOParam(dest, src, offset));
-
-   return true;
-
-}
-
-bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
-{
-   auto address = varvec_from_nir(instr->src[0], instr->num_components);
-   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);
-
-   emit_instruction(new LDSReadInstruction(address, dest_value));
-   return true;
-}
-
-static unsigned
-lds_op_from_intrinsic(nir_intrinsic_op op) {
-   switch (op) {
-   case nir_intrinsic_shared_atomic_add:
-      return LDS_OP2_LDS_ADD_RET;
-   case nir_intrinsic_shared_atomic_and:
-      return LDS_OP2_LDS_AND_RET;
-   case nir_intrinsic_shared_atomic_or:
-      return LDS_OP2_LDS_OR_RET;
-   case nir_intrinsic_shared_atomic_imax:
-      return LDS_OP2_LDS_MAX_INT_RET;
-   case nir_intrinsic_shared_atomic_umax:
-      return LDS_OP2_LDS_MAX_UINT_RET;
-   case nir_intrinsic_shared_atomic_imin:
-      return LDS_OP2_LDS_MIN_INT_RET;
-   case nir_intrinsic_shared_atomic_umin:
-      return LDS_OP2_LDS_MIN_UINT_RET;
-   case nir_intrinsic_shared_atomic_xor:
-      return LDS_OP2_LDS_XOR_RET;
-   case nir_intrinsic_shared_atomic_exchange:
-      return LDS_OP2_LDS_XCHG_RET;
-   case nir_intrinsic_shared_atomic_comp_swap:
-      return LDS_OP3_LDS_CMP_XCHG_RET;
-   default:
-      unreachable("Unsupported shared atomic opcode");
-   }
-}
-
-bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
-{
-   auto address = from_nir(instr->src[0], 0);
-   auto dest_value = from_nir(instr->dest, 0);
-   auto value = from_nir(instr->src[1], 0);
-   auto op = lds_op_from_intrinsic(instr->intrinsic);
-
-   if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) {
-      auto value2 = from_nir(instr->src[2], 0);
-      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
-   } else {
-      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
-   }
-   return true;
-}
-
-
-bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
-{
-   unsigned write_mask = nir_intrinsic_write_mask(instr);
-
-   auto address = from_nir(instr->src[1], 0);
-   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
-   write_mask |= write_mask >> 2;
-
-   auto value =  from_nir(instr->src[0], swizzle_base);
-   if (!(write_mask & 2)) {
-      emit_instruction(new LDSWriteInstruction(address, 0, value));
-   } else {
-      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
-      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
-   }
-
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   if (emit_intrinsic_instruction_override(instr))
-      return true;
-
-   if (m_ssbo_instr.emit(&instr->instr)) {
-      m_sel.info.writes_memory = true;
-      return true;
-   }
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_deref: {
-      auto var = get_deref_location(instr->src[0]);
-      if (!var)
-         return false;
-      auto mode_helper = m_var_mode.find(var);
-      if (mode_helper == m_var_mode.end()) {
-         cerr << "r600-nir: variable '" << var->name << "' not found\n";
-         return false;
-      }
-      switch (mode_helper->second) {
-      case nir_var_function_temp:
-         return emit_load_function_temp(var, instr);
-      default:
-         cerr << "r600-nir: Unsupported mode" << mode_helper->second
-              << "for src variable\n";
-         return false;
-      }
-   }
-   case nir_intrinsic_store_scratch:
-      return emit_store_scratch(instr);
-   case nir_intrinsic_load_scratch:
-      return emit_load_scratch(instr);
-   case nir_intrinsic_load_uniform:
-      return load_uniform(instr);
-   case nir_intrinsic_discard:
-   case nir_intrinsic_discard_if:
-      return emit_discard_if(instr);
-   case nir_intrinsic_load_ubo_vec4:
-      return emit_load_ubo_vec4(instr);
-   case nir_intrinsic_load_tcs_in_param_base_r600:
-      return emit_load_tcs_param_base(instr, 0);
-   case nir_intrinsic_load_tcs_out_param_base_r600:
-      return emit_load_tcs_param_base(instr, 16);
-   case nir_intrinsic_load_local_shared_r600:
-   case nir_intrinsic_load_shared:
-      return emit_load_local_shared(instr);
-   case nir_intrinsic_store_local_shared_r600:
-   case nir_intrinsic_store_shared:
-      return emit_store_local_shared(instr);
-   case nir_intrinsic_control_barrier:
-   case nir_intrinsic_memory_barrier_tcs_patch:
-   case nir_intrinsic_memory_barrier_shared:
-   case nir_intrinsic_memory_barrier_buffer:
-   case nir_intrinsic_memory_barrier:
-   case nir_intrinsic_memory_barrier_image:
-   case nir_intrinsic_group_memory_barrier:
-      return emit_barrier(instr);
-   case nir_intrinsic_memory_barrier_atomic_counter:
-      return true;
-   case nir_intrinsic_shared_atomic_add:
-   case nir_intrinsic_shared_atomic_and:
-   case nir_intrinsic_shared_atomic_or:
-   case nir_intrinsic_shared_atomic_imax:
-   case nir_intrinsic_shared_atomic_umax:
-   case nir_intrinsic_shared_atomic_imin:
-   case nir_intrinsic_shared_atomic_umin:
-   case nir_intrinsic_shared_atomic_xor:
-   case nir_intrinsic_shared_atomic_exchange:
-   case nir_intrinsic_shared_atomic_comp_swap:
-      return emit_atomic_local_shared(instr);
-   case nir_intrinsic_shader_clock:
-      return emit_shader_clock(instr);
-   case nir_intrinsic_copy_deref:
-   case nir_intrinsic_load_constant:
-   case nir_intrinsic_load_input:
-   case nir_intrinsic_store_output:
-
-   default:
-      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
-      return false;
-   }
-   return false;
-}
-
-bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
-{
-   return false;
-}
-
-bool
-ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
-{
-   return false;
-}
-
-bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
-{
-   AluInstruction *ir = new AluInstruction(op0_group_barrier);
-   ir->set_flag(alu_last_instr);
-   emit_instruction(ir);
-   return true;
-}
-
-
-bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
-{
-   if (!dest.is_ssa) {
-      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
-      if (as_last)
-         ir->set_flag(alu_last_instr);
-      emit_instruction(ir);
-   } else {
-      inject_register(dest.ssa.index, chan, value, true);
-   }
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
-{
-   PValue address = from_nir(instr->src[1], 0, 0);
-
-   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
-         swizzle_from_comps(instr->num_components));
-
-   int writemask = nir_intrinsic_write_mask(instr);
-   int align = nir_intrinsic_align_mul(instr);
-   int align_offset = nir_intrinsic_align_offset(instr);
-
-   WriteScratchInstruction *ir = nullptr;
-   if (address->type() == Value::literal) {
-      const auto& lv = static_cast<const LiteralValue&>(*address);
-      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
-   } else {
-      address = from_nir_with_fetch_constant(instr->src[1], 0);
-      ir = new WriteScratchInstruction(address, value, align, align_offset,
-                                       writemask, m_scratch_size);
-   }
-   emit_instruction(ir);
-   sh_info().needs_scratch_space = 1;
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
-{
-   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
-   std::array<PValue, 4> dst_val;
-   for (int i = 0; i < 4; ++i)
-      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
-
-   GPRVector dst(dst_val);
-   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
-   ir->prelude_append(new WaitAck(0));
-   emit_instruction(ir);
-   sh_info().needs_scratch_space = 1;
-   return true;
-}
-
-bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr)
-{
-   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0),
-                                       PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write));
-   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1),
-                                       PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write));
-   return true;
-}
-
-GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
-                                                                   unsigned mask,
-                                                                   const GPRVector::Swizzle& swizzle,
-                                                                   bool match)
-{
-   bool use_same = true;
-   GPRVector::Values v;
-
-   std::array<bool,4> used_swizzles = {false, false, false, false};
-
-   /* Check whether all sources come from a GPR, and,
-    * if requested, whether they are swizzled as expected */
-
-   for (int i = 0; i < 4 && use_same; ++i)  {
-      if ((1 << i) & mask) {
-         if (swizzle[i] < 4) {
-            v[i] = from_nir(src, swizzle[i]);
-            assert(v[i]);
-            use_same &= (v[i]->type() == Value::gpr);
-            if (match) {
-               use_same &= (v[i]->chan() == swizzle[i]);
-            }
-            used_swizzles[v[i]->chan()] = true;
-         }
-      }
-   }
-
-
-   /* Now check whether all inputs come from the same GPR, and fill
-    * empty slots in the vector with unused swizzles, bail out if
-    * the sources are not from the same GPR
-    */
-
-   if (use_same) {
-      int next_free_swizzle = 0;
-      while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4)
-         next_free_swizzle++;
-
-      /* Find the first GPR index used */
-      int i = 0;
-      while (!v[i] && i < 4) ++i;
-      assert(i < 4);
-      unsigned sel = v[i]->sel();
-
-
-      for (i = 0; i < 4 && use_same; ++i) {
-         if (!v[i]) {
-            if (swizzle[i] >= 4)
-               v[i] = PValue(new GPRValue(sel, swizzle[i]));
-            else {
-               assert(next_free_swizzle < 4);
-               v[i] = PValue(new GPRValue(sel, next_free_swizzle));
-               used_swizzles[next_free_swizzle] = true;
-               while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
-                  next_free_swizzle++;
-            }
-         }
-         else
-            use_same &= v[i]->sel() == sel;
-      }
-   }
-
-   /* We can't re-use the source data because they either need re-swizzling, or
-    * they didn't come all from a GPR or the same GPR, so copy to a new vector
-    */
-   if (!use_same) {
-      AluInstruction *ir = nullptr;
-      GPRVector result = get_temp_vec4(swizzle);
-      for (int i = 0; i < 4; ++i) {
-         if (swizzle[i] < 4 && (mask & (1 << i))) {
-            ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
-                                    EmitInstruction::write);
-            emit_instruction(ir);
-         }
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-      return result;
-   } else
-      return GPRVector(v);;
-}
-
-bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
-{
-   auto bufid = nir_src_as_const_value(instr->src[0]);
-   auto buf_offset = nir_src_as_const_value(instr->src[1]);
-   auto base = nir_intrinsic_base(instr);
-
-   if (!buf_offset) {
-      /* TODO: if buf_offset is constant then this can also be solved by using the CF indes
-       * on the ALU block, and this would probably make sense when there are more then one
-       * loads with the same buffer ID. */
-
-      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
-      GPRVector trgt;
-      std::array<int, 4> swz = {7,7,7,7};
-      for (unsigned i = 0; i < 4; ++i) {
-         if (i < nir_dest_num_components(instr->dest)) {
-            trgt.set_reg_i(i, from_nir(instr->dest, i));
-            swz[i] = i + nir_intrinsic_component(instr);
-         } else {
-            trgt.set_reg_i(i, from_nir(instr->dest, 7));
-         }
-      }
-
-      FetchInstruction *ir;
-      if (bufid) {
-         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base,
-                                              1 + bufid->u32, nullptr, bim_none);
-      } else {
-         PValue bufid = from_nir(instr->src[0], 0, 0);
-         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, base,
-                                              1, bufid, bim_zero);
-      }
-      ir->set_dest_swizzle(swz);
-      emit_instruction(ir);
-      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
-      return true;
-   }
-
-   uint32_t offset = 512 + base + buf_offset->u32;
-
-   if (bufid) {
-      int buf_cmp = nir_intrinsic_component(instr);
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         int cmp = buf_cmp + i;
-         assert(cmp < 4);
-         auto u = PValue(new UniformValue(offset, cmp, bufid->u32 + 1));
-         if (instr->dest.is_ssa)
-            load_preloaded_value(instr->dest, i, u);
-         else {
-            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
-            emit_instruction(ir);
-         }
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-      return true;
-
-   } else {
-      int buf_cmp = nir_intrinsic_component(instr);
-      AluInstruction *ir = nullptr;
-      auto kc_id = from_nir(instr->src[0], 0);
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         int cmp = buf_cmp + i;
-         auto u = PValue(new UniformValue(offset, cmp, kc_id));
-         if (instr->dest.is_ssa)
-            load_preloaded_value(instr->dest, i, u);
-         else {
-            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
-            emit_instruction(ir);
-         }
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-      return true;
-   }
-}
-
-bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
-{
-   r600::sfn_log << SfnLog::instr << "emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "' (" << __func__ << ")\n";
-
-   if (instr->intrinsic == nir_intrinsic_discard_if) {
-      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
-                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
-
-   } else {
-      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
-                       {Value::zero, Value::zero}, {alu_last_instr}));
-   }
-   m_sh_info.uses_kill = 1;
-   return true;
-}
-
-bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
-{
-   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "'\n";
-
-
-   /* If the target register is a SSA register and the loading is not
-    * indirect then we can do lazy loading, i.e. the uniform value can
-    * be used directly. Otherwise we have to load the data for real
-    * rigt away.
-    */
-   auto literal = nir_src_as_const_value(instr->src[0]);
-   int base = nir_intrinsic_base(instr);
-
-   if (literal) {
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
-         sfn_log << SfnLog::io << "uniform "
-                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
-
-         if (instr->dest.is_ssa)
-            load_preloaded_value(instr->dest, i, u);
-         else {
-            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
-                                                   u, {alu_write});
-             emit_instruction(ir);
-         }
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   } else {
-      PValue addr = from_nir(instr->src[0], 0, 0);
-      return load_uniform_indirect(instr, addr, 16 * base, 0);
-   }
-   return true;
-}
-
-bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
-{
-   if (!addr) {
-      std::cerr << "r600-nir: don't know how uniform is addressed\n";
-      return false;
-   }
-
-   GPRVector trgt;
-   std::array<int, 4> swz = {7,7,7,7};
-   for (int i = 0; i < 4; ++i) {
-      trgt.set_reg_i(i, from_nir(instr->dest, i));
-      swz[i] = i;
-   }
-
-   if (addr->type() != Value::gpr) {
-      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
-      addr = trgt.reg_i(0);
-   }
-
-   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
-                                  bufferid, PValue(), bim_none);
-   ir->set_dest_swizzle(swz);
-   emit_instruction(ir);
-   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
-   return true;
-}
-
-AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < literal->def.num_components ; ++i) {
-      if (writemask & (1 << i)){
-         PValue lsrc;
-         switch (literal->def.bit_size) {
-
-         case 1:
-            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
-            lsrc = literal->value[i].b ?
-                     PValue(new LiteralValue( 0xffffffff, i)) :
-                     Value::zero;
-            break;
-         case 32:
-            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
-            if (literal->value[i].u32 == 0)
-               lsrc = Value::zero;
-            else if (literal->value[i].u32 == 1)
-               lsrc = Value::one_i;
-            else if (literal->value[i].f32 == 1.0f)
-               lsrc = Value::one_f;
-            else if (literal->value[i].f32 == 0.5f)
-               lsrc = Value::zero_dot_5;
-            else
-               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
-            break;
-         default:
-            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
-                    << " falling back to 32 bit\n";
-            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
-         }
-         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
-
-         emit_instruction(ir);
-      }
-   }
-   return ir;
-}
-
-PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
-{
-   PValue value = from_nir(src, component);
-   if (value->type() != Value::gpr &&
-       value->type() != Value::gpr_vector &&
-       value->type() != Value::gpr_array_value) {
-      PValue retval = get_temp_register(channel);
-      emit_instruction(new AluInstruction(op1_mov, retval, value,
-                                          EmitInstruction::last_write));
-      value = retval;
-   }
-   return value;
-}
-
-bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
-{
-   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
-                 << *reinterpret_cast<nir_instr*>(instr)
-                 << "'\n";
-
-   /* Give the specific shader type a chance to process this, i.e. Geometry and
-    * tesselation shaders need specialized deref_array, for the other shaders
-    * it is lowered.
-    */
-   if (emit_deref_instruction_override(instr))
-      return true;
-
-   switch (instr->deref_type) {
-   case nir_deref_type_var:
-      set_var_address(instr);
-      return true;
-   case nir_deref_type_array:
-   case nir_deref_type_array_wildcard:
-   case nir_deref_type_struct:
-   case nir_deref_type_cast:
-   default:
-      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
-   }
-   return false;
-}
-
-bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
-                                              std::vector<PValue> srcs,
-                                              const std::set<AluModifiers>& m_flags)
-{
-   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
-   emit_instruction(ir);
-   return true;
-}
-
-void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
-{
-   m_output_register_map[loc] = gpr;
-}
-
-void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
-{
-   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
-   m_export_output.emit(PInstruction(ir));
-}
-
-const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
-{
-   const GPRVector *retval = nullptr;
-   auto val = m_output_register_map.find(location);
-   if (val != m_output_register_map.end())
-      retval =  val->second;
-   return retval;
-}
-
-void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
-{
-   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var <<  "\n";
-   m_inputs[pos] = var;
-}
-
-void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
-{
-   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel <<  "\n";
-   m_outputs[pos] = sel;
-}
-
-void ShaderFromNirProcessor::append_block(int nesting_change)
-{
-   m_nesting_depth += nesting_change;
-   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
-}
-
-void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
-{
-   shader.num_arrays = m_reg_arrays.size();
-   if (shader.num_arrays) {
-      shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
-      for (unsigned i = 0; i < shader.num_arrays; ++i) {
-         shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
-         shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
-         shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
-      }
-      shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
-   }
-}
-
-void ShaderFromNirProcessor::finalize()
-{
-   do_finalize();
-
-   for (auto& i : m_inputs)
-      m_sh_info.input[i.first].gpr = i.second->sel();
-
-   for (auto& i : m_outputs)
-      m_sh_info.output[i.first].gpr = i.second;
-
-   m_output.push_back(m_export_output);
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h
deleted file mode 100644
index 7109a10..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_shader_from_nir_h
-#define sfn_shader_from_nir_h
-
-
-#include "gallium/drivers/r600/r600_shader.h"
-
-#include "compiler/nir/nir.h"
-#include "compiler/nir_types.h"
-
-#include "sfn_instruction_block.h"
-#include "sfn_instruction_export.h"
-#include "sfn_alu_defines.h"
-#include "sfn_valuepool.h"
-#include "sfn_debug.h"
-#include "sfn_instruction_cf.h"
-#include "sfn_emittexinstruction.h"
-#include "sfn_emitaluinstruction.h"
-#include "sfn_emitssboinstruction.h"
-
-#include <vector>
-#include <set>
-#include <stack>
-#include <unordered_map>
-
-struct nir_instr;
-
-namespace r600 {
-
-extern SfnLog sfn_log;
-
-class ShaderFromNirProcessor : public ValuePool {
-public:
-   ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
-                          r600_shader& sh_info, int scratch_size, enum amd_gfx_level _chip_class,
-                          int atomic_base);
-   virtual ~ShaderFromNirProcessor();
-
-   void emit_instruction(Instruction *ir);
-
-   PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
-   GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
-                                              const GPRVector::Swizzle& swizzle, bool match = false);
-
-   bool emit_instruction(EAluOp opcode, PValue dest,
-                         std::vector<PValue> src0,
-                         const std::set<AluModifiers>& m_flags);
-   void emit_export_instruction(WriteoutInstruction *ir);
-   void emit_instruction(AluInstruction *ir);
-
-   bool use_legacy_math_rules(void) {
-      return m_sel.nir->info.use_legacy_math_rules;
-   };
-
-   void split_constants(nir_alu_instr* instr);
-   void remap_registers();
-
-   const nir_variable *get_deref_location(const nir_src& src) const;
-
-   r600_shader& sh_info() {return m_sh_info;}
-   void add_param_output_reg(int loc, const GPRVector *gpr);
-   void set_output(unsigned pos, int sel);
-   const GPRVector *output_register(unsigned location) const;
-   void evaluate_spi_sid(r600_shader_io &io);
-
-   enum amd_gfx_level get_chip_class() const;
-
-   int remap_atomic_base(int base) {
-      return m_atomic_base_map[base];
-   }
-
-   void get_array_info(r600_shader& shader) const;
-
-   virtual bool scan_inputs_read(const nir_shader *sh);
-   void set_shader_info(const nir_shader *sh);
-
-protected:
-
-   void set_var_address(nir_deref_instr *instr);
-   void set_input(unsigned pos, PValue var);
-
-   bool scan_instruction(nir_instr *instr);
-
-   virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
-
-   bool emit_if_start(int if_id, nir_if *if_stmt);
-   bool emit_else_start(int if_id);
-   bool emit_ifelse_end(int if_id);
-
-   bool emit_loop_start(int loop_id);
-   bool emit_loop_end(int loop_id);
-   bool emit_jump_instruction(nir_jump_instr *instr);
-
-   bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
-   bool emit_load_local_shared(nir_intrinsic_instr* instr);
-   bool emit_store_local_shared(nir_intrinsic_instr* instr);
-   bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
-
-   bool emit_barrier(nir_intrinsic_instr* instr);
-
-   bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
-                             bool as_last = true);
-
-   void inc_atomic_file_count();
-
-   virtual void do_set_shader_info(const nir_shader *sh);
-
-   enum ESlots {
-      es_face,
-      es_instanceid,
-      es_invocation_id,
-      es_patch_id,
-      es_pos,
-      es_rel_patch_id,
-      es_sample_mask_in,
-      es_sample_id,
-      es_sample_pos,
-      es_tess_factor_base,
-      es_vertexid,
-      es_tess_coord,
-      es_primitive_id,
-      es_helper_invocation,
-      es_last
-   };
-
-   std::bitset<es_last> m_sv_values;
-
-   bool allocate_reserved_registers();
-
-
-private:
-   virtual bool do_allocate_reserved_registers() = 0;
-
-
-   void emit_instruction_internal(Instruction *ir);
-
-   bool emit_alu_instruction(nir_instr *instr);
-   bool emit_deref_instruction(nir_deref_instr* instr);
-   bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
-   virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
-   bool emit_tex_instruction(nir_instr* instr);
-   bool emit_discard_if(nir_intrinsic_instr* instr);
-   bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
-   bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
-   bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid);
-
-   /* Code creating functions */
-   bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
-   AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
-
-   bool load_uniform(nir_intrinsic_instr* instr);
-   bool process_uniforms(nir_variable *uniform);
-
-   void append_block(int nesting_change);
-
-   virtual void emit_shader_start();
-   virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
-
-   bool emit_store_scratch(nir_intrinsic_instr* instr);
-   bool emit_load_scratch(nir_intrinsic_instr* instr);
-   bool emit_shader_clock(nir_intrinsic_instr* instr);
-   virtual void do_finalize() = 0;
-
-   void finalize();
-   friend class ShaderFromNir;
-
-   std::set<nir_variable*> m_arrays;
-
-   std::map<unsigned, PValue> m_inputs;
-   std::map<unsigned, int> m_outputs;
-
-   std::map<unsigned, nir_variable*> m_var_derefs;
-   std::map<const nir_variable *, nir_variable_mode> m_var_mode;
-
-   std::map<unsigned, const glsl_type*>  m_uniform_type_map;
-   std::map<int, IfElseInstruction *> m_if_block_start_map;
-   std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
-
-   pipe_shader_type m_processor_type;
-
-   std::vector<InstructionBlock> m_output;
-   unsigned m_nesting_depth;
-   unsigned m_block_number;
-   InstructionBlock m_export_output;
-   r600_shader& m_sh_info;
-   enum amd_gfx_level m_chip_class;
-   EmitTexInstruction m_tex_instr;
-   EmitAluInstruction m_alu_instr;
-   EmitSSBOInstruction m_ssbo_instr;
-   OutputRegisterMap m_output_register_map;
-
-   IfElseInstruction *m_pending_else;
-   int m_scratch_size;
-   int m_next_hwatomic_loc;
-
-   r600_pipe_shader_selector& m_sel;
-   int m_atomic_base ;
-   int m_image_count;
-
-   std::unordered_map<int, int> m_atomic_base_map;
-   AluInstruction *last_emitted_alu;
-};
-
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
deleted file mode 100644
index 0977fe2..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_shader_compute.h"
-#include "sfn_instruction_fetch.h"
-
-namespace r600 {
-
-ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh,
-                                           r600_pipe_shader_selector& sel,
-                                           UNUSED const r600_shader_key& key,
-                                           enum amd_gfx_level gfx_level):
-     ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader,
-                             sh->scratch_space_needed, gfx_level, 0),
-     m_reserved_registers(0)
-{
-}
-
-bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
-{
-   return true;
-}
-bool ComputeShaderFromNir::do_allocate_reserved_registers()
-{
-   int thread_id_sel = m_reserved_registers++;
-   int wg_id_sel = m_reserved_registers++;
-
-   for (int i = 0; i < 3; ++i) {
-      auto tmp = new GPRValue(thread_id_sel, i);
-      tmp->set_as_input();
-      tmp->set_keep_alive();
-      m_local_invocation_id[i] = PValue(tmp);
-      inject_register(tmp->sel(), i, m_local_invocation_id[i], false);
-
-      tmp = new GPRValue(wg_id_sel, i);
-      tmp->set_as_input();
-      tmp->set_keep_alive();
-      m_workgroup_id[i] = PValue(tmp);
-      inject_register(tmp->sel(), i, m_workgroup_id[i], false);
-   }
-   return true;
-}
-
-bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_local_invocation_id:
-      return emit_load_3vec(instr, m_local_invocation_id);
-   case nir_intrinsic_load_workgroup_id:
-      return emit_load_3vec(instr, m_workgroup_id);
-   case nir_intrinsic_load_num_workgroups:
-      return emit_load_num_workgroups(instr);
-   default:
-      return false;
-   }
-}
-
-bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr,
-                                          const std::array<PValue,3>& src)
-{
-   for (int i = 0; i < 3; ++i)
-      load_preloaded_value(instr->dest, i, src[i], i == 2);
-   return true;
-}
-
-bool ComputeShaderFromNir::emit_load_num_workgroups(nir_intrinsic_instr* instr)
-{
-   PValue a_zero = get_temp_register(1);
-   emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write));
-   GPRVector dest;
-   for (int i = 0; i < 3; ++i)
-      dest.set_reg_i(i, from_nir(instr->dest, i));
-   dest.set_reg_i(3, from_nir(instr->dest, 7));
-
-   auto ir = new FetchInstruction(vc_fetch, no_index_offset,
-                                  fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16,
-                                  false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0,
-                                  bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7});
-   ir->set_flag(vtx_srf_mode);
-   emit_instruction(ir);
-   return true;
-}
-
-void ComputeShaderFromNir::do_finalize()
-{
-
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_compute.h b/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
deleted file mode 100644
index 98cbc63..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H
-#define SFN_COMPUTE_SHADER_FROM_NIR_H
-
-#include "sfn_shader_base.h"
-#include "sfn_shaderio.h"
-#include <bitset>
-
-namespace r600 {
-
-class ComputeShaderFromNir : public ShaderFromNirProcessor
-{
-public:
-   ComputeShaderFromNir(r600_pipe_shader *sh,
-                        r600_pipe_shader_selector& sel,
-                        const r600_shader_key &key,
-                        enum amd_gfx_level gfx_level);
-
-   bool scan_sysvalue_access(nir_instr *instr) override;
-
-private:
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-
-   bool do_allocate_reserved_registers() override;
-   void do_finalize() override;
-
-   bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src);
-   bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
-
-   int m_reserved_registers;
-   std::array<PValue,3> m_workgroup_id;
-   std::array<PValue,3> m_local_invocation_id;
-};
-
-}
-
-#endif // SFN_COMPUTE_SHADER_FROM_NIR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
new file mode 100644
index 0000000..89aa796
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
@@ -0,0 +1,95 @@
+#include "sfn_shader_cs.h"
+#include "sfn_instr_fetch.h"
+
+
+namespace r600 {
+
+ComputeShader::ComputeShader(UNUSED const r600_shader_key& key):
+   Shader("CS")
+{
+
+}
+
+bool ComputeShader::do_scan_instruction(UNUSED nir_instr *instr)
+{
+   return false;
+}
+
+int ComputeShader::do_allocate_reserved_registers()
+{
+   auto& vf = value_factory();
+
+   const int thread_id_sel = 0;
+   const int wg_id_sel = 1;
+
+   for (int i = 0; i < 3; ++i) {
+      m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i);
+      m_local_invocation_id[i]->pin_live_range(true);
+
+      m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i);
+      m_workgroup_id[i]->pin_live_range(true);
+   }
+   return 2;
+}
+
+bool ComputeShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_local_invocation_id:
+      return emit_load_3vec(instr, m_local_invocation_id);
+   case nir_intrinsic_load_workgroup_id:
+      return emit_load_3vec(instr, m_workgroup_id);
+   case nir_intrinsic_load_num_workgroups:
+      return emit_load_num_workgroups(instr);
+   default:
+      return false;
+   }
+}
+
+void ComputeShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_COMPUTE;
+}
+
+bool ComputeShader::read_prop(UNUSED std::istream& is)
+{
+   return true;
+}
+
+void ComputeShader::do_print_properties(UNUSED std::ostream& os) const
+{
+
+}
+
+bool ComputeShader::emit_load_num_workgroups(nir_intrinsic_instr* instr)
+{
+   auto zero = value_factory().temp_register();
+
+   emit_instruction(new AluInstr(op1_mov, zero, value_factory().inline_const(ALU_SRC_0, 0),
+                                 AluInstr::last_write));
+   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+
+   auto ir = new LoadFromBuffer(dest, {0,1,2,7}, zero, 16,
+                                R600_BUFFER_INFO_CONST_BUFFER,
+                                nullptr, fmt_32_32_32_32);
+
+   ir->set_fetch_flag(LoadFromBuffer::srf_mode);
+   ir->reset_fetch_flag(LoadFromBuffer::format_comp_signed);
+   ir->set_num_format(vtx_nf_int);
+   emit_instruction(ir);
+   return true;
+
+}
+
+bool ComputeShader::emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src)
+{
+   auto& vf = value_factory();
+
+   for (int i = 0; i < 3; ++i) {
+      auto dest = vf.dest(instr->dest, i, pin_none);
+      emit_instruction(new AluInstr(op1_mov, dest, src[i], i == 2 ? AluInstr::last_write : AluInstr::write));
+   }
+   return true;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_cs.h b/src/gallium/drivers/r600/sfn/sfn_shader_cs.h
new file mode 100644
index 0000000..c58e606
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_cs.h
@@ -0,0 +1,39 @@
+#ifndef COMPUTE_H
+#define COMPUTE_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+class ComputeShader : public Shader
+{
+public:
+   ComputeShader(const r600_shader_key& key);
+
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   bool load_input(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("compute shaders  have bno inputs");
+   };
+   bool store_output(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("compute shaders have no outputs");
+   };
+
+   bool read_prop(std::istream& is) override;
+   void do_print_properties(std::ostream& os) const override;
+
+   bool emit_load_num_workgroups(nir_intrinsic_instr* instr);
+   bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PRegister,3>& src);
+
+   std::array<PRegister,3> m_workgroup_id{nullptr};
+   std::array<PRegister,3> m_local_invocation_id{nullptr};
+};
+
+}
+
+#endif // COMPUTE_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
deleted file mode 100644
index 02d5add..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
+++ /dev/null
@@ -1,1074 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "pipe/p_defines.h"
-#include "tgsi/tgsi_from_mesa.h"
-#include "sfn_shader_fragment.h"
-#include "sfn_instruction_fetch.h"
-
-namespace r600 {
-
-FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
-                                             r600_shader& sh,
-                                             r600_pipe_shader_selector &sel,
-                                             const r600_shader_key &key,
-                                             enum amd_gfx_level gfx_level):
-   ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, gfx_level, 0),
-   m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
-   m_max_counted_color_exports(0),
-   m_two_sided_color(key.ps.color_two_side),
-   m_last_pixel_export(nullptr),
-   m_nir(nir),
-   m_reserved_registers(0),
-   m_frag_pos_index(0),
-   m_need_back_color(false),
-   m_front_face_loaded(false),
-   m_depth_exports(0),
-   m_apply_sample_mask(key.ps.apply_sample_id_mask),
-   m_dual_source_blend(key.ps.dual_source_blend),
-   m_pos_input(nullptr)
-{
-   for (auto&  i: m_interpolator) {
-      i.enabled = false;
-      i.ij_index= 0;
-   }
-
-   sh_info().rat_base = key.ps.nr_cbufs;
-   sh_info().atomic_base = key.ps.first_atomic_counter;
-}
-
-unsigned barycentric_ij_index(nir_intrinsic_instr *instr)
-{
-   unsigned index = 0;
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_barycentric_sample:
-      index = 0;
-      break;
-   case nir_intrinsic_load_barycentric_at_sample:
-   case nir_intrinsic_load_barycentric_at_offset:
-   case nir_intrinsic_load_barycentric_pixel:
-      index = 1;
-      break;
-   case nir_intrinsic_load_barycentric_centroid:
-      index = 2;
-      break;
-   default:
-      unreachable("Unknown interpolator intrinsic");
-   }
-
-   switch (nir_intrinsic_interp_mode(instr)) {
-   case INTERP_MODE_NONE:
-   case INTERP_MODE_SMOOTH:
-   case INTERP_MODE_COLOR:
-      return index;
-   case INTERP_MODE_NOPERSPECTIVE:
-      return index + 3;
-   case INTERP_MODE_FLAT:
-   case INTERP_MODE_EXPLICIT:
-   default:
-      unreachable("unknown/unsupported mode for load_interpolated");
-   }
-   return 0;
-}
-
-bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr,
-                                               bool interpolated)
-{
-   sfn_log << SfnLog::io << "Parse " << instr->instr        
-           << "\n";
-
-   auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]);
-   assert(index);
-
-   unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32;
-   auto semantic = r600_get_varying_semantic(location);
-   tgsi_semantic name = (tgsi_semantic)semantic.first;
-   unsigned sid = semantic.second;
-
-
-   if (location == VARYING_SLOT_POS) {
-      m_sv_values.set(es_pos);
-      m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
-                                               nir_intrinsic_component(instr),
-                                               nir_dest_num_components(instr->dest),
-                                               TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER);
-      m_shaderio.add_input(m_pos_input);
-      return true;
-   }
-
-   if (location == VARYING_SLOT_FACE) {
-      m_sv_values.set(es_face);
-      return true;
-   }
-
-
-   tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
-   tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
-
-   bool uses_interpol_at_centroid = false;
-
-   if (interpolated) {
-
-      glsl_interp_mode mode = INTERP_MODE_NONE;
-      auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
-      mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
-      switch (parent->intrinsic) {
-      case nir_intrinsic_load_barycentric_sample:
-         tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
-         break;
-      case nir_intrinsic_load_barycentric_at_sample:
-      case nir_intrinsic_load_barycentric_at_offset:
-      case nir_intrinsic_load_barycentric_pixel:
-         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
-         break;
-      case nir_intrinsic_load_barycentric_centroid:
-         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
-         uses_interpol_at_centroid = true;
-         break;
-      default:
-         std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
-                   << nir_intrinsic_infos[instr->intrinsic].name
-                   << " interpolator?\n";
-         assert(0);
-      }
-
-      switch (mode) {
-      case INTERP_MODE_NONE:
-         if (name == TGSI_SEMANTIC_COLOR) {
-            tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
-            break;
-      }
-         FALLTHROUGH;
-      case INTERP_MODE_SMOOTH:
-         tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
-         break;
-      case INTERP_MODE_NOPERSPECTIVE:
-         tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
-         break;
-      case INTERP_MODE_FLAT:
-         break;
-      case INTERP_MODE_COLOR:
-         tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
-         break;
-      case INTERP_MODE_EXPLICIT:
-      default:
-         assert(0);
-      }
-
-      m_interpolators_used.set(barycentric_ij_index(parent));
-
-   }
-
-   switch (name) {
-   case TGSI_SEMANTIC_COLOR: {
-      auto input = m_shaderio.find_varying(name, sid);
-      if (!input) {
-         m_shaderio.add_input(new ShaderInputColor(name, sid,
-                                                   nir_intrinsic_base(instr) + index->u32,
-                                                   nir_intrinsic_component(instr),
-                                                   nir_dest_num_components(instr->dest),
-                                                   tgsi_interpolate, tgsi_loc));
-      }  else {
-         if (uses_interpol_at_centroid)
-            input->set_uses_interpolate_at_centroid();
-
-         auto varying = static_cast<ShaderInputVarying&>(*input);
-         varying.update_mask(nir_dest_num_components(instr->dest),
-                             nir_intrinsic_component(instr));
-      }
-
-      m_need_back_color = m_two_sided_color;
-      return true;
-   }
-   case TGSI_SEMANTIC_PRIMID:
-      sh_info().gs_prim_id_input = true;
-      sh_info().ps_prim_id_input = m_shaderio.inputs().size();
-      FALLTHROUGH;
-   case TGSI_SEMANTIC_FOG:
-   case TGSI_SEMANTIC_GENERIC:
-   case TGSI_SEMANTIC_TEXCOORD:
-   case TGSI_SEMANTIC_LAYER:
-   case TGSI_SEMANTIC_PCOORD:
-   case TGSI_SEMANTIC_VIEWPORT_INDEX:
-   case TGSI_SEMANTIC_CLIPDIST: {
-      auto input = m_shaderio.find_varying(name, sid);
-      if (!input) {
-         m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
-                                                     nir_intrinsic_component(instr),
-                                                     nir_dest_num_components(instr->dest),
-                                                     tgsi_interpolate, tgsi_loc));
-      } else {
-         if (uses_interpol_at_centroid)
-            input->set_uses_interpolate_at_centroid();
-
-         auto varying = static_cast<ShaderInputVarying&>(*input);
-         varying.update_mask(nir_dest_num_components(instr->dest),
-                             nir_intrinsic_component(instr));
-      }
-
-      return true;
-   }
-   default:
-      return false;
-   }
-}
-
-
-bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
-{
-   switch (instr->type) {
-   case nir_instr_type_intrinsic: {
-      nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
-
-      switch (ii->intrinsic) {
-      case nir_intrinsic_load_front_face:
-         m_sv_values.set(es_face);
-         break;
-      case nir_intrinsic_load_sample_mask_in:
-         m_sv_values.set(es_sample_mask_in);
-         break;
-      case nir_intrinsic_load_sample_pos:
-         m_sv_values.set(es_sample_pos);
-         FALLTHROUGH;
-      case nir_intrinsic_load_sample_id:
-         m_sv_values.set(es_sample_id);
-         break;
-      case nir_intrinsic_load_helper_invocation:
-         m_sv_values.set(es_helper_invocation);
-         sh_info().uses_helper_invocation = true;
-         break;
-      case nir_intrinsic_load_input:
-         return process_load_input(ii, false);
-      case nir_intrinsic_load_interpolated_input: {
-         return process_load_input(ii, true);
-      }
-      case nir_intrinsic_store_output:
-         return process_store_output(ii);
-
-      default:
-         ;
-      }
-   }
-   default:
-      ;
-   }
-   return true;
-}
-
-bool FragmentShaderFromNir::do_allocate_reserved_registers()
-{
-   assert(!m_reserved_registers);
-
-   int face_reg_index = -1;
-   int sample_id_index = -1;
-   // enabled interpolators based on inputs
-   for (unsigned i = 0; i < s_max_interpolators; ++i) {
-      if (m_interpolators_used.test(i)) {
-         sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
-         m_interpolator[i].enabled = true;
-      }
-   }
-
-   // sort the varying inputs
-   m_shaderio.sort_varying_inputs();
-
-   // handle interpolators
-   int num_baryc = 0;
-   for (int i = 0; i < 6; ++i) {
-      if (m_interpolator[i].enabled) {
-         sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
-
-         m_interpolator[i].ij_index = num_baryc;
-
-         unsigned sel = num_baryc / 2;
-         unsigned chan = 2 * (num_baryc % 2);
-
-         auto ip_i = new GPRValue(sel, chan + 1);
-         ip_i->set_as_input();
-         m_interpolator[i].i.reset(ip_i);
-         inject_register(sel, chan + 1, m_interpolator[i].i, false);
-
-         auto ip_j = new GPRValue(sel, chan);
-         ip_j->set_as_input();
-         m_interpolator[i].j.reset(ip_j);
-         inject_register(sel, chan, m_interpolator[i].j, false);
-
-         ++num_baryc;
-      }
-   }
-   m_reserved_registers += (num_baryc + 1) >> 1;
-
-   if (m_sv_values.test(es_pos)) {
-      m_frag_pos_index = m_reserved_registers++;
-      assert(m_pos_input);
-      m_pos_input->set_gpr(m_frag_pos_index);
-   }
-
-   // handle system values
-   if (m_sv_values.test(es_face) || m_need_back_color) {
-      face_reg_index = m_reserved_registers++;
-      m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
-      m_front_face_reg->set_as_input();
-      sfn_log << SfnLog::io << "Set front_face register to " <<  *m_front_face_reg << "\n";
-      inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
-
-      m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
-      load_front_face();
-   }
-
-   if (m_sv_values.test(es_sample_mask_in)) {
-      if (face_reg_index < 0)
-         face_reg_index = m_reserved_registers++;
-
-      m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
-      m_sample_mask_reg->set_as_input();
-      sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
-      sh_info().nsys_inputs = 1;
-      m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
-   }
-
-   if (m_sv_values.test(es_sample_id) ||
-       m_sv_values.test(es_sample_mask_in)) {
-      if (sample_id_index < 0)
-         sample_id_index = m_reserved_registers++;
-
-      m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
-      m_sample_id_reg->set_as_input();
-      sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
-      sh_info().nsys_inputs++;
-      m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
-   }
-
-   // The back color handling is not emmited in the code, so we have
-   // to add the inputs here and later we also need to inject the code to set
-   // the right color
-   if (m_need_back_color) {
-      size_t ninputs = m_shaderio.inputs().size();
-      for (size_t k = 0; k < ninputs; ++k) {
-         ShaderInput& i = m_shaderio.input(k);
-
-         if (i.name() != TGSI_SEMANTIC_COLOR)
-            continue;
-
-         ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
-
-         size_t next_pos = m_shaderio.size();
-         auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
-         m_shaderio.add_input(bcol);
-         col.set_back_color(next_pos);
-      }
-      m_shaderio.set_two_sided();
-   }
-
-   m_shaderio.update_lds_pos();
-
-   set_reserved_registers(m_reserved_registers);
-
-   return true;
-}
-
-void FragmentShaderFromNir::emit_shader_start()
-{
-   if (m_sv_values.test(es_face))
-      load_front_face();
-
-   if (m_sv_values.test(es_pos)) {
-      for (int i = 0; i < 4; ++i) {
-         auto v = new GPRValue(m_frag_pos_index, i);
-         v->set_as_input();
-         auto reg = PValue(v);
-         m_frag_pos[i] = reg;
-      }
-   }
-
-   if (m_sv_values.test(es_helper_invocation)) {
-      m_helper_invocation = get_temp_register();
-      auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
-      emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
-      GPRVector dst({dummy, dummy, dummy, dummy});
-      std::array<int,4> swz = {7,7,7,7};
-      dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
-      swz[m_helper_invocation->chan()] = 4;
-
-      auto vtx = new FetchInstruction(dst, m_helper_invocation,
-                                      R600_BUFFER_INFO_CONST_BUFFER, bim_none);
-      vtx->set_flag(vtx_vpm);
-      vtx->set_flag(vtx_use_tc);
-      vtx->set_dest_swizzle(swz);
-      emit_instruction(vtx);
-   }
-}
-
-bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
-{
-
-   auto semantic = nir_intrinsic_io_semantics(instr);
-   unsigned driver_loc = nir_intrinsic_base(instr);
-
-   if (sh_info().noutput <= driver_loc)
-      sh_info().noutput = driver_loc + 1;
-
-   r600_shader_io& io = sh_info().output[driver_loc];
-   tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
-                                    &io.name, &io.sid);
-
-   unsigned component = nir_intrinsic_component(instr);
-   io.write_mask |= nir_intrinsic_write_mask(instr) << component;
-
-   if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
-      sh_info().fs_write_all = true;
-   }
-
-   if (semantic.location == FRAG_RESULT_COLOR ||
-       (semantic.location >= FRAG_RESULT_DATA0 &&
-        semantic.location <= FRAG_RESULT_DATA7))  {
-      ++m_max_counted_color_exports;
-
-      /* Hack: force dual source output handling if one color output has a
-       * dual_source_blend_index > 0 */
-      if (semantic.location == FRAG_RESULT_COLOR &&
-          semantic.dual_source_blend_index > 0)
-         m_dual_source_blend = true;
-
-      if (m_max_counted_color_exports > 1)
-         sh_info().fs_write_all = false;
-      return true;
-   }
-
-   if (semantic.location == FRAG_RESULT_DEPTH ||
-       semantic.location == FRAG_RESULT_STENCIL ||
-       semantic.location == FRAG_RESULT_SAMPLE_MASK) {
-      io.write_mask = 15;
-      return true;
-   }
-
-   return false;
-
-
-}
-
-bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
-{
-   auto dest = from_nir(instr->dest, 0);
-   assert(m_sample_id_reg);
-   assert(m_sample_mask_reg);
-
-   emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
-   emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
-   return true;
-}
-
-bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_sample_mask_in:
-      if (m_apply_sample_mask) {
-         return emit_load_sample_mask_in(instr);
-      } else
-         return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
-   case nir_intrinsic_load_sample_id:
-      return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
-   case nir_intrinsic_load_front_face:
-      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
-   case nir_intrinsic_load_sample_pos:
-      return emit_load_sample_pos(instr);
-   case nir_intrinsic_load_helper_invocation:
-      return load_preloaded_value(instr->dest, 0, m_helper_invocation);
-   case nir_intrinsic_load_input:
-      return emit_load_input(instr);
-   case nir_intrinsic_load_barycentric_sample:
-   case nir_intrinsic_load_barycentric_pixel:
-   case nir_intrinsic_load_barycentric_centroid:  {
-      unsigned ij = barycentric_ij_index(instr);
-      return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
-            load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
-   }
-   case nir_intrinsic_load_barycentric_at_offset:
-         return load_barycentric_at_offset(instr);
-   case nir_intrinsic_load_barycentric_at_sample:
-      return load_barycentric_at_sample(instr);
-
-   case nir_intrinsic_load_interpolated_input: {
-      return emit_load_interpolated_input(instr);
-   }
-   case nir_intrinsic_store_output:
-      return emit_store_output(instr);
-
-   default:
-      return false;
-   }
-}
-
-bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
-{
-   auto location = nir_intrinsic_io_semantics(instr).location;
-
-   if (location == FRAG_RESULT_COLOR)
-      return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
-
-   if ((location >= FRAG_RESULT_DATA0 &&
-        location <= FRAG_RESULT_DATA7) ||
-       location == FRAG_RESULT_DEPTH ||
-       location == FRAG_RESULT_STENCIL ||
-       location == FRAG_RESULT_SAMPLE_MASK)
-      return emit_export_pixel(instr, 1);
-
-   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n";
-   return false;
-
-}
-
-bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
-{
-   unsigned loc = nir_intrinsic_io_semantics(instr).location;
-   switch (loc) {
-   case VARYING_SLOT_POS:
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         load_preloaded_value(instr->dest, i, m_frag_pos[i]);
-      }
-      return true;
-   case VARYING_SLOT_FACE:
-      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
-   default:
-      ;
-   }
-
-   auto param = nir_src_as_const_value(instr->src[1]);
-   assert(param && "Indirect PS inputs not (yet) supported");
-
-   auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
-   auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
-
-   io.set_gpr(dst.sel());
-
-   Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
-
-
-   if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
-                          nir_intrinsic_component(instr)))
-      return false;
-
-   if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
-
-      auto & color_input  = static_cast<ShaderInputColor&> (io);
-      auto& bgio = m_shaderio.input(color_input.back_color_input_index());
-
-      GPRVector bgcol = get_temp_vec4();
-      bgio.set_gpr(bgcol.sel());
-      load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
-
-      load_front_face();
-
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < 4 ; ++i) {
-         ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-
-   AluInstruction *ir = nullptr;
-   if (nir_intrinsic_component(instr) != 0) {
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-   return true;
-}
-
-bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
-{
-   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
-   PValue dummy(new GPRValue(interpolator.i->sel(), 0));
-
-   GPRVector help = get_temp_vec4();
-   GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
-
-   auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
-   getgradh->set_dest_swizzle({0,1,7,7});
-   getgradh->set_flag(TexInstruction::x_unnormalized);
-   getgradh->set_flag(TexInstruction::y_unnormalized);
-   getgradh->set_flag(TexInstruction::z_unnormalized);
-   getgradh->set_flag(TexInstruction::w_unnormalized);
-   getgradh->set_flag(TexInstruction::grad_fine);
-   emit_instruction(getgradh);
-
-   auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
-   getgradv->set_dest_swizzle({7,7,0,1});
-   getgradv->set_flag(TexInstruction::x_unnormalized);
-   getgradv->set_flag(TexInstruction::y_unnormalized);
-   getgradv->set_flag(TexInstruction::z_unnormalized);
-   getgradv->set_flag(TexInstruction::w_unnormalized);
-   getgradv->set_flag(TexInstruction::grad_fine);
-   emit_instruction(getgradv);
-
-   PValue ofs_x = from_nir(instr->src[0], 0);
-   PValue ofs_y = from_nir(instr->src[0], 1);
-   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
-   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
-   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
-   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
-
-   return true;
-}
-
-bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
-{
-   GPRVector slope = get_temp_vec4();
-
-   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
-                                     from_nir_with_fetch_constant(instr->src[0], 0),
-                                     0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
-   fetch->set_flag(vtx_srf_mode);
-   emit_instruction(fetch);
-
-   GPRVector grad = get_temp_vec4();
-
-   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
-   assert(interpolator.enabled);
-   PValue dummy(new GPRValue(interpolator.i->sel(), 0));
-
-   GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
-
-   auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
-   tex->set_flag(TexInstruction::grad_fine);
-   tex->set_flag(TexInstruction::x_unnormalized);
-   tex->set_flag(TexInstruction::y_unnormalized);
-   tex->set_flag(TexInstruction::z_unnormalized);
-   tex->set_flag(TexInstruction::w_unnormalized);
-   tex->set_dest_swizzle({0,1,7,7});
-   emit_instruction(tex);
-
-   tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
-   tex->set_flag(TexInstruction::x_unnormalized);
-   tex->set_flag(TexInstruction::y_unnormalized);
-   tex->set_flag(TexInstruction::z_unnormalized);
-   tex->set_flag(TexInstruction::w_unnormalized);
-   tex->set_flag(TexInstruction::grad_fine);
-   tex->set_dest_swizzle({7,7,0,1});
-   emit_instruction(tex);
-
-   emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
-   emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
-
-   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write}));
-   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr}));
-
-   return true;
-}
-
-bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr)
-{
-   unsigned loc = nir_intrinsic_io_semantics(instr).location;
-   auto param = nir_src_as_const_value(instr->src[0]);
-   assert(param && "Indirect PS inputs not (yet) supported");
-
-   auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
-
-   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
-
-   unsigned num_components = nir_dest_num_components(instr->dest);
-
-   switch (loc) {
-   case VARYING_SLOT_POS:
-      for (unsigned i = 0; i < num_components; ++i) {
-         load_preloaded_value(instr->dest, i, m_frag_pos[i]);
-      }
-      return true;
-   case VARYING_SLOT_FACE:
-      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
-   default:
-      ;
-   }
-
-   auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
-
-   AluInstruction *ir = nullptr;
-   for (unsigned i = 0; i < 4 ; ++i) {
-      ir = new AluInstruction(op1_interp_load_p0, dst[i],
-                              PValue(new InlineConstValue(ALU_SRC_PARAM_BASE +
-                                                          io.lds_pos(), i)),
-                              EmitInstruction::write);
-      emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-
-   /* TODO: back color */
-   if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
-      Interpolator ip = {false, 0, NULL, NULL};
-
-      auto & color_input  = static_cast<ShaderInputColor&> (io);
-      auto& bgio = m_shaderio.input(color_input.back_color_input_index());
-
-      GPRVector bgcol = get_temp_vec4();
-      bgio.set_gpr(bgcol.sel());
-      load_interpolated(bgcol, bgio, ip, num_components, 0);
-
-      load_front_face();
-
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < 4 ; ++i) {
-         ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-   if (nir_intrinsic_component(instr) != 0) {
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
-         emit_instruction(ir);
-      }
-      if (ir)
-         ir->set_flag(alu_last_instr);
-   }
-
-
-   return true;
-}
-
-void FragmentShaderFromNir::load_front_face()
-{
-   assert(m_front_face_reg);
-   if (m_front_face_loaded)
-      return;
-
-   auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
-                                Value::zero, {alu_write, alu_last_instr});
-   m_front_face_loaded = true;
-   emit_instruction(ir);
-}
-
-bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
-{
-   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
-   auto fetch = new FetchInstruction(vc_fetch,
-                                     no_index_offset,
-                                     fmt_32_32_32_32_float,
-                                     vtx_nf_scaled,
-                                     vtx_es_none,
-                                     m_sample_id_reg,
-                                     dest,
-                                     0,
-                                     false,
-                                     0xf,
-                                     R600_BUFFER_INFO_CONST_BUFFER,
-                                     0,
-                                     bim_none,
-                                     false,
-                                     false,
-                                     0,
-                                     0,
-                                     0,
-                                     PValue(),
-                                     {0,1,2,3});
-   fetch->set_flag(vtx_srf_mode);
-   emit_instruction(fetch);
-   return true;
-}
-
-bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
-                                              ShaderInput& io, const Interpolator &ip,
-                                              int num_components, int start_comp)
-{
-   // replace io with ShaderInputVarying
-   if (io.interpolate() > 0) {
-
-      sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i <<  ")" << "\n";
-
-      if (num_components == 1) {
-         switch (start_comp) {
-         case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
-         case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
-         case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
-         case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
-         default:
-            assert(0);
-         }
-      }
-
-      if (num_components == 2) {
-         switch (start_comp) {
-         case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
-         case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
-         case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
-                  load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
-         default:
-            assert(0);
-         }
-      }
-
-      if (num_components == 3 && start_comp == 0)
-         return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
-               load_interpolated_one_comp(dest, io, ip, op2_interp_z);
-
-      int full_write_mask = ((1 << num_components) - 1) << start_comp;
-
-      bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
-      success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
-      return success;
-
-   } else {
-      AluInstruction *ir = nullptr;
-      for (unsigned i = 0; i < 4 ; ++i) {
-         ir = new AluInstruction(op1_interp_load_p0, dest[i],
-                                 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
-                                 EmitInstruction::write);
-         emit_instruction(ir);
-      }
-      ir->set_flag(alu_last_instr);
-   }
-   return true;
-}
-
-bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
-                                                       ShaderInput& io, const Interpolator& ip, EAluOp op)
-{
-   for (unsigned i = 0; i < 2 ; ++i) {
-      int chan = i;
-      if (op == op2_interp_z)
-         chan += 2;
-
-
-      auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
-                                   PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
-                                   i == 0  ? EmitInstruction::write : EmitInstruction::last);
-      dest.pin_to_channel(chan);
-
-      ir->set_bank_swizzle(alu_vec_210);
-      emit_instruction(ir);
-   }
-   return true;
-}
-
-bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
-                                                       const Interpolator& ip, EAluOp op, int writemask)
-{
-   AluInstruction *ir = nullptr;
-   assert(ip.j);
-   assert(ip.i);
-   for (unsigned i = 0; i < 4 ; ++i) {
-      ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
-                              (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
-      dest.pin_to_channel(i);
-      ir->set_bank_swizzle(alu_vec_210);
-      emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-   return true;
-}
-
-bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
-                                                               ShaderInput& io, const Interpolator& ip,
-                                                               EAluOp op, UNUSED int start, int comp)
-{
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i <  4 ; ++i) {
-      ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
-                                   PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
-                                   i == comp ? EmitInstruction::write : EmitInstruction::empty);
-      ir->set_bank_swizzle(alu_vec_210);
-      dest.pin_to_channel(i);
-      emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-   return true;
-}
-
-
-bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs)
-{
-   std::array<uint32_t,4> swizzle;
-   unsigned writemask = nir_intrinsic_write_mask(instr);
-   auto semantics = nir_intrinsic_io_semantics(instr);
-   unsigned driver_location = nir_intrinsic_base(instr);
-
-   switch (semantics.location) {
-   case FRAG_RESULT_DEPTH:
-      writemask = 1;
-      swizzle = {0,7,7,7};
-      break;
-   case FRAG_RESULT_STENCIL:
-      writemask = 2;
-      swizzle = {7,0,7,7};
-      break;
-   case FRAG_RESULT_SAMPLE_MASK:
-      writemask = 4;
-      swizzle = {7,7,0,7};
-      break;
-   default:
-      for (int i = 0; i < 4; ++i) {
-         swizzle[i] = (i < instr->num_components) ? i : 7;
-      }
-   }
-
-   auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle);
-
-   set_output(driver_location, value.sel());
-
-   if (semantics.location == FRAG_RESULT_COLOR ||
-       (semantics.location >= FRAG_RESULT_DATA0 &&
-        semantics.location <= FRAG_RESULT_DATA7)) {
-      for (int k = 0 ; k < outputs; ++k) {
-
-         unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
-                             ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
-
-         sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
-
-         if (location >= m_max_color_exports) {
-            sfn_log << SfnLog::io << "Pixel output loc:" << location
-                    << " dl:" << driver_location
-                    << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
-            continue;
-         }
-
-         m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
-
-         if (sh_info().ps_export_highest < location)
-            sh_info().ps_export_highest = location;
-
-         sh_info().nr_ps_color_exports++;
-
-         unsigned mask = (0xfu << (location * 4));
-         sh_info().ps_color_export_mask |= mask;
-
-         emit_export_instruction(m_last_pixel_export);
-      };
-   } else if (semantics.location == FRAG_RESULT_DEPTH ||
-              semantics.location == FRAG_RESULT_STENCIL ||
-              semantics.location == FRAG_RESULT_SAMPLE_MASK) {
-      m_depth_exports++;
-      emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
-   } else {
-      return false;
-   }
-   return true;
-}
-
-
-bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
-{
-   std::array<uint32_t,4> swizzle;
-   unsigned writemask = nir_intrinsic_write_mask(instr);
-   switch (out_var->data.location) {
-   case FRAG_RESULT_DEPTH:
-      writemask = 1;
-      swizzle = {0,7,7,7};
-      break;
-   case FRAG_RESULT_STENCIL:
-      writemask = 2;
-      swizzle = {7,0,7,7};
-      break;
-   case FRAG_RESULT_SAMPLE_MASK:
-      writemask = 4;
-      swizzle = {7,7,0,7};
-      break;
-   default:
-      for (int i = 0; i < 4; ++i) {
-         swizzle[i] = (i < instr->num_components) ? i : 7;
-      }
-   }
-
-   auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
-
-   set_output(out_var->data.driver_location, value.sel());
-
-   if (out_var->data.location == FRAG_RESULT_COLOR ||
-       (out_var->data.location >= FRAG_RESULT_DATA0 &&
-        out_var->data.location <= FRAG_RESULT_DATA7)) {
-      for (int k = 0 ; k < outputs; ++k) {
-
-         unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR)
-                             ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
-
-         sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
-
-         if (location >= m_max_color_exports) {
-            sfn_log << SfnLog::io << "Pixel output loc:" << location
-                    << " dl:" << out_var->data.location
-                    << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
-            continue;
-         }
-
-         m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
-
-         if (sh_info().ps_export_highest < location)
-            sh_info().ps_export_highest = location;
-
-         sh_info().nr_ps_color_exports++;
-
-         unsigned mask = (0xfu << (location * 4));
-         sh_info().ps_color_export_mask |= mask;
-
-         emit_export_instruction(m_last_pixel_export);
-      };
-   } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
-              out_var->data.location == FRAG_RESULT_STENCIL ||
-              out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
-      m_depth_exports++;
-      emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
-   } else {
-      return false;
-   }
-   return true;
-}
-
-void FragmentShaderFromNir::do_finalize()
-{
-   // update shader io info and set LDS etc.
-   sh_info().ninput = m_shaderio.inputs().size();
-
-   sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
-   for (size_t i = 0; i < sh_info().ninput; ++i) {
-      ShaderInput& input = m_shaderio.input(i);
-      int ij_idx = (input.ij_index() < 6 &&
-                    input.ij_index() >= 0) ? input.ij_index() : 0;
-      input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
-   }
-
-   sh_info().two_side = m_shaderio.two_sided();
-   sh_info().nlds = m_shaderio.nlds();
-
-   if (!m_last_pixel_export) {
-      GPRVector v(0, {7,7,7,7});
-      m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
-      sh_info().nr_ps_color_exports++;
-      sh_info().ps_color_export_mask = 0xf;
-      emit_export_instruction(m_last_pixel_export);
-   }
-
-   m_last_pixel_export->set_last();
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
deleted file mode 100644
index 6c6fb2b..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_fragment_shader_from_nir_h
-#define sfn_fragment_shader_from_nir_h
-
-#include "sfn_shader_base.h"
-#include "sfn_shaderio.h"
-#include <bitset>
-
-namespace r600 {
-
-class FragmentShaderFromNir : public ShaderFromNirProcessor {
-public:
-   FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
-                         r600_pipe_shader_selector &sel, const r600_shader_key &key,
-                         enum amd_gfx_level gfx_level);
-   bool scan_sysvalue_access(nir_instr *instr) override;
-private:
-
-   struct Interpolator {
-      bool enabled;
-      unsigned ij_index;
-      PValue i;
-      PValue j;
-   };
-
-   void emit_shader_start() override;
-   bool do_allocate_reserved_registers() override;
-   bool process_store_output(nir_intrinsic_instr *instr);
-
-   bool emit_store_output(nir_intrinsic_instr* instr);
-
-   bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
-   bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
-   bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
-                          int num_components, int start_comp);
-   bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
-   bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask);
-   bool load_interpolated_two_comp_for_one(GPRVector &dest,
-                                           ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
-
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-   void do_finalize() override;
-
-   void load_front_face();
-
-   bool emit_load_input(nir_intrinsic_instr* instr);
-   bool emit_load_front_face(nir_intrinsic_instr* instr);
-   bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
-   bool emit_load_sample_pos(nir_intrinsic_instr* instr);
-   bool emit_load_sample_id(nir_intrinsic_instr* instr);
-
-   bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
-   bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
-   bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
-   bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
-
-
-   unsigned m_max_color_exports;
-   unsigned m_max_counted_color_exports;
-   bool m_two_sided_color;
-   ExportInstruction *m_last_pixel_export;
-   const nir_shader& m_nir;
-
-
-   std::array<Interpolator, 6> m_interpolator;
-   unsigned m_reserved_registers;
-   unsigned m_frag_pos_index;
-   PGPRValue m_front_face_reg;
-   PGPRValue m_sample_mask_reg;
-   PGPRValue m_sample_id_reg;
-   PGPRValue m_helper_invocation;
-   GPRVector m_frag_pos;
-   bool m_need_back_color;
-   bool m_front_face_loaded;
-   ShaderIO m_shaderio;
-   unsigned m_depth_exports;
-
-   std::map<unsigned, PValue> m_input_cache;
-
-   static const int s_max_interpolators = 6;
-
-   std::bitset<s_max_interpolators> m_interpolators_used;
-
-   unsigned m_apply_sample_mask;
-   bool m_dual_source_blend;
-   ShaderInput *m_pos_input;
-
-};
-	
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp
new file mode 100644
index 0000000..62b1413
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp
@@ -0,0 +1,873 @@
+
+
+#include "sfn_debug.h"
+#include "sfn_shader_fs.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_tex.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_instr_export.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+
+FragmentShader::FragmentShader(const r600_shader_key& key):
+   Shader("FS"),
+   m_dual_source_blend(key.ps.dual_source_blend),
+   m_max_color_exports(MAX2(key.ps.nr_cbufs, 1)),
+   m_export_highest(0),
+   m_num_color_exports(0),
+   m_color_export_mask(0),
+   m_depth_exports(0),
+   m_last_pixel_export(nullptr),
+   m_pos_input(127, false),
+   m_fs_write_all(false),
+   m_apply_sample_mask(key.ps.apply_sample_id_mask),
+   m_rat_base(key.ps.nr_cbufs)
+{
+}
+
+void FragmentShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_FRAGMENT;
+
+   sh_info->ps_color_export_mask = m_color_export_mask;
+   sh_info->ps_export_highest = m_export_highest;
+   sh_info->nr_ps_color_exports = m_num_color_exports;
+
+   sh_info->fs_write_all = m_fs_write_all;
+
+   sh_info->rat_base = m_rat_base;
+   sh_info->uses_kill = m_uses_discard;
+   sh_info->gs_prim_id_input = m_gs_prim_id_input;
+   sh_info->ps_prim_id_input = m_ps_prim_id_input;
+   sh_info->nsys_inputs = m_nsys_inputs;
+   sh_info->uses_helper_invocation = m_helper_invocation != nullptr;
+}
+
+
+bool FragmentShader::load_input(nir_intrinsic_instr *intr)
+{
+   auto& vf = value_factory();
+   AluInstr *ir = nullptr;
+
+   auto location = nir_intrinsic_io_semantics(intr).location;
+   if (location == VARYING_SLOT_POS) {
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
+         ir = new AluInstr(op1_mov,
+                           vf.dest(intr->dest, i, pin_none),
+                           m_pos_input[i],
+                           AluInstr::write);
+         emit_instruction(ir);
+      }
+      ir->set_alu_flag(alu_last_instr);
+      set_input_gpr(nir_intrinsic_base(intr), m_pos_input[0]->sel());
+      return true;
+   }
+
+   if (location == VARYING_SLOT_FACE) {
+      ir = new AluInstr(op2_setge_dx10,
+                        vf.dest(intr->dest, 0, pin_none),
+                        m_face_input,
+                        vf.inline_const(ALU_SRC_0, 0),
+                        AluInstr::last_write);
+      set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
+
+      emit_instruction(ir);
+      return true;
+   }
+
+   auto io = input(nir_intrinsic_base(intr));
+   auto comp = nir_intrinsic_component(intr);
+   bool need_temp = comp > 0 || !intr->dest.is_ssa;
+   for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
+      if (need_temp) {
+         auto tmp = vf.temp_register(comp + i);
+         ir = new AluInstr(op1_interp_load_p0,
+                           tmp,
+                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
+                           AluInstr::last_write);
+         emit_instruction(ir);
+         emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
+      } else {
+
+         ir = new AluInstr(op1_interp_load_p0,
+                           vf.dest(intr->dest, i, pin_chan),
+                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
+                           AluInstr::write);
+         emit_instruction(ir);
+      }
+
+   }
+   ir->set_alu_flag(alu_last_instr);
+   return true;
+}
+
+bool FragmentShader::store_output(nir_intrinsic_instr *intr)
+{
+   auto location = nir_intrinsic_io_semantics(intr).location;
+
+   if (location == FRAG_RESULT_COLOR) {
+      if (!m_dual_source_blend) {
+         m_fs_write_all = true;
+      }
+
+      return emit_export_pixel(*intr, m_dual_source_blend ? 1 : m_max_color_exports);
+   }
+
+   if ((location >= FRAG_RESULT_DATA0 &&
+        location <= FRAG_RESULT_DATA7) ||
+       location == FRAG_RESULT_DEPTH ||
+       location == FRAG_RESULT_STENCIL ||
+       location == FRAG_RESULT_SAMPLE_MASK)
+      return emit_export_pixel(*intr, 1);
+
+   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n";
+   return false;
+}
+
+unsigned
+barycentric_ij_index(nir_intrinsic_instr *intr)
+{
+   unsigned index = 0;
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_barycentric_sample:
+      index = 0;
+   break;
+   case nir_intrinsic_load_barycentric_at_sample:
+   case nir_intrinsic_load_barycentric_at_offset:
+   case nir_intrinsic_load_barycentric_pixel:
+      index = 1;
+   break;
+   case nir_intrinsic_load_barycentric_centroid:
+      index = 2;
+   break;
+   default:
+      unreachable("Unknown interpolator intrinsic");
+   }
+
+   switch (nir_intrinsic_interp_mode(intr)) {
+   case INTERP_MODE_NONE:
+   case INTERP_MODE_SMOOTH:
+   case INTERP_MODE_COLOR:
+   return index;
+   case INTERP_MODE_NOPERSPECTIVE:
+   return index + 3;
+   case INTERP_MODE_FLAT:
+   case INTERP_MODE_EXPLICIT:
+   default:
+      unreachable("unknown/unsupported mode for load_interpolated");
+   }
+   return 0;
+}
+
+bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
+{
+   auto& vf = value_factory();
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_barycentric_centroid:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_sample: {
+      unsigned ij = barycentric_ij_index(intr);
+      vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
+      vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
+      return true;
+   }    
+   case nir_intrinsic_load_input:
+      return load_input(intr);
+   case nir_intrinsic_load_barycentric_at_offset:
+      return load_barycentric_at_offset(intr);
+   case nir_intrinsic_load_barycentric_at_sample:
+      return load_barycentric_at_sample(intr);
+   case nir_intrinsic_load_interpolated_input:
+      return load_interpolated_input(intr);
+   case nir_intrinsic_discard_if:
+      m_uses_discard = true;
+      emit_instruction(new AluInstr(op2_killne_int, nullptr,
+                                    value_factory().src(intr->src[0], 0),
+                                    value_factory().zero(),
+                                    {AluInstr::last}));
+      start_new_block(0);
+      return true;
+   case nir_intrinsic_discard:
+      m_uses_discard = true;
+      emit_instruction(new AluInstr(op2_kille_int, nullptr,
+                                    value_factory().zero(),
+                                    value_factory().zero(),
+                                    {AluInstr::last}));
+      return true;
+   case nir_intrinsic_load_sample_mask_in:
+      if (m_apply_sample_mask) {
+         return emit_load_sample_mask_in(intr);
+      } else
+         return emit_simple_mov(intr->dest, 0, m_sample_mask_reg);
+   case nir_intrinsic_load_sample_id:
+      return emit_simple_mov(intr->dest, 0, m_sample_id_reg);
+   case nir_intrinsic_load_helper_invocation:
+       return emit_load_helper_invocation(intr);
+   case nir_intrinsic_load_sample_pos:
+      return emit_load_sample_pos(intr);
+   default:
+      return false;
+   }
+}
+
+bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
+{
+   auto& vf = value_factory();
+   unsigned loc = nir_intrinsic_io_semantics(intr).location;
+   switch (loc) {
+   case VARYING_SLOT_POS:
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
+         vf.inject_value(intr->dest, i,  m_pos_input[i]);
+   return true;
+   case VARYING_SLOT_FACE:
+      return false;
+   default:
+      ;
+   }
+
+   auto param = nir_src_as_const_value(intr->src[1]);
+   assert(param && "Indirect PS inputs not (yet) supported");
+
+   int dest_num_comp = nir_dest_num_components(intr->dest);
+   int start_comp = nir_intrinsic_component(intr);
+   bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
+
+   auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
+
+   InterpolateParams params;
+
+   params.i = vf.src(intr->src[0], 0);
+   params.j = vf.src(intr->src[0], 1);
+   params.base = input(nir_intrinsic_base(intr)).lds_pos();
+
+   if (!load_interpolated(dst, params, dest_num_comp, start_comp))
+      return false;
+
+   if (need_temp) {
+      AluInstr *ir = nullptr;
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+         auto real_dst = vf.dest(intr->dest, i, pin_chan);
+         ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
+         emit_instruction(ir);
+      }
+      assert(ir);
+      ir->set_alu_flag(alu_last_instr);
+   }
+
+   return true;
+}
+
+bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
+                                       int num_dest_comp, int start_comp)
+{
+   sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i <<  ")" << "\n";
+
+   if (num_dest_comp == 1) {
+      switch (start_comp) {
+      case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
+      case 1: return load_interpolated_two_comp_for_one(dest, params,  op2_interp_xy, 0, 1);
+      case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
+      case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3);
+      default:
+         assert(0);
+      }
+   }
+
+   if (num_dest_comp == 2) {
+      switch (start_comp) {
+      case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
+      case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
+      case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
+               load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
+      default:
+         assert(0);
+      }
+   }
+
+   if (num_dest_comp == 3 && start_comp == 0)
+      return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
+            load_interpolated_one_comp(dest, params, op2_interp_z);
+
+   int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
+
+   bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
+   success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
+   return success;
+}
+
+bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op)
+{
+   auto group = new AluGroup();
+   bool success = true;
+
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2 && success; ++i) {
+      int chan = i;
+      if (op == op2_interp_z)
+         chan += 2;
+
+
+      ir = new AluInstr(op, dest[chan],
+                        i & 1 ? params.j : params.i,
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
+                        i == 0  ? AluInstr::write : AluInstr::last);
+
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   if (success)
+      emit_instruction(group);
+   return success;
+}
+
+bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask)
+{
+   auto group = new AluGroup();
+   bool success = true;
+
+   AluInstr *ir = nullptr;
+   assert(params.j);
+   assert(params.i);
+   for (unsigned i = 0; i < 4 ; ++i) {
+      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
+                        (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   if (success)
+      emit_instruction(group);
+   return success;
+}
+
+bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
+                                                        UNUSED int start, int comp)
+{
+   auto group = new AluGroup();
+   bool success = true;
+   AluInstr *ir = nullptr;
+
+   for (int i = 0; i <  4 ; ++i) {
+      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
+                        i == comp ? AluInstr::write : AluInstr::empty);
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+   if (success)
+      emit_instruction(group);
+
+   return success;
+}
+
+int FragmentShader::do_allocate_reserved_registers()
+{
+   for (unsigned i = 0; i < s_max_interpolators; ++i) {
+      if (m_interpolators_used.test(i)) {
+         sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
+         m_interpolator[i].enabled = true;
+      }
+   }
+
+   int num_baryc = 0;
+   for (int i = 0; i < 6; ++i) {
+      if (m_interpolator[i].enabled) {
+         sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
+         unsigned sel = num_baryc / 2;
+         unsigned chan = 2 * (num_baryc % 2);
+
+         m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
+         m_interpolator[i].i->pin_live_range(true, false);
+
+         m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
+         m_interpolator[i].j->pin_live_range(true, false);
+
+         m_interpolator[i].ij_index = num_baryc++;
+      }
+   }
+
+   int next_register = (num_baryc + 1) >> 1;
+
+   if (m_sv_values.test(es_pos)) {
+      m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false);
+      for (int i = 0; i < 4; ++i)
+         m_pos_input[i]->pin_live_range(true);
+   }
+
+   int face_reg_index = -1;
+   if (m_sv_values.test(es_face)) {
+      face_reg_index = next_register++;
+      m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0);
+      m_face_input->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_sample_mask_in)) {
+      if (face_reg_index < 0)
+         face_reg_index = next_register++;
+      m_sample_mask_reg = value_factory().allocate_pinned_register(face_reg_index, 2);
+      m_sample_mask_reg->pin_live_range(true);
+      sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
+      m_nsys_inputs = 1;
+      ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEMASK);
+      input.set_gpr(face_reg_index);
+      add_input(input);
+   }
+
+   if (m_sv_values.test(es_sample_id) ||
+       m_sv_values.test(es_sample_mask_in)) {
+      int sample_id_reg = next_register++;
+      m_sample_id_reg = value_factory().allocate_pinned_register(sample_id_reg, 3);
+      m_sample_id_reg->pin_live_range(true);
+      sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
+      m_nsys_inputs++;
+      ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEID);
+      input.set_gpr(sample_id_reg);
+      add_input(input);
+   }
+
+   if (m_sv_values.test(es_helper_invocation)) {
+      m_helper_invocation = value_factory().allocate_pinned_register(next_register++, 0);
+   }
+
+   return next_register;
+}
+
+bool FragmentShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_sample:
+   case nir_intrinsic_load_barycentric_at_sample:
+   case nir_intrinsic_load_barycentric_at_offset:
+   case nir_intrinsic_load_barycentric_centroid:
+      m_interpolators_used.set(barycentric_ij_index(intr));
+      break;
+   case nir_intrinsic_load_front_face:
+      m_sv_values.set(es_face);
+      break;
+   case nir_intrinsic_load_sample_mask_in:
+      m_sv_values.set(es_sample_mask_in);
+      break;
+   case nir_intrinsic_load_sample_pos:
+      m_sv_values.set(es_sample_pos);
+      FALLTHROUGH;
+   case nir_intrinsic_load_sample_id:
+      m_sv_values.set(es_sample_id);
+      break;
+   case nir_intrinsic_load_helper_invocation:
+      m_sv_values.set(es_helper_invocation);
+      break;
+   case nir_intrinsic_load_input:
+      return scan_input(intr, 0);
+   case nir_intrinsic_load_interpolated_input:
+      return scan_input(intr, 1);
+   default:
+      return false;
+   }
+   return true;
+}
+
+bool FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
+{
+   auto& vf = value_factory();
+   auto dest = vf.dest(instr->dest, 0, pin_free);
+   auto tmp = vf.temp_register();
+   assert(m_sample_id_reg);
+   assert(m_sample_mask_reg);
+
+   emit_instruction(new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write));
+   emit_instruction(new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write));
+   return true;
+}
+
+bool FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr* instr)
+{
+   assert(m_helper_invocation);
+   auto& vf = value_factory();
+   emit_instruction(new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write));
+   RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group};
+
+   auto vtx = new LoadFromBuffer(destvec, {4,7,7,7}, m_helper_invocation, 0,
+                                   R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
+   vtx->set_fetch_flag(FetchInstr::vpm);
+   vtx->set_fetch_flag(FetchInstr::use_tc);
+   vtx->set_always_keep();
+   auto dst = value_factory().dest(instr->dest, 0, pin_free);
+   auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write);
+   ir->add_required_instr(vtx);
+   emit_instruction(vtx);
+   emit_instruction(ir);
+
+   return true;
+}
+
+bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
+{
+   auto index = nir_src_as_const_value(intr->src[index_src_id]);
+   assert(index);
+
+   bool uses_interpol_at_centroid = false;
+
+   unsigned location = nir_intrinsic_io_semantics(intr).location  + index->u32;
+   unsigned driver_location = nir_intrinsic_base(intr) + index->u32;
+   auto semantic = r600_get_varying_semantic(location);
+   tgsi_semantic name = (tgsi_semantic)semantic.first;
+   unsigned sid = semantic.second;
+
+   if (location == VARYING_SLOT_POS) {
+      m_sv_values.set(es_pos);
+      ShaderInput pos_input(driver_location, name);
+      pos_input.set_sid(sid);
+      pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER, false);
+      add_input(pos_input);
+      return true;
+   }
+
+   if (location == VARYING_SLOT_FACE) {
+      m_sv_values.set(es_face);
+      ShaderInput face_input(driver_location, name);
+      face_input.set_sid(sid);
+      add_input(face_input);
+      return true;
+   }
+
+   tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
+   tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
+
+   if (index_src_id > 0) {
+      glsl_interp_mode mode = INTERP_MODE_NONE;
+      auto parent = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
+      mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
+      switch (parent->intrinsic) {
+      case nir_intrinsic_load_barycentric_sample:
+         tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+      break;
+      case nir_intrinsic_load_barycentric_at_sample:
+      case nir_intrinsic_load_barycentric_at_offset:
+      case nir_intrinsic_load_barycentric_pixel:
+         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
+      break;
+      case nir_intrinsic_load_barycentric_centroid:
+         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
+         uses_interpol_at_centroid = true;
+      break;
+      default:
+         std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
+                   << nir_intrinsic_infos[intr->intrinsic].name
+                   << " interpolator?\n";
+         assert(0);
+      }
+
+      switch (mode) {
+      case INTERP_MODE_NONE:
+         if (name == TGSI_SEMANTIC_COLOR ||
+             name == TGSI_SEMANTIC_BCOLOR) {
+            tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
+            break;
+         }
+         FALLTHROUGH;
+      case INTERP_MODE_SMOOTH:
+         tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+      break;
+      case INTERP_MODE_NOPERSPECTIVE:
+         tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
+      break;
+      case INTERP_MODE_FLAT:
+      break;
+      case INTERP_MODE_COLOR:
+         tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
+      break;
+      case INTERP_MODE_EXPLICIT:
+      default:
+         assert(0);
+      }
+   }
+
+   switch (name) {
+   case TGSI_SEMANTIC_PRIMID:
+      std::cerr << "Found primid input\n";
+      m_gs_prim_id_input = true;
+      m_ps_prim_id_input = ninputs();
+      FALLTHROUGH;
+   case TGSI_SEMANTIC_COLOR:
+   case TGSI_SEMANTIC_BCOLOR:
+   case TGSI_SEMANTIC_FOG:
+   case TGSI_SEMANTIC_GENERIC:
+   case TGSI_SEMANTIC_TEXCOORD:
+   case TGSI_SEMANTIC_LAYER:
+   case TGSI_SEMANTIC_PCOORD:
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+   case TGSI_SEMANTIC_CLIPDIST: {
+      auto iinput = find_input(driver_location);
+      if (iinput == input_not_found()) {
+         ShaderInput input(driver_location, name);
+         input.set_sid(sid);
+         input.set_need_lds_pos();
+         input.set_interpolator(tgsi_interpolate, tgsi_loc, uses_interpol_at_centroid);
+         add_input(input);
+         assert(find_input(input.location()) != input_not_found());
+      } else {
+         if (uses_interpol_at_centroid) {
+            iinput->second.set_uses_interpolate_at_centroid();
+         }
+      }
+
+      return true;
+   }
+   default:
+      return false;
+   }
+}
+
+bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr)
+{
+   auto& vf = value_factory();
+   RegisterVec4 slope = vf.temp_vec4(pin_group);
+   auto  src = emit_load_to_register(vf.src(instr->src[0], 0));
+   auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
+                                   R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
+
+   fetch->set_fetch_flag(FetchInstr::srf_mode);
+   emit_instruction(fetch);
+
+   auto grad = vf.temp_vec4(pin_group);
+
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+   assert(interpolator.enabled);
+
+   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
+
+   auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
+   tex->set_tex_flag(TexInstr::grad_fine);
+   tex->set_tex_flag(TexInstr::x_unnormalized);
+   tex->set_tex_flag(TexInstr::y_unnormalized);
+   tex->set_tex_flag(TexInstr::z_unnormalized);
+   tex->set_tex_flag(TexInstr::w_unnormalized);
+   emit_instruction(tex);
+
+   tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
+   tex->set_tex_flag(TexInstr::x_unnormalized);
+   tex->set_tex_flag(TexInstr::y_unnormalized);
+   tex->set_tex_flag(TexInstr::z_unnormalized);
+   tex->set_tex_flag(TexInstr::w_unnormalized);
+   tex->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(tex);
+
+   auto tmp0 = vf.temp_register();
+   auto tmp1 = vf.temp_register();
+
+   emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
+   emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
+
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr)
+{
+   auto& vf = value_factory();
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+
+   auto help = vf.temp_vec4(pin_group);
+   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
+
+   auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
+   getgradh->set_tex_flag(TexInstr::x_unnormalized);
+   getgradh->set_tex_flag(TexInstr::y_unnormalized);
+   getgradh->set_tex_flag(TexInstr::z_unnormalized);
+   getgradh->set_tex_flag(TexInstr::w_unnormalized);
+   getgradh->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(getgradh);
+
+   auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
+   getgradv->set_tex_flag(TexInstr::x_unnormalized);
+   getgradv->set_tex_flag(TexInstr::y_unnormalized);
+   getgradv->set_tex_flag(TexInstr::z_unnormalized);
+   getgradv->set_tex_flag(TexInstr::w_unnormalized);
+   getgradv->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(getgradv);
+
+   auto ofs_x = vf.src(instr->src[0], 0);
+   auto ofs_y = vf.src(instr->src[0], 1);
+   auto tmp0 = vf.temp_register();
+   auto tmp1 = vf.temp_register();
+   emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
+   emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+
+
+bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
+{
+   RegisterVec4::Swizzle swizzle;
+   auto semantics = nir_intrinsic_io_semantics(&intr);
+   unsigned driver_location = nir_intrinsic_base(&intr);
+   unsigned write_mask = nir_intrinsic_write_mask(&intr);
+
+   switch (semantics.location) {
+   case FRAG_RESULT_DEPTH:
+      swizzle = {0,7,7,7};
+   break;
+   case FRAG_RESULT_STENCIL:
+      swizzle = {7,0,7,7};
+   break;
+   case FRAG_RESULT_SAMPLE_MASK:
+      swizzle = {7,7,0,7};
+   break;
+   default:
+      for (int i = 0; i < 4; ++i) {
+         swizzle[i] = (1 << i) & write_mask ? i : 7;
+      }
+   }
+
+   auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
+
+   if (semantics.location == FRAG_RESULT_COLOR ||
+       (semantics.location >= FRAG_RESULT_DATA0 &&
+        semantics.location <= FRAG_RESULT_DATA7)) {
+
+      ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask);
+      add_output(output);
+
+      for (int k = 0 ; k < num_outputs; ++k) {
+
+         unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
+                              ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
+
+         sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
+
+         if (location >= m_max_color_exports) {
+            sfn_log << SfnLog::io << "Pixel output loc:" << location
+                    << " dl:" << driver_location
+                    << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
+            continue;
+         }
+
+         m_last_pixel_export = new ExportInstr(ExportInstr::pixel, location, value);
+
+         if (m_export_highest < location)
+            m_export_highest = location;
+
+         m_num_color_exports++;
+
+         /* Hack: force dual source output handling if one color output has a
+          * dual_source_blend_index > 0 */
+         if (semantics.location == FRAG_RESULT_COLOR &&
+             semantics.dual_source_blend_index > 0)
+            m_dual_source_blend = true;
+
+         if (m_num_color_exports > 1)
+            m_fs_write_all = false;
+         unsigned mask = (0xfu << (location * 4));
+         m_color_export_mask |= mask;
+
+         emit_instruction(m_last_pixel_export);
+
+      };
+   } else if (semantics.location == FRAG_RESULT_DEPTH ||
+              semantics.location == FRAG_RESULT_STENCIL ||
+              semantics.location == FRAG_RESULT_SAMPLE_MASK) {
+      m_depth_exports++;
+      emit_instruction(new ExportInstr(ExportInstr::pixel, 61, value));
+      int semantic = TGSI_SEMANTIC_POSITION;
+      if (semantics.location == FRAG_RESULT_STENCIL)
+         semantic = TGSI_SEMANTIC_STENCIL;
+      else if (semantics.location == FRAG_RESULT_SAMPLE_MASK)
+         semantic = TGSI_SEMANTIC_SAMPLEMASK;
+
+      ShaderOutput output(driver_location, semantic, write_mask);
+      add_output(output);
+
+   } else {
+      return false;
+   }
+   return true;
+}
+
+bool FragmentShader::emit_load_sample_pos(nir_intrinsic_instr* instr)
+{
+   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+
+
+   auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, m_sample_id_reg, 0,
+                                   R600_BUFFER_INFO_CONST_BUFFER,
+                                   nullptr, fmt_32_32_32_32_float);
+   fetch->set_fetch_flag(FetchInstr::srf_mode);
+   emit_instruction(fetch);
+   return true;
+}
+
+void FragmentShader::do_finalize()
+{
+   if (!m_last_pixel_export) {
+      RegisterVec4 value(0, false, {7,7,7,7});
+      m_last_pixel_export = new ExportInstr(ExportInstr::pixel, 0, value);
+      emit_instruction(m_last_pixel_export);
+      m_num_color_exports++;
+      m_color_export_mask |= 0xf;
+   }
+   m_last_pixel_export->set_is_last_export(true);
+}
+
+bool FragmentShader::read_prop(std::istream& is)
+{
+   string value;
+   is >> value;
+
+   auto splitpos = value.find(':');
+   assert(splitpos != string::npos);
+
+   std::istringstream ival(value);
+   string name;
+   string val;
+
+   std::getline(ival, name, ':');
+
+   if (name == "MAX_COLOR_EXPORTS")
+      ival >> m_max_color_exports;
+   else if (name == "COLOR_EXPORTS")
+      ival >> m_num_color_exports;
+   else if (name == "COLOR_EXPORT_MASK")
+      ival >> m_color_export_mask;
+   else if (name == "WRITE_ALL_COLORS")
+      ival >> m_fs_write_all;
+   else
+      return false;
+   return true;
+}
+
+void FragmentShader::do_print_properties(std::ostream& os) const
+{
+   os << "PROP MAX_COLOR_EXPORTS:"  << m_max_color_exports << "\n";
+   os << "PROP COLOR_EXPORTS:"  << m_num_color_exports << "\n";
+   os << "PROP COLOR_EXPORT_MASK:"  << m_color_export_mask << "\n";
+   os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n";
+}
+
+
+FragmentShader::Interpolator::Interpolator():
+   enabled(false)
+{
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.h b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h
new file mode 100644
index 0000000..e207fe4
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h
@@ -0,0 +1,88 @@
+#ifndef R600_SFN_SHADER_FS_H
+#define R600_SFN_SHADER_FS_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+class FragmentShader : public Shader {
+public:
+   FragmentShader(const r600_shader_key& key);
+   bool load_input(nir_intrinsic_instr *intr) override;
+   bool store_output(nir_intrinsic_instr *intr) override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+
+private:
+   class Interpolator {
+   public:
+      Interpolator();
+      bool enabled : 1;
+      unsigned ij_index : 4;
+      PRegister i;
+      PRegister j;
+   };
+
+   struct InterpolateParams {
+      PVirtualValue i,j;
+      int base;
+   };
+
+   static const int s_max_interpolators = 6;
+
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   bool scan_input(nir_intrinsic_instr *instr, int index_src_id);
+
+   bool load_barycentric_pixel(nir_intrinsic_instr *intr);
+   bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
+   bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
+   bool load_interpolated_input(nir_intrinsic_instr *intr);
+   bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
+                          int num_dest_comp, int start_comp);
+
+   bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
+   bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
+   bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
+                                           int start, int dest_slot);
+
+   bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs);
+   bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
+   bool emit_load_helper_invocation(nir_intrinsic_instr* instr);
+   bool emit_load_sample_pos(nir_intrinsic_instr* instr);
+   void do_finalize() override;
+
+   bool read_prop(std::istream& is) override;
+
+   void do_print_properties(std::ostream& os) const override;
+
+   bool m_dual_source_blend;
+   unsigned m_max_color_exports;
+   unsigned m_export_highest;
+   unsigned m_num_color_exports;
+   unsigned m_color_export_mask;
+   unsigned m_depth_exports;
+   ExportInstr *m_last_pixel_export;
+
+   std::bitset<s_max_interpolators> m_interpolators_used;
+   std::array<Interpolator, s_max_interpolators> m_interpolator;
+   RegisterVec4 m_pos_input;
+   Register *m_face_input{nullptr};
+   bool m_fs_write_all;
+   bool m_uses_discard{false};
+   bool m_gs_prim_id_input{false};
+   int m_ps_prim_id_input{0};
+   Register *m_sample_id_reg{nullptr};
+   Register *m_sample_mask_reg{nullptr};
+   Register *m_helper_invocation{nullptr};
+   int m_nsys_inputs{0};
+   bool m_apply_sample_mask{false};
+   int m_rat_base{0};
+};
+
+}
+
+#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp
deleted file mode 100644
index a0ab219..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp
+++ /dev/null
@@ -1,343 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_shader_geometry.h"
-#include "sfn_instruction_misc.h"
-#include "sfn_instruction_fetch.h"
-#include "sfn_shaderio.h"
-
-namespace r600 {
-
-GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh,
-                                             r600_pipe_shader_selector &sel,
-                                             const r600_shader_key &key,
-                                             enum amd_gfx_level gfx_level):
-   VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader,
-               sh->scratch_space_needed, gfx_level, key.gs.first_atomic_counter),
-   m_pipe_shader(sh),
-   m_so_info(&sel.so),
-   m_first_vertex_emitted(false),
-   m_offset(0),
-   m_next_input_ring_offset(0),
-   m_key(key),
-   m_clip_dist_mask(0),
-   m_cur_ring_output(0),
-   m_gs_tri_strip_adj_fix(false),
-   m_input_mask(0)
-{
-   sh_info().atomic_base = key.gs.first_atomic_counter;
-}
-
-bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr)
-{
-   auto location = nir_intrinsic_io_semantics(instr).location;
-   auto index = nir_src_as_const_value(instr->src[1]);
-   assert(index);
-   auto driver_location = nir_intrinsic_base(instr) + index->u32;
-
-   uint32_t write_mask = nir_intrinsic_write_mask(instr);
-   GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
-
-   auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true);
-
-   sh_info().output[driver_location].write_mask = write_mask;
-
-   auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
-                                      4 * driver_location,
-                                      instr->num_components, m_export_base[0]);
-   streamout_data[location] = ir;
-
-   return true;
-}
-
-bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return true;
-
-   nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
-
-   switch (ii->intrinsic) {
-   case nir_intrinsic_store_output:
-      return process_store_output(ii);
-   case nir_intrinsic_load_input:
-   case nir_intrinsic_load_per_vertex_input:
-      return process_load_input(ii);
-   default:
-      return true;
-   }
-}
-
-bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr)
-{
-   auto location = nir_intrinsic_io_semantics(instr).location;
-   auto index = nir_src_as_const_value(instr->src[1]);
-   assert(index);
-
-   auto driver_location = nir_intrinsic_base(instr) + index->u32;
-
-   if (location == VARYING_SLOT_COL0 ||
-       location == VARYING_SLOT_COL1 ||
-       (location >= VARYING_SLOT_VAR0 &&
-       location <= VARYING_SLOT_VAR31) ||
-       (location >= VARYING_SLOT_TEX0 &&
-       location <= VARYING_SLOT_TEX7) ||
-       location == VARYING_SLOT_BFC0 ||
-       location == VARYING_SLOT_BFC1 ||
-       location == VARYING_SLOT_PNTC ||
-       location == VARYING_SLOT_CLIP_VERTEX ||
-       location == VARYING_SLOT_CLIP_DIST0 ||
-       location == VARYING_SLOT_CLIP_DIST1 ||
-       location == VARYING_SLOT_PRIMITIVE_ID ||
-       location == VARYING_SLOT_POS ||
-       location == VARYING_SLOT_PSIZ ||
-       location == VARYING_SLOT_LAYER ||
-       location == VARYING_SLOT_VIEWPORT ||
-       location == VARYING_SLOT_FOGC) {
-      r600_shader_io& io = sh_info().output[driver_location];
-
-      auto semantic = r600_get_varying_semantic(location);
-      io.name = semantic.first;
-      io.sid = semantic.second;
-
-      evaluate_spi_sid(io);
-
-      if (sh_info().noutput <= driver_location)
-         sh_info().noutput = driver_location + 1;
-
-      if (location == VARYING_SLOT_CLIP_DIST0 ||
-          location == VARYING_SLOT_CLIP_DIST1) {
-         m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
-      }
-
-      if (location == VARYING_SLOT_VIEWPORT) {
-         sh_info().vs_out_viewport = 1;
-         sh_info().vs_out_misc_write = 1;
-      }
-      return true;
-   }
-   return false;
-}
-
-bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
-{
-   auto location = nir_intrinsic_io_semantics(instr).location;
-   auto index = nir_src_as_const_value(instr->src[1]);
-   assert(index);
-
-   auto driver_location = nir_intrinsic_base(instr) + index->u32;
-
-   if (location == VARYING_SLOT_POS ||
-       location == VARYING_SLOT_PSIZ ||
-       location == VARYING_SLOT_FOGC ||
-       location == VARYING_SLOT_CLIP_VERTEX ||
-       location == VARYING_SLOT_CLIP_DIST0 ||
-       location == VARYING_SLOT_CLIP_DIST1 ||
-       location == VARYING_SLOT_COL0 ||
-       location == VARYING_SLOT_COL1 ||
-       location == VARYING_SLOT_BFC0 ||
-       location == VARYING_SLOT_BFC1 ||
-       location == VARYING_SLOT_PNTC ||
-       (location >= VARYING_SLOT_VAR0 &&
-        location <= VARYING_SLOT_VAR31) ||
-       (location >= VARYING_SLOT_TEX0 &&
-       location <= VARYING_SLOT_TEX7)) {
-
-      uint64_t bit = 1ull << location;
-      if (!(bit & m_input_mask)) {
-         r600_shader_io& io = sh_info().input[driver_location];
-         auto semantic = r600_get_varying_semantic(location);
-         io.name = semantic.first;
-         io.sid = semantic.second;
-
-         io.ring_offset = 16 * driver_location;
-         ++sh_info().ninput;
-         m_next_input_ring_offset += 16;
-         m_input_mask |= bit;
-      }
-      return true;
-   }
-   return false;
-}
-
-bool GeometryShaderFromNir::do_allocate_reserved_registers()
-{
-   const int sel[6] = {0, 0 ,0, 1, 1, 1};
-   const int chan[6] = {0, 1 ,3, 0, 1, 2};
-
-   increment_reserved_registers();
-   increment_reserved_registers();
-
-   /* Reserve registers used by the shaders (should check how many
-    * components are actually used */
-   for (int i = 0; i < 6; ++i) {
-      auto reg = new GPRValue(sel[i], chan[i]);
-      reg->set_as_input();
-      m_per_vertex_offsets[i].reset(reg);
-      inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
-   }
-   auto reg = new GPRValue(0, 2);
-   reg->set_as_input();
-   m_primitive_id.reset(reg);
-   inject_register(0, 2, m_primitive_id, false);
-
-   reg = new GPRValue(1, 3);
-   reg->set_as_input();
-   m_invocation_id.reset(reg);
-   inject_register(1, 3, m_invocation_id, false);
-
-   m_export_base[0] = get_temp_register(0);
-   m_export_base[1] = get_temp_register(0);
-   m_export_base[2] = get_temp_register(0);
-   m_export_base[3] = get_temp_register(0);
-   emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
-   emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
-   emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
-   emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
-
-   sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
-
-   if (m_key.gs.tri_strip_adj_fix)
-      emit_adj_fix();
-
-   return true;
-}
-
-void GeometryShaderFromNir::emit_adj_fix()
-{
-   PValue adjhelp0(new  GPRValue(m_export_base[0]->sel(), 1));
-   emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
-
-   int reg_indices[6];
-   int reg_chanels[6] = {1, 2, 3, 1, 2, 3};
-
-   int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
-
-   reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
-   reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
-
-   std::array<PValue, 6> adjhelp;
-
-   AluInstruction *ir = nullptr;
-   for (int i = 0; i < 6; i++) {
-      adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i]));
-      ir = new AluInstruction(op3_cnde_int, adjhelp[i],
-                             {adjhelp0, m_per_vertex_offsets[i],
-                              m_per_vertex_offsets[rotate_indices[i]]},
-                             {alu_write});
-      if ((get_chip_class() == CAYMAN && i == 2) || (i  == 3))
-         ir->set_flag(alu_last_instr);
-      emit_instruction(ir);
-   }
-   ir->set_flag(alu_last_instr);
-
-   for (int i = 0; i < 6; i++)
-      m_per_vertex_offsets[i] = adjhelp[i];
-}
-
-
-bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_emit_vertex:
-      return emit_vertex(instr, false);
-   case nir_intrinsic_end_primitive:
-      return emit_vertex(instr, true);
-   case nir_intrinsic_load_primitive_id:
-      return load_preloaded_value(instr->dest, 0, m_primitive_id);
-   case nir_intrinsic_load_invocation_id:
-      return load_preloaded_value(instr->dest, 0, m_invocation_id);
-   case nir_intrinsic_store_output:
-      return emit_store(instr);
-   case nir_intrinsic_load_per_vertex_input:
-      return emit_load_per_vertex_input(instr);
-   default:
-      ;
-   }
-   return false;
-}
-
-bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
-{
-   int stream = nir_intrinsic_stream_id(instr);
-   assert(stream < 4);
-
-   for(auto v: streamout_data) {
-      if (stream == 0 || v.first != VARYING_SLOT_POS) {
-         v.second->patch_ring(stream, m_export_base[stream]);
-         emit_instruction(v.second);
-      } else
-         delete v.second;
-   }
-   streamout_data.clear();
-   emit_instruction(new EmitVertex(stream, cut));
-
-   if (!cut)
-      emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
-                                          PValue(new LiteralValue(sh_info().noutput)),
-                                          {alu_write, alu_last_instr}));
-
-   return true;
-}
-
-bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
-{
-   auto dest = vec_from_nir(instr->dest, 4);
-
-   std::array<int, 4> swz = {7,7,7,7};
-   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-      swz[i] = i + nir_intrinsic_component(instr);
-   }
-
-   auto literal_index = nir_src_as_const_value(instr->src[0]);
-
-   if (!literal_index) {
-      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
-      return false;
-   }
-   assert(literal_index->u32 < 6);
-   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
-
-   PValue addr = m_per_vertex_offsets[literal_index->u32];
-   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
-                                     16 * nir_intrinsic_base(instr),
-                                     R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
-   fetch->set_dest_swizzle(swz);
-
-   emit_instruction(fetch);
-   return true;
-}
-
-void GeometryShaderFromNir::do_finalize()
-{
-   if (m_clip_dist_mask) {
-      int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
-      sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
-      sh_info().clip_dist_write = (1 << num_clip_dist) - 1;
-   }
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h b/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h
deleted file mode 100644
index de7501c..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef SFN_GEOMETRYSHADERFROMNIR_H
-#define SFN_GEOMETRYSHADERFROMNIR_H
-
-#include "sfn_vertexstageexport.h"
-
-namespace r600 {
-
-class GeometryShaderFromNir : public VertexStage
-{
-public:
-   GeometryShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum amd_gfx_level gfx_level);
-
-   bool scan_sysvalue_access(nir_instr *instr) override;
-   PValue primitive_id() override {return m_primitive_id;}
-
-private:
-
-   bool do_allocate_reserved_registers() override;
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-
-   bool emit_vertex(nir_intrinsic_instr* instr, bool cut);
-   void emit_adj_fix();
-
-   bool process_store_output(nir_intrinsic_instr* instr);
-   bool process_load_input(nir_intrinsic_instr* instr);
-
-   bool emit_store(nir_intrinsic_instr* instr);
-   bool emit_load_per_vertex_input(nir_intrinsic_instr* instr);
-
-   void do_finalize() override;
-
-   r600_pipe_shader *m_pipe_shader;
-   const pipe_stream_output_info *m_so_info;
-
-   std::array<PValue, 6> m_per_vertex_offsets;
-   PValue m_primitive_id;
-   PValue m_invocation_id;
-   PValue m_export_base[4];
-   bool m_first_vertex_emitted;
-
-   int  m_offset;
-   int  m_next_input_ring_offset;
-   r600_shader_key m_key;
-   int m_clip_dist_mask;
-   unsigned m_cur_ring_output;
-   bool m_gs_tri_strip_adj_fix;
-   uint64_t m_input_mask;
-
-   std::map<int, MemRingOutIntruction *> streamout_data;
-};
-
-}
-
-#endif // SFN_GEOMETRYSHADERFROMNIR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp
new file mode 100644
index 0000000..e7db5a8
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp
@@ -0,0 +1,370 @@
+#include "sfn_shader_gs.h"
+#include "sfn_instr_fetch.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+GeometryShader::GeometryShader(const r600_shader_key& key):
+   Shader("GS"),
+   m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
+{
+
+}
+
+bool GeometryShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
+
+   switch (ii->intrinsic) {
+   case nir_intrinsic_store_output:
+      return process_store_output(ii);
+   case nir_intrinsic_load_per_vertex_input:
+      return process_load_input(ii);
+   default:
+      return false;
+   }  
+}
+
+bool GeometryShader::process_store_output(nir_intrinsic_instr *instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+
+   auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+   if (location == VARYING_SLOT_COL0 ||
+       location == VARYING_SLOT_COL1 ||
+       (location >= VARYING_SLOT_VAR0 &&
+       location <= VARYING_SLOT_VAR31) ||
+       (location >= VARYING_SLOT_TEX0 &&
+       location <= VARYING_SLOT_TEX7) ||
+       location == VARYING_SLOT_BFC0 ||
+       location == VARYING_SLOT_BFC1 ||
+       location == VARYING_SLOT_PNTC ||
+       location == VARYING_SLOT_CLIP_VERTEX ||
+       location == VARYING_SLOT_CLIP_DIST0 ||
+       location == VARYING_SLOT_CLIP_DIST1 ||
+       location == VARYING_SLOT_PRIMITIVE_ID ||
+       location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_LAYER ||
+       location == VARYING_SLOT_VIEWPORT ||
+       location == VARYING_SLOT_FOGC) {
+
+      auto semantic = r600_get_varying_semantic(location);
+      tgsi_semantic name = (tgsi_semantic)semantic.first;
+      auto write_mask = nir_intrinsic_write_mask(instr);
+      ShaderOutput output(driver_location, name, write_mask);
+      output.set_sid(semantic.second);
+      add_output(output);
+
+      if (location == VARYING_SLOT_CLIP_DIST0 ||
+          location == VARYING_SLOT_CLIP_DIST1) {
+         m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
+      }
+
+      if (location == VARYING_SLOT_VIEWPORT) {
+         m_out_viewport = true;
+         m_out_misc_write = true;
+
+      }
+      if (m_noutputs <= driver_location)
+         m_noutputs = driver_location + 1;
+
+      return true;
+   }
+   return false;
+}
+
+bool GeometryShader::process_load_input(nir_intrinsic_instr *instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+
+   auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+   if (location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_FOGC ||
+       location == VARYING_SLOT_CLIP_VERTEX ||
+       location == VARYING_SLOT_CLIP_DIST0 ||
+       location == VARYING_SLOT_CLIP_DIST1 ||
+       location == VARYING_SLOT_COL0 ||
+       location == VARYING_SLOT_COL1 ||
+       location == VARYING_SLOT_BFC0 ||
+       location == VARYING_SLOT_BFC1 ||
+       location == VARYING_SLOT_PNTC ||
+       (location >= VARYING_SLOT_VAR0 &&
+        location <= VARYING_SLOT_VAR31) ||
+       (location >= VARYING_SLOT_TEX0 &&
+       location <= VARYING_SLOT_TEX7)) {
+
+      uint64_t bit = 1ull << location;
+      if (!(bit & m_input_mask)) {
+         auto semantic = r600_get_varying_semantic(location);
+         ShaderInput input(driver_location, semantic.first);
+         input.set_sid(semantic.second);
+         input.set_ring_offset(16 * driver_location);
+         add_input(input);
+         m_next_input_ring_offset += 16;
+         m_input_mask |= bit;
+      }
+      return true;
+   }
+   return false;
+}
+
+int GeometryShader::do_allocate_reserved_registers()
+{
+   const int sel[6] = {0, 0 ,0, 1, 1, 1};
+   const int chan[6] = {0, 1 ,3, 0, 1, 2};
+
+   /* Reserve registers used by the shaders (should check how many
+    * components are actually used */
+   for (int i = 0; i < 6; ++i) {
+      m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
+      m_per_vertex_offsets[i]->pin_live_range(true);
+   }
+
+   m_primitive_id = value_factory().allocate_pinned_register(0, 2);
+   m_primitive_id->pin_live_range(true);
+   m_invocation_id = value_factory().allocate_pinned_register(1, 3);
+   m_invocation_id->pin_live_range(true);
+
+   value_factory().set_virtual_register_base(2);
+
+   auto zero = value_factory().inline_const(ALU_SRC_0, 0);
+
+   for (int i = 0; i < 4; ++i) {
+      m_export_base[i] = value_factory().temp_register(0, false);
+      emit_instruction(new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
+   }
+
+   m_ring_item_sizes[0] = m_next_input_ring_offset;
+
+   if (m_tri_strip_adj_fix)
+      emit_adj_fix();
+
+   return value_factory().next_register_index();
+}
+
+bool GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_emit_vertex:
+      return emit_vertex(intr, false);
+   case nir_intrinsic_end_primitive:
+      return emit_vertex(intr, true);
+   case nir_intrinsic_load_primitive_id:
+      return emit_simple_mov(intr->dest, 0, m_primitive_id);
+   case nir_intrinsic_load_invocation_id:
+      return emit_simple_mov(intr->dest, 0, m_invocation_id);
+   case nir_intrinsic_load_per_vertex_input:
+      return emit_load_per_vertex_input(intr);
+   default:
+      ;
+   }
+   return false;
+}
+
+bool GeometryShader::emit_vertex(nir_intrinsic_instr* instr, bool cut)
+{
+   int stream = nir_intrinsic_stream_id(instr);
+   assert(stream < 4);
+
+   auto cut_instr = new EmitVertexInstr(stream, cut);
+
+   for(auto v: m_streamout_data) {
+      if (stream == 0 || v.first != VARYING_SLOT_POS) {
+         v.second->patch_ring(stream, m_export_base[stream]);
+         cut_instr->add_required_instr(v.second);
+         emit_instruction(v.second);
+      } else
+         delete v.second;
+   }
+   m_streamout_data.clear();
+
+   emit_instruction(cut_instr);
+   start_new_block(0);
+
+   if (!cut) {
+      auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream],
+                             value_factory().literal(m_noutputs),
+                             AluInstr::last_write);
+      //ir->add_required_instr(cut_instr);
+      emit_instruction(ir);
+   }
+
+
+
+   return true;
+}
+
+bool GeometryShader::store_output(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+   auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+   uint32_t write_mask = nir_intrinsic_write_mask(instr);
+   uint32_t shift = nir_intrinsic_component(instr);
+
+   RegisterVec4::Swizzle src_swz {7,7,7,7};
+   for (unsigned i = shift; i < 4; ++i) {
+      src_swz[i] = (1 << i) & (write_mask << shift) ? i  - shift: 7;
+   }
+
+   auto out_value = value_factory().src_vec4(instr->src[0], pin_group, src_swz);
+
+   AluInstr *ir = nullptr;
+   if (m_streamout_data[location]) {
+      auto value = m_streamout_data[location]->value();
+      auto tmp = value_factory().temp_vec4(pin_group);
+
+      for (unsigned i = 0; i < 4 - shift; ++i) {
+         if (!(write_mask & (1 << i)))
+            continue;
+         if (value[i]->chan() < 4) {
+            ir = new AluInstr(op1_mov, tmp[i], value[src_swz[i]], AluInstr::write);
+         } else if (out_value[i]->chan() < 4) {
+            ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
+         } else
+            continue;
+         emit_instruction(ir);
+      }
+      ir->set_alu_flag(alu_last_instr);
+      m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp,
+                                                       4 * driver_location,
+                                                       instr->num_components, m_export_base[0]);
+   } else {
+
+      sfn_log << SfnLog::io << "None-streamout ";
+      bool need_copy =  shift != 0;
+      if (!need_copy) {
+         for (int i = 0; i < 4; ++i) {
+            if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
+               need_copy = true;
+               break;
+            }
+         }
+      }
+
+      if (need_copy) {
+         auto tmp = value_factory().temp_vec4(pin_group);
+         for (unsigned i = 0; i < 4 - shift; ++i) {
+            if (out_value[i]->chan() < 4) {
+               ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
+               emit_instruction(ir);
+            }
+         }
+         ir->set_alu_flag(alu_last_instr);
+         m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp,
+                                                          4 * driver_location,
+                                                          instr->num_components, m_export_base[0]);
+      } else {
+         m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, out_value,
+                                                          4 * driver_location,
+                                                          instr->num_components, m_export_base[0]);
+      }
+   }
+
+
+
+
+   return true;
+}
+
+bool GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
+{
+   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
+
+   RegisterVec4::Swizzle dest_swz{7,7,7,7};
+   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+      dest_swz[i] = i + nir_intrinsic_component(instr);
+   }
+
+   auto literal_index = nir_src_as_const_value(instr->src[0]);
+
+   if (!literal_index) {
+      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
+      return false;
+   }
+   assert(literal_index->u32 < 6);
+   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
+
+   auto addr = m_per_vertex_offsets[literal_index->u32];
+   auto fetch = new LoadFromBuffer(dest, dest_swz, addr,
+                                   16 * nir_intrinsic_base(instr),
+                                   R600_GS_RING_CONST_BUFFER, nullptr,
+                                   fmt_invalid);
+
+   fetch->set_fetch_flag(FetchInstr::use_const_field);
+   fetch->set_num_format(vtx_nf_norm);
+   fetch->reset_fetch_flag(FetchInstr::format_comp_signed);
+
+   emit_instruction(fetch);
+   return true;
+}
+
+void GeometryShader::do_finalize()
+{
+
+}
+
+void GeometryShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_GEOMETRY;
+   sh_info->ring_item_sizes[0] =  m_ring_item_sizes[0];
+}
+
+bool GeometryShader::read_prop(std::istream& is)
+{
+   (void)is;
+   return true;
+}
+
+void GeometryShader::do_print_properties(std::ostream& os) const
+{
+   (void)os;
+}
+
+void GeometryShader::emit_adj_fix()
+{
+   auto adjhelp0 = value_factory().temp_register();
+
+   emit_instruction(new AluInstr(op2_and_int, adjhelp0,
+                                 m_primitive_id, value_factory().one_i(),
+                                 AluInstr::last_write));
+
+   int reg_indices[6];
+   int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
+
+   reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
+   reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
+
+   std::array<PRegister, 6> adjhelp;
+
+   AluInstr *ir = nullptr;
+   for (int i = 0; i < 6; i++) {
+      adjhelp[i] = value_factory().temp_register();
+      ir = new AluInstr(op3_cnde_int, adjhelp[i],
+                        adjhelp0, m_per_vertex_offsets[i],
+                        m_per_vertex_offsets[rotate_indices[i]],
+                        AluInstr::write);
+
+      emit_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr);
+
+   for (int i = 0; i < 6; i++)
+      m_per_vertex_offsets[i] = adjhelp[i];
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h
new file mode 100644
index 0000000..f08c2e1
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h
@@ -0,0 +1,66 @@
+#ifndef GEOMETRYSHADER_H
+#define GEOMETRYSHADER_H
+
+
+#include "sfn_shader.h"
+#include "sfn_instr_export.h"
+
+namespace r600 {
+
+class GeometryShader : public Shader {
+public:
+   GeometryShader(const r600_shader_key& key);
+
+
+
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+
+   bool process_store_output(nir_intrinsic_instr *intr);
+   bool process_load_input(nir_intrinsic_instr *intr);
+
+   void do_finalize() override;
+
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   bool read_prop(std::istream& is) override;
+   void do_print_properties(std::ostream& os) const override;
+
+   void emit_adj_fix();
+
+   bool emit_load_per_vertex_input(nir_intrinsic_instr* instr);
+
+   bool load_input(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("load_input must be lowered in GS");
+   };
+   bool store_output(nir_intrinsic_instr* instr) override;
+   bool emit_vertex(nir_intrinsic_instr* instr, bool cut);
+
+   std::array<PRegister, 6> m_per_vertex_offsets{nullptr};
+   PRegister m_primitive_id{nullptr};
+   PRegister m_invocation_id{nullptr};
+   std::array<PRegister, 4> m_export_base{nullptr};
+
+   unsigned m_ring_item_sizes[4]{0};
+
+   bool m_tri_strip_adj_fix{false};
+   bool m_first_vertex_emitted{false};
+   int m_offset{0};
+   int m_next_input_ring_offset{0};
+   int m_clip_dist_mask{0};
+   int m_cur_ring_output{0};
+   bool m_gs_tri_strip_adj_fix{false};
+   uint64_t m_input_mask{0};
+   unsigned m_noutputs{0};
+   bool m_out_viewport{false};
+   bool m_out_misc_write{false};
+
+   std::map<int, MemRingOutInstr *> m_streamout_data;
+};
+
+}
+
+#endif // GEOMETRYSHADER_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp
deleted file mode 100644
index d26f24d..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include "sfn_shader_tcs.h"
-#include "sfn_instruction_gds.h"
-#include "tgsi/tgsi_from_mesa.h"
-
-namespace r600 {
-
-TcsShaderFromNir::TcsShaderFromNir(r600_pipe_shader *sh,
-                                   r600_pipe_shader_selector& sel,
-                                   const r600_shader_key& key,
-                                   enum amd_gfx_level gfx_level):
-   ShaderFromNirProcessor (PIPE_SHADER_TESS_CTRL, sel, sh->shader,
-                           sh->scratch_space_needed, gfx_level, key.tcs.first_atomic_counter),
-   m_reserved_registers(0)
-{
-   sh_info().tcs_prim_mode = key.tcs.prim_mode;
-}
-
-bool TcsShaderFromNir::scan_sysvalue_access(nir_instr *instr)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return true;
-
-   auto intr = nir_instr_as_intrinsic(instr);
-
-   switch (intr->intrinsic) {
-   case nir_intrinsic_load_primitive_id:
-      m_sv_values.set(es_primitive_id);
-      break;
-   case nir_intrinsic_load_invocation_id:
-      m_sv_values.set(es_invocation_id);
-      break;
-   case nir_intrinsic_load_tcs_rel_patch_id_r600:
-      m_sv_values.set(es_rel_patch_id);
-      break;
-   case nir_intrinsic_load_tcs_tess_factor_base_r600:
-      m_sv_values.set(es_tess_factor_base);
-      break;
-   default:
-
-      ;
-   }
-   return true;
-}
-
-bool TcsShaderFromNir::do_allocate_reserved_registers()
-{
-   if (m_sv_values.test(es_primitive_id)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,0);
-      gpr->set_as_input();
-      m_primitive_id.reset(gpr);
-   }
-
-   if (m_sv_values.test(es_invocation_id)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,2);
-      gpr->set_as_input();
-      m_invocation_id.reset(gpr);
-   }
-
-   if (m_sv_values.test(es_rel_patch_id)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,1);
-      gpr->set_as_input();
-      m_rel_patch_id.reset(gpr);
-   }
-
-   if (m_sv_values.test(es_tess_factor_base)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,3);
-      gpr->set_as_input();
-      m_tess_factor_base.reset(gpr);
-   }
-
-   set_reserved_registers(m_reserved_registers);
-
-   return true;
-}
-
-bool TcsShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_tcs_rel_patch_id_r600:
-      return load_preloaded_value(instr->dest, 0, m_rel_patch_id);
-   case nir_intrinsic_load_invocation_id:
-      return load_preloaded_value(instr->dest, 0, m_invocation_id);
-   case nir_intrinsic_load_primitive_id:
-      return load_preloaded_value(instr->dest, 0, m_primitive_id);
-   case nir_intrinsic_load_tcs_tess_factor_base_r600:
-      return load_preloaded_value(instr->dest, 0, m_tess_factor_base);
-   case nir_intrinsic_store_tf_r600:
-      return store_tess_factor(instr);
-   default:
-      return false;
-   }
-}
-
-bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr)
-{
-   const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ?
-            GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7});
-   auto val = vec_from_nir_with_fetch_constant(instr->src[0],
-         (1 << instr->src[0].ssa->num_components) - 1, swizzle);
-   emit_instruction(new GDSStoreTessFactor(val));
-   return true;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h b/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h
deleted file mode 100644
index 886791e..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef TCSSHADERFROMNIR_H
-#define TCSSHADERFROMNIR_H
-
-#include "sfn_shader_base.h"
-
-namespace r600 {
-
-class TcsShaderFromNir : public ShaderFromNirProcessor
-{
-public:
-   TcsShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum amd_gfx_level gfx_level);
-   bool scan_sysvalue_access(nir_instr *instr) override;
-
-private:
-   bool do_allocate_reserved_registers() override;
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-   bool store_tess_factor(nir_intrinsic_instr* instr);
-
-   void do_finalize() override {}
-
-   int m_reserved_registers;
-   PValue m_patch_id;
-   PValue m_rel_patch_id;
-   PValue m_invocation_id;
-   PValue m_primitive_id;
-   PValue m_tess_factor_base;
-
-
-};
-
-}
-
-#endif // TCSSHADERFROMNIR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
new file mode 100644
index 0000000..22054bb
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
@@ -0,0 +1,275 @@
+
+#include "sfn_instr_export.h"
+#include "sfn_shader_tess.h"
+#include "sfn_shader_vs.h"
+
+
+#include <sstream>
+
+namespace r600 {
+
+using std::string;
+
+TCSShader::TCSShader(const r600_shader_key& key):
+   Shader("TCS"),
+   m_tcs_prim_mode(key.tcs.prim_mode)
+{
+
+}
+
+bool TCSShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
+
+   switch (ii->intrinsic) {
+   case nir_intrinsic_load_primitive_id:
+      m_sv_values.set(es_primitive_id);
+      break;
+   case nir_intrinsic_load_invocation_id:
+      m_sv_values.set(es_invocation_id);
+      break;
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      m_sv_values.set(es_rel_patch_id);
+      break;
+   case nir_intrinsic_load_tcs_tess_factor_base_r600:
+      m_sv_values.set(es_tess_factor_base);
+      break;
+   default:
+      return false;
+      ;
+   }
+   return true;
+}
+
+int TCSShader::do_allocate_reserved_registers()
+{
+   if (m_sv_values.test(es_primitive_id)) {
+      m_primitive_id = value_factory().allocate_pinned_register(0, 0);
+      m_primitive_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_invocation_id)) {
+      m_invocation_id = value_factory().allocate_pinned_register(0, 2);
+      m_invocation_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      m_rel_patch_id = value_factory().allocate_pinned_register(0, 1);;
+      m_rel_patch_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_tess_factor_base)) {
+      m_tess_factor_base = value_factory().allocate_pinned_register(0, 3);
+      m_tess_factor_base->pin_live_range(true);
+   }
+
+   return value_factory().next_register_index();;
+}
+
+bool TCSShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return emit_simple_mov(instr->dest, 0, m_rel_patch_id);
+   case nir_intrinsic_load_invocation_id:
+      return emit_simple_mov(instr->dest, 0, m_invocation_id);
+   case nir_intrinsic_load_primitive_id:
+      return emit_simple_mov(instr->dest, 0, m_primitive_id);
+   case nir_intrinsic_load_tcs_tess_factor_base_r600:
+      return emit_simple_mov(instr->dest, 0, m_tess_factor_base);
+   case nir_intrinsic_store_tf_r600:
+      return store_tess_factor(instr);
+   default:
+      return false;
+   }
+}
+
+bool TCSShader::store_tess_factor(nir_intrinsic_instr* instr)
+{
+   bool two_parts = nir_src_num_components(instr->src[0]) == 4;
+
+   auto value0 = value_factory().temp_vec4(pin_group, {0, 1, 7, 7});
+   emit_instruction(new AluInstr(op1_mov, value0[0], value_factory().src(instr->src[0], 0),
+                                 AluInstr::write));
+   emit_instruction(new AluInstr(op1_mov, value0[1], value_factory().src(instr->src[0], 1),
+                                 two_parts ? AluInstr::write : AluInstr::last_write));
+
+
+   if (two_parts) {
+      auto value1 = value_factory().temp_vec4(pin_group, {2, 3, 7, 7});
+      emit_instruction(new AluInstr(op1_mov, value1[0], value_factory().src(instr->src[0], 2),
+                                    AluInstr::write));
+      emit_instruction(new AluInstr(op1_mov, value1[1], value_factory().src(instr->src[0], 3),
+                                    AluInstr::last_write));
+      emit_instruction(new WriteTFInstr(value1));
+   }
+
+   emit_instruction(new WriteTFInstr(value0));
+   return true;
+}
+
+
+void TCSShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_TESS_CTRL;
+   sh_info->tcs_prim_mode = m_tcs_prim_mode;
+}
+
+bool TCSShader::read_prop(std::istream& is)
+{
+   string value;
+   is >> value;
+
+   auto splitpos = value.find(':');
+   assert(splitpos != string::npos);
+
+   std::istringstream ival(value);
+   string name;
+   string val;
+
+   std::getline(ival, name, ':');
+
+   if (name == "TCS_PRIM_MODE")
+      ival >> m_tcs_prim_mode;
+   else
+      return false;
+   return true;
+}
+
+void TCSShader::do_print_properties(std::ostream& os) const
+{
+   os << "PROP TCS_PRIM_MODE:" << m_tcs_prim_mode << "\n";
+}
+
+TESShader::TESShader(const pipe_stream_output_info *so_info, const r600_shader *gs_shader,
+                     const r600_shader_key& key):
+   VertexStageShader("TES"),
+   m_vs_as_gs_a(key.vs.as_gs_a),
+   m_tes_as_es(key.tes.as_es)
+{
+   if (key.tes.as_es)
+      m_export_processor = new VertexExportForGS(this, gs_shader);
+   else
+      m_export_processor = new VertexExportForFs(this, so_info, key);
+}
+
+bool TESShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_tess_coord_r600:
+      m_sv_values.set(es_tess_coord);
+      break;
+   case nir_intrinsic_load_primitive_id:
+      m_sv_values.set(es_primitive_id);
+      break;
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      m_sv_values.set(es_rel_patch_id);
+      break;
+   case nir_intrinsic_store_output: {
+      int driver_location = nir_intrinsic_base(intr);
+      int location = nir_intrinsic_io_semantics(intr).location;
+      auto semantic = r600_get_varying_semantic(location);
+      tgsi_semantic name = (tgsi_semantic)semantic.first;
+      unsigned sid = semantic.second;
+      auto write_mask = nir_intrinsic_write_mask(intr);
+
+      if (location == VARYING_SLOT_LAYER)
+         write_mask = 4;
+
+      ShaderOutput output(driver_location, name, write_mask);
+      output.set_sid(sid);
+
+      switch (location) {
+      case VARYING_SLOT_PSIZ:
+      case VARYING_SLOT_POS:
+      case VARYING_SLOT_CLIP_VERTEX:
+      case VARYING_SLOT_EDGE: {
+         break;
+      }
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+      case VARYING_SLOT_VIEWPORT:
+      case VARYING_SLOT_LAYER:
+      case VARYING_SLOT_VIEW_INDEX:
+      default:
+         output.set_is_param(true);
+      }
+      add_output(output);
+      break;
+   }
+   default:
+      return false;
+   }
+   return true;
+}
+
+int TESShader::do_allocate_reserved_registers()
+{
+   if (m_sv_values.test(es_tess_coord)) {
+      m_tess_coord[0] = value_factory().allocate_pinned_register(0, 0);
+      m_tess_coord[0]->pin_live_range(true);
+      m_tess_coord[1] = value_factory().allocate_pinned_register(0, 1);
+      m_tess_coord[1]->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      m_rel_patch_id = value_factory().allocate_pinned_register(0, 2);
+      m_rel_patch_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) {
+      m_primitive_id = value_factory().allocate_pinned_register(0, 3);
+      m_primitive_id->pin_live_range(true);
+   }
+   return value_factory().next_register_index();
+}
+
+bool TESShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_tess_coord_r600:
+      return emit_simple_mov(intr->dest, 0, m_tess_coord[0], pin_none) &&
+            emit_simple_mov(intr->dest, 1, m_tess_coord[1], pin_none);
+   case nir_intrinsic_load_primitive_id:
+      return emit_simple_mov(intr->dest, 0, m_primitive_id);
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return emit_simple_mov(intr->dest, 0, m_rel_patch_id);
+   case nir_intrinsic_store_output:
+      return m_export_processor->store_output(*intr);
+   default:
+      return false;
+   }
+}
+
+void TESShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_TESS_EVAL;
+   m_export_processor->get_shader_info(sh_info);
+}
+
+void TESShader::do_finalize()
+{
+   m_export_processor->finalize();
+}
+
+bool TESShader::TESShader::read_prop(std::istream& is)
+{
+   (void)is;
+   return true;
+}
+
+void TESShader::do_print_properties(std::ostream& os) const
+{
+   (void)os;
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess.h b/src/gallium/drivers/r600/sfn/sfn_shader_tess.h
new file mode 100644
index 0000000..90d6a19
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_tess.h
@@ -0,0 +1,76 @@
+#ifndef TCS_H
+#define TCS_H
+
+#include "sfn_shader_vs.h"
+
+namespace r600 {
+
+class VertexExportStage;
+
+class TCSShader : public Shader {
+public:
+   TCSShader(const r600_shader_key& key);
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+   void do_get_shader_info(r600_shader *sh_info) override;
+   bool store_tess_factor(nir_intrinsic_instr* instr);
+
+   bool load_input(nir_intrinsic_instr *intr) override {
+      unreachable("load_input must be lowered in TCS");
+   };
+   bool store_output(nir_intrinsic_instr *intr) override {
+      unreachable("load_output must be lowered in TCS");
+   };
+
+   bool read_prop(std::istream& is) override;
+   void do_print_properties(std::ostream& os) const override;
+
+   PRegister m_tess_factor_base;
+   PRegister m_rel_patch_id;
+   PRegister m_invocation_id;
+   PRegister m_primitive_id;
+
+   unsigned m_tcs_prim_mode{0};
+};
+
+class TESShader : public VertexStageShader {
+public:
+   TESShader(const pipe_stream_output_info *so_info, const r600_shader *gs_shader,
+             const r600_shader_key& key);
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   bool load_input(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("load_input must be lowered in TES");
+   };
+   bool store_output(UNUSED nir_intrinsic_instr *intr) override {
+      unreachable("load_output must be lowered in TES");
+   };
+
+   bool read_prop(std::istream& is) override;
+   void do_print_properties(std::ostream& os) const override;
+
+   void do_finalize() override;
+
+   PRegister m_tess_coord[2]= {nullptr, nullptr};
+   PRegister m_rel_patch_id{nullptr};
+   PRegister m_primitive_id{nullptr};
+
+   VertexExportStage *m_export_processor{nullptr};
+
+   int m_tcs_vertices_out{0};
+   bool m_vs_as_gs_a{false};
+   bool m_tes_as_es{false};
+};
+
+
+}
+
+#endif // TCS_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp
deleted file mode 100644
index 20a3f85..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include "sfn_shader_tess_eval.h"
-#include "tgsi/tgsi_from_mesa.h"
-
-namespace r600 {
-
-TEvalShaderFromNir::TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel,
-                                       const r600_shader_key& key, r600_shader *gs_shader,
-                                       enum amd_gfx_level gfx_level):
-   VertexStage(PIPE_SHADER_TESS_EVAL, sel, sh->shader,
-               sh->scratch_space_needed, gfx_level, key.tes.first_atomic_counter),
-   m_reserved_registers(0),
-   m_key(key)
-
-{
-   sh->shader.tes_as_es = key.tes.as_es;
-   if (key.tes.as_es)
-      m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
-   else
-      m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
-}
-
-bool TEvalShaderFromNir::scan_sysvalue_access(nir_instr *instr)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return true;
-
-   auto ir = nir_instr_as_intrinsic(instr);
-
-   switch (ir->intrinsic) {
-   case nir_intrinsic_load_tess_coord_r600:
-      m_sv_values.set(es_tess_coord);
-      break;
-   case nir_intrinsic_load_primitive_id:
-      m_sv_values.set(es_primitive_id);
-      break;
-   case nir_intrinsic_load_tcs_rel_patch_id_r600:
-      m_sv_values.set(es_rel_patch_id);
-      break;
-   case nir_intrinsic_store_output:
-      m_export_processor->scan_store_output(ir);
-      break;
-   default:
-      ;
-   }
-   return true;
-}
-
-void TEvalShaderFromNir::emit_shader_start()
-{
-   m_export_processor->emit_shader_start();
-}
-
-bool TEvalShaderFromNir::do_allocate_reserved_registers()
-{
-   if (m_sv_values.test(es_tess_coord)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,0);
-      gpr->set_as_input();
-      m_tess_coord[0].reset(gpr);
-      gpr = new GPRValue(0,1);
-      gpr->set_as_input();
-      m_tess_coord[1].reset(gpr);
-   }
-
-   if (m_sv_values.test(es_rel_patch_id)) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,2);
-      gpr->set_as_input();
-      m_rel_patch_id.reset(gpr);
-   }
-
-   if (m_sv_values.test(es_primitive_id) ||
-       m_key.vs.as_gs_a) {
-      m_reserved_registers = 1;
-      auto gpr = new GPRValue(0,3);
-      gpr->set_as_input();
-      m_primitive_id.reset(gpr);
-      if (m_key.vs.as_gs_a)
-         inject_register(0, 3, m_primitive_id, false);
-   }
-   set_reserved_registers(m_reserved_registers);
-   return true;
-}
-
-bool TEvalShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_tess_coord_r600:
-      return load_preloaded_value(instr->dest, 0, m_tess_coord[0]) &&
-            load_preloaded_value(instr->dest, 1, m_tess_coord[1]);
-   case nir_intrinsic_load_primitive_id:
-      return load_preloaded_value(instr->dest, 0, m_primitive_id);
-   case nir_intrinsic_load_tcs_rel_patch_id_r600:
-      return load_preloaded_value(instr->dest, 0, m_rel_patch_id);
-   case nir_intrinsic_store_output:
-      return m_export_processor->store_output(instr);
-   default:
-      return false;
-   }
-}
-
-void TEvalShaderFromNir::do_finalize()
-{
-   m_export_processor->finalize_exports();
-}
-
-
-bool TEvalShaderFromNir::emit_load_tess_coord(nir_intrinsic_instr* instr)
-{
-   bool result = load_preloaded_value(instr->dest, 0, m_tess_coord[0]) &&
-               load_preloaded_value(instr->dest, 1, m_tess_coord[1]);
-
-   m_tess_coord[2] = from_nir(instr->dest, 2);
-
-
-   emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2],
-         m_tess_coord[0], {alu_last_instr, alu_write, alu_src0_neg}));
-   emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2],
-         m_tess_coord[1], {alu_last_instr, alu_write, alu_src0_neg}));
-   return result;
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h b/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h
deleted file mode 100644
index 4ae572f..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef TEVALSHADERFROMNIR_H
-#define TEVALSHADERFROMNIR_H
-
-#include "sfn_shader_base.h"
-#include "sfn_vertexstageexport.h"
-
-namespace r600 {
-
-class TEvalShaderFromNir : public VertexStage
-{
-public:
-	TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel,
-                           const r600_shader_key& key, r600_shader *gs_shader,
-                           enum amd_gfx_level gfx_level);
-        bool scan_sysvalue_access(nir_instr *instr) override;
-        PValue primitive_id() override {return m_primitive_id;}
-     private:
-        void emit_shader_start() override;
-        bool do_allocate_reserved_registers() override;
-        bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-        bool emit_load_tess_coord(nir_intrinsic_instr* instr);
-        bool load_tess_z_coord(nir_intrinsic_instr* instr);
-
-        void do_finalize() override;
-
-
-        unsigned m_reserved_registers;
-        PValue m_tess_coord[3];
-        PValue m_rel_patch_id;
-        PValue m_primitive_id;
-
-        std::unique_ptr<VertexStageExportBase> m_export_processor;
-        const r600_shader_key& m_key;
-};
-
-
-}
-
-#endif // TEVALSHADERFROMNIR_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp
deleted file mode 100644
index dbce6f9..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "pipe/p_defines.h"
-#include "tgsi/tgsi_from_mesa.h"
-#include "sfn_shader_vertex.h"
-#include "sfn_instruction_lds.h"
-
-#include <queue>
-
-
-namespace r600 {
-
-using std::priority_queue;
-
-VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
-                                         r600_pipe_shader_selector& sel,
-                                         const r600_shader_key& key,
-                                         struct r600_shader* gs_shader,
-                                         enum amd_gfx_level gfx_level):
-   VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
-               sh->scratch_space_needed, gfx_level, key.vs.first_atomic_counter),
-   m_num_clip_dist(0),
-   m_last_param_export(nullptr),
-   m_last_pos_export(nullptr),
-   m_pipe_shader(sh),
-   m_enabled_stream_buffers_mask(0),
-   m_so_info(&sel.so),
-   m_vertex_id(),
-   m_key(key),
-   m_max_attrib(0)
-{
-   // reg 0 is used in the fetch shader
-   increment_reserved_registers();
-
-   sh_info().atomic_base = key.vs.first_atomic_counter;
-   sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
-
-   if (key.vs.as_es) {
-      sh->shader.vs_as_es = true;
-      m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
-   } else if (key.vs.as_ls) {
-      sh->shader.vs_as_ls = true;
-      sfn_log << SfnLog::trans << "Start VS for GS\n";
-      m_export_processor.reset(new VertexStageExportForES(*this));
-   } else {
-      m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
-   }
-}
-
-bool VertexShaderFromNir::scan_inputs_read(const nir_shader *sh)
-{
-   uint64_t inputs = sh->info.inputs_read;
-
-   while (inputs) {
-      unsigned i = u_bit_scan64(&inputs);
-      if (i < VERT_ATTRIB_MAX) {
-         ++sh_info().ninput;
-      }
-   }
-   m_max_attrib = sh_info().ninput;
-   return true;
-}
-
-bool VertexShaderFromNir::do_allocate_reserved_registers()
-{
-   /* Since the vertex ID is nearly always used, we add it here as an input so
-    * that the registers used for vertex attributes don't get clobbered by the
-    * register merge step */
-   auto R0x = new GPRValue(0,0);
-   R0x->set_as_input();
-   m_vertex_id.reset(R0x);
-   inject_register(0, 0, m_vertex_id, false);
-
-   if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
-      auto R0z = new GPRValue(0,2);
-      R0x->set_as_input();
-      m_primitive_id.reset(R0z);
-      inject_register(0, 2, m_primitive_id, false);
-   }
-
-   if (m_sv_values.test(es_instanceid)) {
-      auto R0w = new GPRValue(0,3);
-      R0w->set_as_input();
-      m_instance_id.reset(R0w);
-      inject_register(0, 3, m_instance_id, false);
-   }
-
-
-   if (m_sv_values.test(es_rel_patch_id)) {
-      auto R0y = new GPRValue(0,1);
-      R0y->set_as_input();
-      m_rel_vertex_id.reset(R0y);
-      inject_register(0, 1, m_rel_vertex_id, false);
-   }
-
-   m_attribs.resize(4 * m_max_attrib + 4);
-   for (unsigned i = 0; i < m_max_attrib + 1; ++i) {
-      for (unsigned k = 0; k < 4; ++k) {
-         auto gpr = std::make_shared<GPRValue>(i + 1, k);
-         gpr->set_as_input();
-         m_attribs[4 * i + k] = gpr;
-         inject_register(i + 1, k, gpr, false);
-      }
-   }
-
-   return true;
-}
-
-void VertexShaderFromNir::emit_shader_start()
-{
-   m_export_processor->emit_shader_start();
-}
-
-bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
-{
-   switch (instr->type) {
-   case nir_instr_type_intrinsic: {
-      nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
-      switch (ii->intrinsic) {
-      case nir_intrinsic_load_vertex_id:
-         m_sv_values.set(es_vertexid);
-         break;
-      case nir_intrinsic_load_instance_id:
-         m_sv_values.set(es_instanceid);
-         break;
-      case nir_intrinsic_load_tcs_rel_patch_id_r600:
-         m_sv_values.set(es_rel_patch_id);
-         break;
-      case nir_intrinsic_store_output:
-         m_export_processor->scan_store_output(ii);
-      default:
-         ;
-      }
-   }
-   default:
-      ;
-   }
-   return true;
-}
-
-bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_vertex_id:
-      return load_preloaded_value(instr->dest, 0, m_vertex_id);
-   case nir_intrinsic_load_tcs_rel_patch_id_r600:
-      return load_preloaded_value(instr->dest, 0, m_rel_vertex_id);
-   case nir_intrinsic_load_instance_id:
-      return load_preloaded_value(instr->dest, 0, m_instance_id);
-   case nir_intrinsic_store_local_shared_r600:
-      return emit_store_local_shared(instr);
-   case nir_intrinsic_store_output:
-      return m_export_processor->store_output(instr);
-   case nir_intrinsic_load_input:
-      return load_input(instr);
-
-   default:
-      return false;
-   }
-}
-
-bool VertexShaderFromNir::load_input(nir_intrinsic_instr* instr)
-{
-   unsigned location = nir_intrinsic_base(instr);
-
-   if (location < VERT_ATTRIB_MAX) {
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         auto src = m_attribs[4 * location + i];
-
-         if (i == 0)
-            set_input(location, src);
-
-         load_preloaded_value(instr->dest, i, src, i == (unsigned)(instr->num_components - 1));
-      }
-      return true;
-   }
-   fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location);
-   return false;
-}
-
-bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
-{
-   unsigned write_mask = nir_intrinsic_write_mask(instr);
-
-   auto address = from_nir(instr->src[1], 0);
-   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
-   write_mask |= write_mask >> 2;
-
-   auto value =  from_nir(instr->src[0], swizzle_base);
-   if (!(write_mask & 2)) {
-      emit_instruction(new LDSWriteInstruction(address, 1, value));
-   } else {
-      auto value1 =  from_nir(instr->src[0], swizzle_base + 1);
-      emit_instruction(new LDSWriteInstruction(address, 1, value, value1));
-   }
-
-   return true;
-}
-
-void VertexShaderFromNir::do_finalize()
-{
-   m_export_processor->finalize_exports();
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h
deleted file mode 100644
index a6577c2..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef sfn_vertex_shader_from_nir_h
-#define sfn_vertex_shader_from_nir_h
-
-#include "sfn_shader_base.h"
-#include "sfn_vertexstageexport.h"
-
-namespace r600 {
-
-class VertexShaderFromNir : public VertexStage {
-public:
-   VertexShaderFromNir(r600_pipe_shader *sh,
-                       r600_pipe_shader_selector &sel,
-                       const r600_shader_key &key, r600_shader *gs_shader,
-                       enum amd_gfx_level gfx_level);
-
-   bool scan_sysvalue_access(nir_instr *instr) override;
-
-   PValue primitive_id() override {return m_primitive_id;}
-protected:
-
-   // todo: encapsulate
-   unsigned m_num_clip_dist;
-   ExportInstruction *m_last_param_export;
-   ExportInstruction *m_last_pos_export;
-   r600_pipe_shader *m_pipe_shader;
-   unsigned m_enabled_stream_buffers_mask;
-   const pipe_stream_output_info *m_so_info;
-   void do_finalize() override;
-
-   std::map<unsigned, unsigned> m_param_map;
-
-   bool scan_inputs_read(const nir_shader *sh) override;
-
-private:
-   bool load_input(nir_intrinsic_instr* instr);
-
-   void finalize_exports();
-
-   void emit_shader_start() override;
-   bool do_allocate_reserved_registers() override;
-   bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-   bool emit_store_local_shared(nir_intrinsic_instr* instr);
-
-   PValue m_vertex_id;
-   PValue m_instance_id;
-   PValue m_rel_vertex_id;
-   PValue m_primitive_id;
-   std::vector<PGPRValue> m_attribs;
-   r600_shader_key m_key;
-
-   std::unique_ptr<VertexStageExportBase> m_export_processor;
-   unsigned m_max_attrib;
-};
-
-}
-
-#endif 
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp
new file mode 100644
index 0000000..2d043ff
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp
@@ -0,0 +1,663 @@
+
+#include "sfn_debug.h"
+#include "sfn_shader_vs.h"
+
+#include "sfn_instr_alugroup.h"
+#include "sfn_instr_export.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+
+namespace r600 {
+
+uint32_t VertexStageShader::enabled_stream_buffers_mask() const
+{
+   return m_enabled_stream_buffers_mask;
+}
+
+void VertexStageShader::combine_enabled_stream_buffers_mask(uint32_t mask)
+{
+   m_enabled_stream_buffers_mask = mask;
+}
+
+bool VertexExportStage::store_output(nir_intrinsic_instr& intr)
+{
+   auto index = nir_src_as_const_value(intr.src[1]);
+   assert(index && "Indirect outputs not supported");
+
+   const store_loc store_info  = {
+      nir_intrinsic_component(&intr),
+      nir_intrinsic_io_semantics(&intr).location,
+      (unsigned)nir_intrinsic_base(&intr) + index->u32,
+      0
+   };
+
+   return do_store_output(store_info, intr);
+}
+
+VertexExportStage::VertexExportStage(VertexStageShader *parent):
+   m_parent(parent)
+{
+
+}
+
+VertexExportForFs::VertexExportForFs(VertexStageShader *parent,
+                                     const pipe_stream_output_info *so_info,
+                                     const r600_shader_key& key):
+   VertexExportStage(parent),
+   m_vs_as_gs_a(key.vs.as_gs_a),
+   m_vs_prim_id_out(key.vs.prim_id_out),
+   m_so_info(so_info)
+{
+}
+
+bool VertexExportForFs::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr)
+{
+   switch (store_info.location) {
+
+   case VARYING_SLOT_PSIZ:
+      m_writes_point_size = true;
+      FALLTHROUGH;
+   case VARYING_SLOT_POS:
+      return emit_varying_pos(store_info, intr);
+   case VARYING_SLOT_EDGE: {
+      std::array<uint8_t, 4> swizzle_override = {7 ,0, 7, 7};
+      return emit_varying_pos(store_info, intr, &swizzle_override);
+   }
+   case VARYING_SLOT_VIEWPORT: {
+      std::array<uint8_t, 4> swizzle_override = {7, 7, 7, 0};
+      return emit_varying_pos(store_info, intr, &swizzle_override) &&
+            emit_varying_param(store_info, intr);
+   }
+   case VARYING_SLOT_CLIP_VERTEX:
+      return emit_clip_vertices(store_info, intr);
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CLIP_DIST1:
+      m_num_clip_dist += 4;
+      return emit_varying_param(store_info, intr) && emit_varying_pos(store_info, intr);
+   case VARYING_SLOT_LAYER: {
+      m_out_misc_write = 1;
+      m_vs_out_layer = 1;
+      std::array<uint8_t, 4> swz = {7,7,0,7};
+      return emit_varying_pos(store_info, intr, &swz) &&
+            emit_varying_param(store_info, intr);
+   }
+   case VARYING_SLOT_VIEW_INDEX:
+   return emit_varying_pos(store_info, intr) &&
+         emit_varying_param(store_info, intr);
+
+   default:
+      return emit_varying_param(store_info, intr);
+   return false;
+   }
+}
+
+bool VertexExportForFs::emit_clip_vertices(const store_loc &store_info, const nir_intrinsic_instr &instr)
+{
+   auto& vf = m_parent->value_factory();
+
+   m_cc_dist_mask = 0xff;
+   m_clip_dist_write = 0xff;
+
+   m_clip_vertex = vf.src_vec4(instr.src[store_info.data_loc], pin_group, {0,1,2,3});
+
+   m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex;
+
+   RegisterVec4 clip_dist[2] = { vf.temp_vec4(pin_group), vf.temp_vec4(pin_group)};
+
+   for (int i = 0; i < 8; i++) {
+      int oreg = i >> 2;
+      int ochan = i & 3;
+      AluInstr *ir = nullptr;
+      AluInstr::SrcValues src(8);
+
+      for (int j = 0; j < 4; j++) {
+         src[2 * j] = m_clip_vertex[j];
+         src[2 * j + 1] = vf.uniform(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER);
+      }
+
+      ir = new AluInstr(op2_dot4_ieee, clip_dist[oreg][ochan], src, AluInstr::last_write, 4);
+      m_parent->emit_instruction(ir);
+   }
+
+   m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[0]);
+   m_parent->emit_instruction(m_last_pos_export);
+
+   m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[1]);
+   m_parent->emit_instruction(m_last_pos_export);
+
+   return true;
+}
+
+void VertexExportForFs::get_shader_info(r600_shader *sh_info) const
+{
+   sh_info->cc_dist_mask = m_cc_dist_mask;
+   sh_info->clip_dist_write = m_clip_dist_write;
+   sh_info->vs_as_gs_a =  m_vs_as_gs_a;
+   sh_info->vs_out_edgeflag = m_out_edgeflag;
+   sh_info->vs_out_viewport = m_out_viewport;
+   sh_info->vs_out_misc_write = m_out_misc_write;
+   sh_info->vs_out_point_size = m_out_point_size;
+   sh_info->vs_out_layer = m_vs_out_layer;
+}
+
+void VertexExportForFs::finalize()
+{
+   if (m_vs_as_gs_a) {
+      auto primid = m_parent->value_factory().temp_vec4(pin_group, {2, 7, 7, 7});
+      m_parent->emit_instruction(new AluInstr(op1_mov, primid[0], m_parent->primitive_id(), AluInstr::last_write));
+      int param = m_last_param_export ? m_last_param_export->location() + 1 : 0;
+
+      m_last_param_export = new ExportInstr(ExportInstr::param, param, primid);
+      m_parent->emit_instruction(m_last_param_export);
+
+      ShaderOutput output(m_parent->noutputs(), TGSI_SEMANTIC_PRIMID, 1);
+      output.set_sid(0);
+      output.override_spi_sid(m_vs_prim_id_out);
+      m_parent->add_output(output);
+   }
+
+   if (!m_last_pos_export) {
+      RegisterVec4 value(0, false, {7,7,7,7});
+      m_last_pos_export = new ExportInstr(ExportInstr::pos, 0, value);
+      m_parent->emit_instruction(m_last_pos_export);
+   }
+
+   if (!m_last_param_export) {
+      RegisterVec4 value(0,false, {7,7,7,7});
+      m_last_param_export = new ExportInstr(ExportInstr::param, 0, value);
+      m_parent->emit_instruction(m_last_param_export);
+   }
+
+   m_last_pos_export->set_is_last_export(true);
+   m_last_param_export->set_is_last_export(true);
+
+   if (m_so_info && m_so_info->num_outputs)
+      emit_stream(-1);
+}
+
+void VertexShader::do_get_shader_info(r600_shader *sh_info)
+{
+   sh_info->processor_type = PIPE_SHADER_VERTEX;
+   m_export_stage->get_shader_info(sh_info);   
+}
+
+bool VertexExportForFs::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr& intr,
+                                         std::array<uint8_t, 4> *swizzle_override)
+{
+   RegisterVec4::Swizzle swizzle;
+   uint32_t write_mask = 0;
+
+   write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;
+
+   if (!swizzle_override) {
+      for (int i = 0; i < 4; ++i)
+         swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
+   } else
+      std::copy(swizzle_override->begin(), swizzle_override->end(), swizzle.begin());
+
+   int export_slot = 0;
+
+   auto in_value = m_parent->value_factory().src_vec4(intr.src[0], pin_group, swizzle);
+   auto& value = in_value;
+   RegisterVec4 out_value = m_parent->value_factory().temp_vec4(pin_group, swizzle);
+
+   switch (store_info.location) {
+   case VARYING_SLOT_EDGE: {
+      m_out_misc_write = true;
+      m_out_edgeflag = true;
+      auto src = m_parent->value_factory().src(intr.src[0], 0);
+      auto clamped = m_parent->value_factory().temp_register();
+      m_parent->emit_instruction(new AluInstr(op1_mov, clamped, src, {alu_write, alu_dst_clamp, alu_last_instr}));
+      m_parent->emit_instruction(new AluInstr(op1_flt_to_int, out_value[1], clamped, AluInstr::last_write));
+      value = out_value;
+   }
+      FALLTHROUGH;
+   case VARYING_SLOT_PSIZ:
+      m_out_misc_write = true;
+      m_out_point_size = true;
+      FALLTHROUGH;
+   case VARYING_SLOT_LAYER:
+      export_slot = 1;
+      break;
+   case VARYING_SLOT_VIEWPORT:
+      m_out_misc_write = true;
+      m_out_viewport = true;
+      export_slot = 1;
+      break;
+   case VARYING_SLOT_POS:
+      break;
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CLIP_DIST1:      
+      m_cc_dist_mask |= write_mask << (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
+      m_clip_dist_write |= write_mask <<  (4 * (store_info.location - VARYING_SLOT_CLIP_DIST0));
+      export_slot = m_cur_clip_pos++;
+      break;
+   default:
+      sfn_log << SfnLog::err << __func__ << "Unsupported location "
+              << store_info.location << "\n";
+      return false;
+   }
+
+
+   m_last_pos_export = new ExportInstr(ExportInstr::pos,  export_slot, value);
+
+   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_pos_export->value();
+
+   m_parent->emit_instruction(m_last_pos_export);
+
+   return true;
+}
+
+
+bool VertexExportForFs::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr& intr)
+{
+   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n";
+
+   int write_mask = nir_intrinsic_write_mask(&intr) << store_info.frac;
+   RegisterVec4::Swizzle swizzle;
+   for (int i = 0; i < 4; ++i)
+      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
+
+   Pin pin = util_bitcount(write_mask) > 1 ? pin_group: pin_free;
+
+   int export_slot = m_parent->output(nir_intrinsic_base(&intr)).pos();
+   auto value = m_parent->value_factory().temp_vec4(pin, swizzle);
+
+   AluInstr *alu = nullptr;
+   for (int i = 0; i < 4; ++i) {
+      if (swizzle[i] < 4) {
+         alu = new AluInstr(op1_mov, value[i], m_parent->value_factory().src(intr.src[0], swizzle[i]),
+               AluInstr::write);
+         m_parent->emit_instruction(alu);
+      }
+   }
+   if (alu)
+      alu->set_alu_flag(alu_last_instr);
+
+   m_last_param_export = new ExportInstr(ExportInstr::param,  export_slot, value);
+   m_output_registers[nir_intrinsic_base(&intr)] = &m_last_param_export->value();
+
+   m_parent->emit_instruction(m_last_param_export);
+
+   return true;
+}
+
+bool VertexExportForFs::emit_stream(int stream)
+{
+   assert(m_so_info);
+   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+      R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+      return false;
+   }
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (m_so_info->output[i].output_buffer >= 4) {
+         R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+                  m_so_info->output[i].output_buffer);
+         return false;
+      }
+   }
+   const RegisterVec4 *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
+   std::vector<RegisterVec4> tmp(m_so_info->num_outputs);
+
+   /* Initialize locations where the outputs are stored. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Emit stream " << i
+              << " with register index " << m_so_info->output[i].register_index << "  so_gpr:";
+
+
+      so_gpr[i] = output_register(m_so_info->output[i].register_index);
+
+      if (!so_gpr[i]) {
+         sfn_log << SfnLog::err << "\nERR: register index "
+              << m_so_info->output[i].register_index
+              << " doesn't correspond to an output register\n";
+         return false;
+      }
+      start_comp[i] = m_so_info->output[i].start_component;
+      /* Lower outputs with dst_offset < start_component.
+       *
+       * We can only output 4D vectors with a write mask, e.g. we can
+       * only output the W component at offset 3, etc. If we want
+       * to store Y, Z, or W at buffer offset 0, we need to use MOV
+       * to move it to X and output X. */
+
+      bool need_copy = m_so_info->output[i].dst_offset < m_so_info->output[i].start_component;
+
+      int sc = m_so_info->output[i].start_component;
+      for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+         if ((*so_gpr[i])[j + sc]->chan() != j + sc) {
+            need_copy = true;
+            break;
+         }
+      }
+      if (need_copy) {
+         RegisterVec4::Swizzle swizzle =  {0,1,2,3};
+         for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
+            swizzle[j] = 7;
+         tmp[i] = m_parent->value_factory().temp_vec4(pin_group, swizzle);
+
+         AluInstr *alu = nullptr;
+         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+            alu = new AluInstr(op1_mov, tmp[i][j], (*so_gpr[i])[j + sc], {alu_write});
+            m_parent->emit_instruction(alu);
+         }
+         if (alu)
+            alu->set_alu_flag(alu_last_instr);
+
+         start_comp[i] = 0;
+         so_gpr[i] = &tmp[i];
+      }
+      sfn_log << SfnLog::instr <<  *so_gpr[i] << "\n";
+   }
+
+   uint32_t enabled_stream_buffers_mask = 0;
+   /* Write outputs to buffers. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      sfn_log << SfnLog::instr << "Write output buffer " << i
+              << " with register index " << m_so_info->output[i].register_index << "\n";
+
+      auto out_stream =
+            new StreamOutInstr(*so_gpr[i],
+                               m_so_info->output[i].num_components,
+                                    m_so_info->output[i].dst_offset - start_comp[i],
+                               ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+                               m_so_info->output[i].output_buffer,
+                               m_so_info->output[i].stream);
+      m_parent->emit_instruction(out_stream);
+      enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+   }
+   m_parent->combine_enabled_stream_buffers_mask(enabled_stream_buffers_mask);
+   return true;
+}
+
+const RegisterVec4 *VertexExportForFs::output_register(int loc) const
+{
+   const RegisterVec4 *retval = nullptr;
+   auto val = m_output_registers.find(loc);
+   if (val != m_output_registers.end())
+      retval =  val->second;
+   return retval;
+}
+
+VertexShader::VertexShader(const pipe_stream_output_info *so_info, r600_shader *gs_shader, r600_shader_key& key):
+   VertexStageShader("VS"),
+   m_vs_as_gs_a(key.vs.as_gs_a)
+{
+   if (key.vs.as_es)
+      m_export_stage = new VertexExportForGS(this, gs_shader);
+   else if (key.vs.as_ls)
+      m_export_stage = new VertexExportForTCS(this);
+   else
+      m_export_stage = new VertexExportForFs(this, so_info, key);
+}
+
+bool VertexShader::do_scan_instruction(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   auto intr = nir_instr_as_intrinsic(instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_input: {
+      int vtx_register = nir_intrinsic_base(intr) + 1;
+      if (m_last_vertex_atribute_register < vtx_register)
+         m_last_vertex_atribute_register = vtx_register;
+      return true;
+   }
+   case nir_intrinsic_store_output: {
+      int driver_location = nir_intrinsic_base(intr);
+      int location = nir_intrinsic_io_semantics(intr).location;
+      auto semantic = r600_get_varying_semantic(location);
+      tgsi_semantic name = (tgsi_semantic)semantic.first;
+      unsigned sid = semantic.second;
+      auto write_mask = nir_intrinsic_write_mask(intr);
+
+      if (location == VARYING_SLOT_LAYER)
+         write_mask = 4;
+
+      ShaderOutput output(driver_location, name, write_mask);
+      output.set_sid(sid);
+
+      switch (location) {
+      case VARYING_SLOT_PSIZ:
+      case VARYING_SLOT_POS:
+      case VARYING_SLOT_CLIP_VERTEX:
+      case VARYING_SLOT_EDGE: {
+         break;
+      }
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+      case VARYING_SLOT_VIEWPORT:
+      case VARYING_SLOT_LAYER:
+      case VARYING_SLOT_VIEW_INDEX:
+      default:
+         output.set_is_param(true);
+      }
+      add_output(output);
+      break;
+   }
+   case nir_intrinsic_load_vertex_id:
+      m_sv_values.set(es_vertexid);
+      break;
+   case nir_intrinsic_load_instance_id:
+      m_sv_values.set(es_instanceid);
+      break;
+   case nir_intrinsic_load_primitive_id:
+      m_sv_values.set(es_primitive_id);
+      break;
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      m_sv_values.set(es_rel_patch_id);
+      break;
+   default:
+      return false;
+   }
+
+   return true;
+}
+
+bool VertexShader::load_input(nir_intrinsic_instr *intr)
+{
+   unsigned driver_location = nir_intrinsic_base(intr);
+   unsigned location = nir_intrinsic_io_semantics(intr).location;
+   auto& vf = value_factory();
+
+   AluInstr *ir = nullptr;
+   if (location < VERT_ATTRIB_MAX) {
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+         auto src = vf.allocate_pinned_register(driver_location + 1, i);
+         src->pin_live_range(true);
+         if (intr->dest.is_ssa)
+            vf.inject_value(intr->dest, i, src);
+         else {
+            ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none), src, {alu_write});
+            emit_instruction(ir);
+         }
+      }
+      if (ir)
+         ir->set_alu_flag(alu_last_instr);
+
+      ShaderInput input(driver_location, location);
+      input.set_gpr(driver_location + 1);
+      add_input(input);
+      return true;
+   }
+   fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location);
+   return false;
+}
+
+
+int VertexShader::do_allocate_reserved_registers()
+{
+   if (m_sv_values.test(es_vertexid)) {
+      m_vertex_id = value_factory().allocate_pinned_register(0, 0);
+      m_vertex_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_instanceid)) {
+      m_instance_id = value_factory().allocate_pinned_register(0, 3);
+      m_instance_id->pin_live_range(true);
+   }
+
+   if (m_sv_values.test(es_primitive_id) || m_vs_as_gs_a) {
+      auto primitive_id = value_factory().allocate_pinned_register(0, 2);
+      primitive_id->pin_live_range(true);
+      set_primitive_id(primitive_id);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1);
+      m_rel_vertex_id->pin_live_range(true);
+   }
+
+   return m_last_vertex_atribute_register + 1;
+}
+
+bool VertexShader::store_output(nir_intrinsic_instr *intr)
+{
+   return m_export_stage->store_output(*intr);
+}
+
+bool VertexShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      return emit_simple_mov(intr->dest, 0, m_vertex_id);
+   case nir_intrinsic_load_instance_id:
+      return emit_simple_mov(intr->dest, 0, m_instance_id);
+   case nir_intrinsic_load_primitive_id:
+      return emit_simple_mov(intr->dest, 0, primitive_id());
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return emit_simple_mov(intr->dest, 0, m_rel_vertex_id);
+   default:
+      return false;
+   }
+}
+
+void VertexShader::do_finalize()
+{
+   m_export_stage->finalize();
+}
+
+bool VertexShader::read_prop(std::istream& is)
+{
+   (void)is;
+   return false;
+}
+
+void VertexShader::do_print_properties(std::ostream& os) const
+{
+   (void)os;
+}
+
+VertexExportForGS::VertexExportForGS(VertexStageShader *parent,
+                                               const r600_shader *gs_shader):
+   VertexExportStage(parent),
+   m_gs_shader(gs_shader)
+{
+
+}
+
+bool VertexExportForGS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& instr)
+{
+   int ring_offset = -1;
+   auto out_io = m_parent->output(store_info.driver_location);
+
+   sfn_log << SfnLog::io << "check output " << store_info.driver_location
+           << " name=" << out_io.name()<< " sid=" << out_io.sid() << "\n";
+
+   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
+      auto& in_io = m_gs_shader->input[k];
+      sfn_log << SfnLog::io << "  against  " <<  k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
+
+      if (in_io.name == out_io.name() &&
+          in_io.sid == out_io.sid()) {
+         ring_offset = in_io.ring_offset;
+         break;
+      }
+   }
+
+   if (store_info.location == VARYING_SLOT_VIEWPORT) {
+      m_vs_out_viewport = 1;
+      m_vs_out_misc_write = 1;
+      return true;
+   }
+
+   if (ring_offset == -1) {
+      sfn_log << SfnLog::err << "VS defines output at "
+              << store_info.driver_location << "name=" << out_io.name()
+              << " sid=" << out_io.sid() << " that is not consumed as GS input\n";
+      return true;
+   }
+
+   RegisterVec4::Swizzle src_swz = {7,7,7,7};
+   for (int i = 0; i < 4; ++i)
+      src_swz[i] = i < instr.num_components ? i : 7;
+
+   auto value = m_parent->value_factory().temp_vec4(pin_group, src_swz);
+
+   AluInstr *ir = nullptr;
+   for (unsigned int i = 0; i < instr.num_components ; ++i) {
+      ir = new AluInstr(op1_mov, value[i],
+                        m_parent->value_factory().src(instr.src[store_info.data_loc], i),
+                        AluInstr::write);
+      m_parent->emit_instruction(ir);
+   }
+   if (ir)
+      ir->set_alu_flag(alu_last_instr);
+
+   m_parent->emit_instruction(new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write, value,
+                                                  ring_offset >> 2, 4, nullptr));
+
+   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
+       store_info.location == VARYING_SLOT_CLIP_DIST1)
+      m_num_clip_dist += 4;
+
+   return true;
+}
+
+void VertexExportForGS::finalize()
+{
+
+}
+
+void VertexExportForGS::get_shader_info(r600_shader *sh_info) const
+{
+   sh_info->vs_out_viewport = m_vs_out_viewport;
+   sh_info->vs_out_misc_write = m_vs_out_misc_write;
+   sh_info->vs_as_es = true;
+}
+
+VertexExportForTCS::VertexExportForTCS(VertexStageShader *parent):
+   VertexExportStage(parent)
+{
+
+}
+
+void VertexExportForTCS::finalize()
+{
+
+}
+
+void VertexExportForTCS::get_shader_info(r600_shader *sh_info) const
+{
+   sh_info->vs_as_ls = 1;
+}
+
+
+bool VertexExportForTCS::do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr)
+{
+   (void)store_info;
+   (void)intr;
+   return true;
+}
+
+
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h
new file mode 100644
index 0000000..571ebd9
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h
@@ -0,0 +1,156 @@
+#ifndef SFN_SHADER_VS_H
+#define SFN_SHADER_VS_H
+
+#include "sfn_shader.h"
+
+
+
+namespace r600 {
+
+class VertexStageShader : public Shader {
+protected:
+   using Shader::Shader;
+public:
+   PRegister primitive_id() const { return m_primitive_id;}
+   void set_primitive_id(PRegister prim_id) { m_primitive_id = prim_id;}
+
+   void combine_enabled_stream_buffers_mask(uint32_t mask);
+   uint32_t enabled_stream_buffers_mask() const override;
+
+private:
+   PRegister m_primitive_id{nullptr};
+   uint32_t m_enabled_stream_buffers_mask{0};
+};
+
+class VertexExportStage : public Allocate {
+public:
+
+   VertexExportStage(VertexStageShader *parent);
+
+   bool store_output(nir_intrinsic_instr& intr);
+
+   virtual void finalize() = 0;
+
+   virtual void get_shader_info(r600_shader *sh_info) const = 0;
+
+protected:
+   struct store_loc {
+      unsigned frac;
+      unsigned location;
+      unsigned driver_location;
+      int data_loc;
+   };
+
+   virtual bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) = 0;
+
+   VertexStageShader *m_parent;
+
+private:
+};
+
+class VertexExportForFs : public VertexExportStage {
+   friend VertexExportStage;
+
+public:
+
+   VertexExportForFs(VertexStageShader *parent, const pipe_stream_output_info *so_info,
+                     const r600_shader_key& key);
+
+   void finalize() override;
+
+   void get_shader_info(r600_shader *sh_info) const override;
+
+private:
+
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+
+   bool emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr& intr,
+                         std::array<uint8_t, 4> *swizzle_override = nullptr);
+   bool emit_varying_param(const store_loc &store_info, nir_intrinsic_instr& intr);
+
+   bool emit_clip_vertices(const store_loc &store_info, const nir_intrinsic_instr &instr);
+
+   bool emit_stream(int stream);
+
+   const RegisterVec4 *output_register(int loc) const;
+
+   ExportInstr *m_last_param_export{nullptr};
+   ExportInstr *m_last_pos_export{nullptr};
+
+   int m_num_clip_dist{0};
+   int m_next_param{0};
+   uint8_t m_cc_dist_mask{0};
+   uint8_t m_clip_dist_write{0};
+   int m_cur_clip_pos{1};
+   bool m_writes_point_size{false};
+   bool m_out_misc_write{false};
+   bool m_vs_out_layer{false};
+   bool m_vs_as_gs_a{false};
+   int m_vs_prim_id_out{0};
+   bool m_out_edgeflag{false};
+   bool m_out_viewport{false};
+   bool m_out_point_size{false};
+   RegisterVec4 m_clip_vertex;
+
+   const pipe_stream_output_info *m_so_info {nullptr};
+
+   std::unordered_map<int, RegisterVec4 *> m_output_registers;
+};
+
+
+class VertexExportForGS : public VertexExportStage {
+public:
+   VertexExportForGS(VertexStageShader *parent, const r600_shader *gs_shader);
+   void finalize() override;
+
+   void get_shader_info(r600_shader *sh_info) const override;
+
+private:
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+   unsigned m_num_clip_dist{0};
+   bool m_vs_out_viewport{false};
+   bool m_vs_out_misc_write{false};
+
+   const r600_shader *m_gs_shader;
+};
+
+class VertexExportForTCS : public VertexExportStage {
+public:
+   VertexExportForTCS(VertexStageShader *parent);
+   void finalize() override;
+   void get_shader_info(r600_shader *sh_info) const override;
+private:
+   bool do_store_output(const store_loc &store_info, nir_intrinsic_instr& intr) override;
+};
+
+class VertexShader : public VertexStageShader {
+public:
+   VertexShader(const pipe_stream_output_info *so_info, r600_shader *gs_shader, r600_shader_key& key);
+
+   bool load_input(nir_intrinsic_instr *intr) override;
+   bool store_output(nir_intrinsic_instr *intr) override;
+
+   bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
+
+private:
+   bool do_scan_instruction(nir_instr *instr) override;
+   int do_allocate_reserved_registers() override;
+
+   void do_finalize() override;
+
+   bool read_prop(std::istream& is) override;
+
+   void do_print_properties(std::ostream& os) const override;
+   void do_get_shader_info(r600_shader *sh_info) override;
+
+   VertexExportStage *m_export_stage {nullptr};
+   int m_last_vertex_atribute_register {0};
+   PRegister m_vertex_id{nullptr};
+   PRegister m_instance_id{nullptr};
+   PRegister m_rel_vertex_id{nullptr};
+   bool m_vs_as_gs_a;
+};
+
+}
+
+#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
deleted file mode 100644
index 07cbebc..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_shaderio.h"
-#include "sfn_debug.h"
-#include "tgsi/tgsi_from_mesa.h"
-
-#include <queue>
-
-namespace r600 {
-
-using std::vector;
-using std::priority_queue;
-
-ShaderIO::ShaderIO():
-   m_two_sided(false),
-   m_lds_pos(0)
-{
-
-}
-
-ShaderInput::ShaderInput(tgsi_semantic name):
-   m_name(name),
-   m_gpr(0),
-   m_uses_interpolate_at_centroid(false)
-{
-}
-
-ShaderInput::~ShaderInput()
-{
-}
-
-void ShaderInput::set_lds_pos(UNUSED int lds_pos)
-{
-}
-
-int ShaderInput::ij_index() const
-{
-   return -1;
-}
-
-bool ShaderInput::interpolate() const
-{
-   return false;
-}
-
-int ShaderInput::lds_pos() const
-{
-   return 0;
-}
-
-bool ShaderInput::is_varying() const
-{
-   return false;
-}
-
-void ShaderInput::set_uses_interpolate_at_centroid()
-{
-   m_uses_interpolate_at_centroid = true;
-}
-
-void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const
-{
-   io.name = m_name;
-   io.gpr = m_gpr;
-   io.ij_index = translated_ij_index;
-   io.lds_pos = lds_pos();
-   io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid;
-
-   set_specific_ioinfo(io);
-}
-
-void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const
-{
-}
-
-ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr):
-   ShaderInput(name),
-   m_gpr(gpr)
-{
-}
-
-void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const
-{
-   io.gpr = m_gpr;
-   io.ij_index = 0;
-}
-
-ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location,
-                                       unsigned frac, unsigned components,
-                                       tgsi_interpolate_mode interpolate,
-                                       tgsi_interpolate_loc interp_loc):
-   ShaderInput(_name),
-   m_driver_location(driver_location),
-   m_location_frac(frac),
-   m_sid(sid),
-   m_interpolate(interpolate),
-   m_interpolate_loc(interp_loc),
-   m_ij_index(-10),
-   m_lds_pos(0),
-   m_mask(((1 << components) - 1) << frac)
-{
-   evaluate_spi_sid();
-
-   m_ij_index = interpolate == TGSI_INTERPOLATE_LINEAR ? 3 : 0;
-   switch (interp_loc) {
-   case TGSI_INTERPOLATE_LOC_CENTROID: m_ij_index += 2; break;
-   case TGSI_INTERPOLATE_LOC_CENTER: m_ij_index += 1; break;
-   default:
-      ;
-   }
-}
-
-ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input):
-   ShaderInput(_name),
-   m_driver_location(input->data.driver_location),
-   m_location_frac(input->data.location_frac),
-   m_sid(sid),
-   m_ij_index(-10),
-   m_lds_pos(0),
-   m_mask(((1 << input->type->components()) - 1) << input->data.location_frac)
-{
-   sfn_log << SfnLog::io << __func__
-           << "name:" << _name
-           << " sid: " << sid
-           << " op: " << input->data.interpolation;
-
-   evaluate_spi_sid();
-
-   enum glsl_base_type base_type =
-      glsl_get_base_type(glsl_without_array(input->type));
-
-   switch (input->data.interpolation) {
-   case INTERP_MODE_NONE:
-      if (glsl_base_type_is_integer(base_type)) {
-         m_interpolate = TGSI_INTERPOLATE_CONSTANT;
-         break;
-      }
-
-      if (name() == TGSI_SEMANTIC_COLOR) {
-         m_interpolate = TGSI_INTERPOLATE_COLOR;
-         m_ij_index = 0;
-         break;
-      }
-      FALLTHROUGH;
-
-   case INTERP_MODE_SMOOTH:
-      assert(!glsl_base_type_is_integer(base_type));
-
-      m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
-      m_ij_index = 0;
-      break;
-
-   case INTERP_MODE_NOPERSPECTIVE:
-      assert(!glsl_base_type_is_integer(base_type));
-
-      m_interpolate = TGSI_INTERPOLATE_LINEAR;
-      m_ij_index = 3;
-      break;
-
-   case INTERP_MODE_FLAT:
-      m_interpolate = TGSI_INTERPOLATE_CONSTANT;
-      break;
-
-   default:
-      m_interpolate = TGSI_INTERPOLATE_CONSTANT;
-      break;
-   }
-
-   if (input->data.sample) {
-      m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
-   } else if (input->data.centroid) {
-      m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID;
-      m_ij_index += 2;
-   } else {
-      m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER;
-      m_ij_index += 1;
-   }
-   sfn_log << SfnLog::io
-           << " -> IP:" << m_interpolate
-           << " IJ:" << m_ij_index
-           << "\n";
-}
-
-bool ShaderInputVarying::is_varying() const
-{
-   return true;
-}
-
-void ShaderInputVarying::update_mask(int additional_comps, int frac)
-{
-   m_mask |= ((1 << additional_comps) - 1) << frac;
-}
-
-void ShaderInputVarying::evaluate_spi_sid()
-{
-   switch (name()) {
-   case TGSI_SEMANTIC_PSIZE:
-   case TGSI_SEMANTIC_EDGEFLAG:
-   case TGSI_SEMANTIC_FACE:
-   case TGSI_SEMANTIC_SAMPLEMASK:
-      assert(0 && "System value used as varying");
-      break;
-   case TGSI_SEMANTIC_POSITION:
-      m_spi_sid = 0;
-      break;
-   case TGSI_SEMANTIC_GENERIC:
-   case TGSI_SEMANTIC_TEXCOORD:
-   case TGSI_SEMANTIC_PCOORD:
-      m_spi_sid = m_sid + 1;
-      break;
-   default:
-      /* For non-generic params - pack name and sid into 8 bits */
-      m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1;
-   }
-}
-
-ShaderInputVarying::ShaderInputVarying(tgsi_semantic name,
-                                       const ShaderInputVarying& orig, size_t location):
-   ShaderInput(name),
-   m_driver_location(location),
-   m_location_frac(orig.location_frac()),
-
-   m_sid(orig.m_sid),
-   m_spi_sid(orig.m_spi_sid),
-   m_interpolate(orig.m_interpolate),
-   m_interpolate_loc(orig.m_interpolate_loc),
-   m_ij_index(orig.m_ij_index),
-   m_lds_pos(0),
-   m_mask(0)
-{
-   evaluate_spi_sid();
-}
-
-bool ShaderInputVarying::interpolate() const
-{
-   return m_interpolate > 0;
-}
-
-int ShaderInputVarying::ij_index() const
-{
-   return m_ij_index;
-}
-
-void ShaderInputVarying::set_lds_pos(int lds_pos)
-{
-   m_lds_pos = lds_pos;
-}
-
-int ShaderInputVarying::lds_pos() const
-{
-   return m_lds_pos;
-}
-
-void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const
-{
-   io.interpolate = m_interpolate;
-   io.interpolate_location = m_interpolate_loc;
-   io.sid = m_sid;
-   io.spi_sid = m_spi_sid;
-   set_color_ioinfo(io);
-}
-
-void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const
-{
-   sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n";
-}
-
-ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input):
-   ShaderInputVarying(name, sid, input),
-   m_back_color_input_idx(0)
-{
-   sfn_log << SfnLog::io << __func__ << "name << " << name << " sid << " << sid << "\n";
-}
-
-ShaderInputColor::ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
-                                   unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
-                                   tgsi_interpolate_loc interp_loc):
-   ShaderInputVarying(_name, sid, driver_location,frac, components, interpolate, interp_loc),
-   m_back_color_input_idx(0)
-{
-   sfn_log << SfnLog::io << __func__ << "name << " << _name << " sid << " << sid << "\n";
-}
-
-void ShaderInputColor::set_back_color(unsigned back_color_input_idx)
-{
-   sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n";
-   m_back_color_input_idx = back_color_input_idx;
-}
-
-void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const
-{
-   sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n";
-   io.back_color_input = m_back_color_input_idx;
-}
-
-size_t ShaderIO::add_input(ShaderInput *input)
-{
-   m_inputs.push_back(PShaderInput(input));
-   return m_inputs.size() - 1;
-}
-
-PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid)
-{
-   for (auto& a : m_inputs) {
-      if (a->name() == name) {
-         assert(a->is_varying());
-         auto& v = static_cast<ShaderInputVarying&>(*a);
-         if (v.sid() == sid)
-            return a;
-      }
-   }
-   return nullptr;
-}
-
-struct VaryingShaderIOLess {
-   bool operator () (PShaderInput lhs, PShaderInput rhs) const
-   {
-      const ShaderInputVarying& l = static_cast<ShaderInputVarying&>(*lhs);
-      const ShaderInputVarying& r = static_cast<ShaderInputVarying&>(*rhs);
-      return l.location() > r.location();
-   }
-};
-
-void ShaderIO::sort_varying_inputs()
-{
-   priority_queue<PShaderInput, vector<PShaderInput>, VaryingShaderIOLess> q;
-
-   vector<int> idx;
-
-   for (auto i = 0u; i < m_inputs.size(); ++i) {
-      if (m_inputs[i]->is_varying()) {
-         q.push(m_inputs[i]);
-         idx.push_back(i);
-      }
-   }
-
-   auto next_index = idx.begin();
-   while (!q.empty()) {
-      auto si = q.top();
-      q.pop();
-      m_inputs[*next_index++] = si;
-   }
-}
-
-void ShaderIO::update_lds_pos()
-{
-   m_lds_pos = -1;
-   m_ldspos.resize(m_inputs.size());
-   for (auto& i : m_inputs) {
-      if (!i->is_varying())
-         continue;
-
-      auto& v = static_cast<ShaderInputVarying&>(*i);
-      /* There are shaders that miss an input ...*/
-      if (m_ldspos.size() <= static_cast<unsigned>(v.location()))
-          m_ldspos.resize(v.location() + 1);
-   }
-
-   std::fill(m_ldspos.begin(), m_ldspos.end(), -1);
-   for (auto& i : m_inputs) {
-      if (!i->is_varying())
-         continue;
-
-      auto& v = static_cast<ShaderInputVarying&>(*i);
-      if (v.name() == TGSI_SEMANTIC_POSITION)
-         continue;
-
-      if (m_ldspos[v.location()] < 0) {
-         ++m_lds_pos;
-         m_ldspos[v.location()] = m_lds_pos;
-      }
-      v.set_lds_pos(m_lds_pos);
-   }
-   ++m_lds_pos;
-}
-
-std::vector<PShaderInput> &ShaderIO::inputs()
-{
-   return m_inputs;
-}
-
-ShaderInput& ShaderIO::input(size_t k)
-{
-   assert(k < m_inputs.size());
-   return *m_inputs[k];
-}
-
-ShaderInput& ShaderIO::input(size_t driver_loc, int frac)
-{
-   for (auto& i: m_inputs) {
-      if (!i->is_varying())
-         continue;
-
-      auto& v = static_cast<ShaderInputVarying&>(*i);
-      if (v.location() == driver_loc)
-         return v;
-   }
-   return input(driver_loc);
-}
-
-void ShaderIO::set_two_sided()
-{
-   m_two_sided = true;
-}
-
-std::pair<unsigned, unsigned>
-r600_get_varying_semantic(unsigned varying_location)
-{
-   std::pair<unsigned, unsigned> result;
-   tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
-                                true, &result.first, &result.second);
-
-   if (result.first == TGSI_SEMANTIC_GENERIC) {
-      result.second += 9;
-   } else if (result.first == TGSI_SEMANTIC_PCOORD) {
-      result.second = 8;
-   }
-   return result;
-}
-
-
-
-}
-
diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/src/gallium/drivers/r600/sfn/sfn_shaderio.h
deleted file mode 100644
index 855bbe1..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_shaderio.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_SHADERIO_H
-#define SFN_SHADERIO_H
-
-#include "compiler/nir/nir.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "gallium/drivers/r600/r600_shader.h"
-
-#include <vector>
-#include <memory>
-
-namespace r600 {
-
-class ShaderInput {
-public:
-   ShaderInput();
-   virtual  ~ShaderInput();
-
-   ShaderInput(tgsi_semantic name);
-   tgsi_semantic name() const {return m_name;}
-
-   void set_gpr(int gpr) {m_gpr = gpr;}
-   int gpr() const {return m_gpr;}
-   void set_ioinfo(r600_shader_io& io, int translated_ij_index) const;
-
-   virtual void set_lds_pos(int lds_pos);
-   virtual int ij_index() const;
-   virtual bool interpolate() const;
-   virtual int lds_pos() const;
-   void set_uses_interpolate_at_centroid();
-
-   virtual bool is_varying() const;
-
-private:
-   virtual void set_specific_ioinfo(r600_shader_io& io) const;
-
-   tgsi_semantic m_name;
-   int m_gpr;
-   bool m_uses_interpolate_at_centroid;
-};
-
-using PShaderInput = std::shared_ptr<ShaderInput>;
-
-class ShaderInputSystemValue: public ShaderInput {
-public:
-   ShaderInputSystemValue(tgsi_semantic name, int gpr);
-   void set_specific_ioinfo(r600_shader_io& io) const;
-   int m_gpr;
-};
-
-class ShaderInputVarying : public ShaderInput {
-public:
-   ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location,
-                      unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
-                      tgsi_interpolate_loc interp_loc);
-   ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input);
-   ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig,
-                      size_t location);
-
-   void set_lds_pos(int lds_pos) override;
-
-   int ij_index() const override;
-
-   bool interpolate() const override;
-
-   int lds_pos() const override;
-
-   int sid() const {return m_sid;}
-
-   void update_mask(int additional_comps, int frac);
-
-   size_t location() const {return m_driver_location;}
-   int location_frac() const {return m_location_frac;}
-
-   bool is_varying() const override;
-
-private:
-   void evaluate_spi_sid();
-
-   virtual void set_color_ioinfo(r600_shader_io& io) const;
-   void set_specific_ioinfo(r600_shader_io& io) const override;
-   size_t m_driver_location;
-   int m_location_frac;
-   int m_sid;
-   int m_spi_sid;
-   tgsi_interpolate_mode m_interpolate;
-   tgsi_interpolate_loc m_interpolate_loc;
-   int m_ij_index;
-   int m_lds_pos;
-   int m_mask;
-};
-
-class ShaderInputColor: public ShaderInputVarying {
-public:
-   ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
-                    unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
-                    tgsi_interpolate_loc interp_loc);
-   ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input);
-   void set_back_color(unsigned back_color_input_idx);
-   unsigned back_color_input_index() const {
-      return m_back_color_input_idx;
-   }
-private:
-   void set_color_ioinfo(UNUSED r600_shader_io& io) const override;
-   unsigned m_back_color_input_idx;
-
-};
-
-class ShaderIO
-{
-public:
-   ShaderIO();
-
-   size_t add_input(ShaderInput *input);
-
-   std::vector<PShaderInput>& inputs();
-   ShaderInput& input(size_t k);
-
-   ShaderInput& input(size_t driver_loc, int frac);
-
-   void set_two_sided();
-   bool two_sided() {return m_two_sided;}
-
-   int nlds() const  {
-      return m_lds_pos;
-   }
-
-   void sort_varying_inputs();
-
-   size_t size() const {return m_inputs.size();}
-
-   PShaderInput find_varying(tgsi_semantic name, int sid);
-
-   void update_lds_pos();
-
-private:
-   std::vector<PShaderInput> m_inputs;
-   std::vector<int> m_ldspos;
-   bool m_two_sided;
-   int m_lds_pos;
-
-};
-
-std::pair<unsigned, unsigned>
-r600_get_varying_semantic(unsigned varying_location);
-
-
-}
-
-#endif // SFN_SHADERIO_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_value.cpp b/src/gallium/drivers/r600/sfn/sfn_value.cpp
deleted file mode 100644
index 3228b75..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_value.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_value.h"
-#include "util/macros.h"
-
-#include <iostream>
-#include <iomanip>
-#include <cassert>
-
-namespace r600 {
-
-using std::unique_ptr;
-using std::make_shared;
-
-const char *Value::component_names = "xyzw01?_!";
-
-Value::Value():
-   m_type(gpr),
-   m_chan(0)
-{
-}
-
-Value::Value(Type type, uint32_t chan):
-   m_type(type),
-   m_chan(chan)
-{
-
-}
-
-
-
-Value::Value(Type type):
-   Value(type, 0)
-{
-}
-
-Value::Type Value::type() const
-{
-   return m_type;
-}
-
-void Value::set_chan(uint32_t chan)
-{
-   m_chan = chan;
-}
-
-void Value::print(std::ostream& os) const
-{
-   do_print(os);
-}
-
-void Value::print(std::ostream& os, const PrintFlags& flags) const
-{
-   if (flags.flags & PrintFlags::has_neg) os << '-';
-   if (flags.flags & PrintFlags::has_abs) os << '|';
-   do_print(os, flags);
-   if (flags.flags & PrintFlags::has_abs) os << '|';
-}
-
-void Value::do_print(std::ostream& os, const PrintFlags& flags) const
-{
-   (void)flags;
-   do_print(os);
-}
-
-bool Value::operator < (const Value& lhs) const
-{
-   return sel() < lhs.sel() ||
-         (sel() == lhs.sel() && chan() < lhs.chan());
-}
-
-
-LiteralValue::LiteralValue(float value, uint32_t chan):
-   Value(Value::literal, chan)
-{
-   m_value.f=value;
-}
-
-
-LiteralValue::LiteralValue(uint32_t value, uint32_t chan):
-   Value(Value::literal, chan)
-{
-   m_value.u=value;
-}
-
-LiteralValue::LiteralValue(int value, uint32_t chan):
-   Value(Value::literal, chan)
-{
-   m_value.u=value;
-}
-
-uint32_t LiteralValue::sel() const
-{
-   return ALU_SRC_LITERAL;
-}
-
-uint32_t LiteralValue::value() const
-{
-   return m_value.u;
-}
-
-float LiteralValue::value_float() const
-{
-   return m_value.f;
-}
-
-void LiteralValue::do_print(std::ostream& os) const
-{
-   os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10)
-      << m_value.f << "].";
-   os << component_names[chan()];
-}
-
-void LiteralValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
-{
-   os << "[0x" << std::setbase(16) << m_value.u << " "
-      << std::setbase(10);
-
-   os << m_value.f << "f";
-
-   os<< "]";
-}
-
-bool LiteralValue::is_equal_to(const Value& other) const
-{
-   assert(other.type() == Value::Type::literal);
-   const auto& rhs = static_cast<const LiteralValue&>(other);
-   return (sel() == rhs.sel() &&
-           value() == rhs.value());
-}
-
-InlineConstValue::InlineConstValue(int value, int chan):
-   Value(Value::cinline,  chan),
-   m_value(static_cast<AluInlineConstants>(value))
-{
-}
-
-uint32_t InlineConstValue::sel() const
-{
-   return m_value;
-}
-
-void InlineConstValue::do_print(std::ostream& os) const
-{
-   auto sv_info = alu_src_const.find(m_value);
-   if (sv_info != alu_src_const.end()) {
-      os << sv_info->second.descr;
-      if (sv_info->second.use_chan)
-         os << '.' << component_names[chan()];
-      else if (chan() > 0)
-         os << "." << component_names[chan()]
-            << " (W: Channel ignored)";
-   } else {
-      if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32)
-         os << " Param" << m_value - ALU_SRC_PARAM_BASE;
-      else
-         os << " E: unknown inline constant " << m_value;
-   }
-}
-
-bool InlineConstValue::is_equal_to(const Value& other) const
-{
-   assert(other.type() == Value::Type::cinline);
-   const auto& rhs = static_cast<const InlineConstValue&>(other);
-   return sel() == rhs.sel();
-}
-
-PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0));
-PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0));
-PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0));
-PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0));
-
-UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
-   Value(Value::kconst, chan)
-{
-   m_index = sel;
-   m_kcache_bank = kcache_bank;
-}
-
-UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
-   Value(Value::kconst, chan),
-   m_index(sel),
-   m_kcache_bank(1),
-   m_addr(addr)
-{
-
-}
-
-uint32_t UniformValue::sel() const
-{
-   const int bank_base[4] = {128, 160, 256, 288};
-   return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index;
-}
-
-uint32_t UniformValue::kcache_bank() const
-{
-   return m_kcache_bank;
-}
-
-bool UniformValue::is_equal_to(const Value& other) const
-{
-   const UniformValue& o = static_cast<const UniformValue&>(other);
-   return sel()  == o.sel() &&
-         m_kcache_bank == o.kcache_bank();
-}
-
-void UniformValue::do_print(std::ostream& os) const
-{
-   if (m_index < 512)
-      os << "KC" << m_kcache_bank << "[" << m_index;
-   else if (m_addr)
-      os << "KC[" << *m_addr << "][" << m_index;
-   else
-      os << "KCx[" << m_index;
-   os << "]." << component_names[chan()];
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_value.h b/src/gallium/drivers/r600/sfn/sfn_value.h
deleted file mode 100644
index 7bc4528..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_value.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_VALUE_H
-#define SFN_VALUE_H
-
-#include "sfn_alu_defines.h"
-#include "nir.h"
-
-#include <memory>
-#include <set>
-#include <bitset>
-#include <iostream>
-
-namespace r600 {
-
-class Value {
-public:
-   using Pointer=std::shared_ptr<Value>;
-
-   struct PrintFlags {
-      PrintFlags():index_mode(0),
-         flags(0)
-      {
-      }
-      PrintFlags(int im, int f):index_mode(im),
-         flags(f)
-      {
-      }
-      int index_mode;
-      int flags;
-      static const int is_rel = 1;
-      static const int has_abs = 2;
-      static const int has_neg = 4;
-      static const int literal_is_float = 8;
-      static const int index_ar = 16;
-      static const int index_loopidx = 32;
-   };
-
-   enum Type {
-      gpr,
-      kconst,
-      literal,
-      cinline,
-      lds_direct,
-      gpr_vector,
-      gpr_array_value,
-      unknown
-   };
-
-   static const char *component_names;
-
-   using LiteralFlags=std::bitset<4>;
-
-   Value();
-
-   Value(Type type);
-
-   virtual ~Value(){}
-
-   Type type() const;
-   virtual uint32_t sel() const = 0;
-   uint32_t chan() const {return m_chan;}
-
-   void set_chan(uint32_t chan);
-   virtual void set_pin_to_channel() { assert(0 && "Only GPRs can be pinned to a channel ");}
-   void print(std::ostream& os, const PrintFlags& flags) const;
-
-   void print(std::ostream& os) const;
-
-   bool operator < (const Value& lhs) const;
-
-   static Value::Pointer zero;
-   static Value::Pointer one_f;
-   static Value::Pointer zero_dot_5;
-   static Value::Pointer one_i;
-
-protected:
-   Value(Type type, uint32_t chan);
-
-private:
-   virtual void do_print(std::ostream& os) const = 0;
-   virtual void do_print(std::ostream& os, const PrintFlags& flags) const;
-
-   virtual bool is_equal_to(const Value& other) const = 0;
-
-   Type m_type;
-   uint32_t m_chan;
-
-   friend bool operator == (const Value& lhs, const Value& rhs);
-};
-
-
-inline std::ostream& operator << (std::ostream& os, const Value& v)
-{
-   v.print(os);
-   return os;
-}
-
-
-inline bool operator == (const Value& lhs, const Value& rhs)
-{
-   if (lhs.type() == rhs.type())
-      return lhs.is_equal_to(rhs);
-   return false;
-}
-
-inline bool operator != (const Value& lhs, const Value& rhs)
-{
-   return !(lhs == rhs);
-}
-
-using PValue=Value::Pointer;
-
-struct value_less {
-   inline bool operator () (PValue lhs, PValue rhs) const {
-      return *lhs < *rhs;
-   }
-};
-
-using ValueSet = std::set<PValue, value_less>;
-
-
-class LiteralValue: public Value {
-public:
-   LiteralValue(float value, uint32_t chan= 0);
-   LiteralValue(uint32_t value, uint32_t chan= 0);
-   LiteralValue(int value, uint32_t chan= 0);
-   uint32_t sel() const override final;
-   uint32_t value() const;
-   float value_float() const;
-private:
-   void do_print(std::ostream& os) const override;
-   void do_print(std::ostream& os, const PrintFlags& flags) const override;
-   bool is_equal_to(const Value& other) const override;
-   union {
-      uint32_t u;
-      float f;
-   } m_value;
-};
-
-class InlineConstValue: public Value {
-public:
-   InlineConstValue(int value, int chan);
-   uint32_t sel() const override final;
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Value& other) const override;
-   AluInlineConstants m_value;
-};
-
-class UniformValue: public Value {
-public:
-   UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0);
-   UniformValue(uint32_t sel, uint32_t chan, PValue addr);
-   uint32_t sel() const override;
-   uint32_t kcache_bank() const;
-   PValue addr() const {return m_addr;}
-   void reset_addr(PValue v) {m_addr = v;}
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Value& other) const override;
-
-   uint32_t m_index;
-   uint32_t m_kcache_bank;
-   PValue m_addr;
-};
-
-} // end ns r600
-
-#endif
diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp
deleted file mode 100644
index c53b325..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_value_gpr.h"
-#include "sfn_valuepool.h"
-#include "sfn_debug.h"
-#include "sfn_liverange.h"
-
-namespace r600 {
-
-using std::vector;
-using std::array;
-
-GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset):
-   Value(Value::gpr, chan),
-   m_sel(sel),
-   m_base_offset(base_offset),
-   m_input(false),
-   m_pin_to_channel(false),
-   m_keep_alive(false)
-{
-}
-
-GPRValue::GPRValue(uint32_t sel, uint32_t chan):
-   Value(Value::gpr, chan),
-   m_sel(sel),
-   m_base_offset(0),
-   m_input(false),
-   m_pin_to_channel(false),
-   m_keep_alive(false)
-{
-}
-
-uint32_t GPRValue::sel() const
-{
-   return m_sel;
-}
-
-void GPRValue::do_print(std::ostream& os) const
-{
-   os << 'R';
-   os << m_sel;
-   os << '.' << component_names[chan()];
-}
-
-bool GPRValue::is_equal_to(const Value& other) const
-{
-   assert(other.type() == Value::Type::gpr);
-   const auto& rhs = static_cast<const GPRValue&>(other);
-   return (sel() == rhs.sel() &&
-           chan() == rhs.chan());
-}
-
-void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
-{
-   os << 'R';
-   os << m_sel;
-   os << '.' << component_names[chan()];
-}
-
-GPRVector::GPRVector(const GPRVector& orig):
-   Value(gpr_vector),
-   m_elms(orig.m_elms),
-   m_valid(orig.m_valid)
-{
-}
-
-GPRVector::GPRVector(std::array<PValue,4> elms):
-   Value(gpr_vector),
-   m_elms(elms),
-   m_valid(false)
-{
-   for (unsigned i = 0; i < 4; ++i)
-      if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) {
-         assert(0 && "GPR vector not valid because element missing or nit a GPR");
-         return;
-      }
-   unsigned sel = m_elms[0]->sel();
-   for (unsigned i = 1; i < 4; ++i)
-      if (m_elms[i]->sel() != sel) {
-         assert(0 && "GPR vector not valid because sel is not equal for all elements");
-         return;
-      }
-   m_valid = true;
-}
-
-GPRVector::GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle):
-   Value (gpr_vector),
-   m_valid(true)
-{
-   for (int i = 0; i < 4; ++i)
-      m_elms[i] = PValue(new GPRValue(sel, swizzle[i]));
-}
-
-GPRVector::GPRVector(const GPRVector& orig, const std::array<uint8_t,4>& swizzle)
-{
-      for (int i = 0; i < 4; ++i)
-         m_elms[i] = orig.reg_i(swizzle[i]);
-      m_valid = orig.m_valid;
-}
-
-void GPRVector::validate() const
-{
-   assert(m_elms[0]);
-   uint32_t sel = m_elms[0]->sel();
-   if (sel >= 124)
-      return;
-
-   for (unsigned i = 1; i < 4; ++i) {
-      assert(m_elms[i]);
-      if (sel != m_elms[i]->sel())
-         return;
-   }
-
-   m_valid = true;
-}
-
-uint32_t GPRVector::sel() const
-{
-   validate();
-   assert(m_valid);
-   return m_elms[0] ? m_elms[0]->sel() : 999;
-}
-
-void GPRVector::set_reg_i(int i, PValue reg)
-{
-   m_elms[i] = reg;
-}
-
-void GPRVector::pin_to_channel(int i)
-{
-   auto& v = static_cast<GPRValue&>(*m_elms[i]);
-   v.set_pin_to_channel();
-}
-
-void GPRVector::pin_all_to_channel()
-{
-   for (auto& v: m_elms) {
-      auto& c = static_cast<GPRValue&>(*v);
-      c.set_pin_to_channel();
-   }
-}
-
-void GPRVector::do_print(std::ostream& os) const
-{
-   os << "R" << sel() << ".";
-   for (int i = 0; i < 4; ++i)
-      os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? m_elms[i]->chan() : 8] : '?');
-}
-
-void GPRVector::swizzle(const Swizzle& swz)
-{
-   Values v(m_elms);
-   for (uint32_t i = 0; i < 4; ++i)
-      if (i != swz[i]) {
-         assert(swz[i] < 4);
-         m_elms[i] = v[swz[i]];
-      }
-}
-
-bool GPRVector::is_equal_to(const Value& other) const
-{
-   if (other.type() != gpr_vector) {
-      std::cerr << "t";
-      return false;
-   }
-
-   const GPRVector& o = static_cast<const GPRVector&>(other);
-
-   for (int i = 0; i < 4; ++i) {
-      if (*m_elms[i] != *o.m_elms[i]) {
-         std::cerr << "elm" << i;
-         return false;
-      }
-   }
-   return true;
-}
-
-
-GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array):
-   Value(gpr_array_value, value->chan()),
-   m_value(value),
-   m_addr(addr),
-   m_array(array)
-{
-}
-
-GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array):
-   Value(gpr_array_value, value->chan()),
-   m_value(value),
-   m_array(array)
-{
-}
-
-static const char *swz_char = "xyzw01_";
-
-void GPRArrayValue::do_print(std::ostream& os) const
-{
-   assert(m_array);
-   os << "R"  << m_value->sel();
-   if (m_addr) {
-      os <<  "[" << *m_addr  << "] ";
-   }
-   os << swz_char[m_value->chan()];
-
-   os << "(" << *m_array << ")";
-}
-
-bool GPRArrayValue::is_equal_to(const Value& other) const
-{
-   const GPRArrayValue& v = static_cast<const GPRArrayValue&>(other);
-
-   return *m_value == *v.m_value &&
-         *m_array == *v.m_array;
-}
-
-void GPRArrayValue::record_read(LiverangeEvaluator& ev) const
-{
-   if (m_addr) {
-      ev.record_read(*m_addr);
-      unsigned chan = m_value->chan();
-      assert(m_array);
-      m_array->record_read(ev, chan);
-   } else
-      ev.record_read(*m_value);
-}
-
-void GPRArrayValue::record_write(LiverangeEvaluator& ev) const
-{
-   if (m_addr) {
-      ev.record_read(*m_addr);
-      unsigned chan = m_value->chan();
-      assert(m_array);
-      m_array->record_write(ev, chan);
-   } else
-      ev.record_write(*m_value);
-}
-
-void GPRArrayValue::reset_value(PValue new_value)
-{
-   m_value = new_value;
-}
-
-void GPRArrayValue::reset_addr(PValue new_addr)
-{
-   m_addr = new_addr;
-}
-
-
-GPRArray::GPRArray(int base, int size, int mask, int frac):
-   Value (gpr_vector),
-   m_base_index(base),
-   m_component_mask(mask),
-   m_frac(frac)
-{
-   m_values.resize(size);
-   for (int i = 0; i < size; ++i) {
-      for (int j = 0; j < 4; ++j) {
-         if (mask & (1 << j)) {
-            auto gpr = new GPRValue(base + i, j);
-            /* If we want to use sb, we have to keep arrays
-             * alife for the whole shader range, otherwise the sb scheduler
-             * thinks is not capable to rename non-array uses of these registers */
-            gpr->set_as_input();
-            gpr->set_keep_alive();
-            m_values[i].set_reg_i(j, PValue(gpr));
-
-         }
-      }
-   }
-}
-
-uint32_t GPRArray::sel() const
-{
-   return m_base_index;
-}
-
-static const char *compchar = "xyzw";
-void GPRArray::do_print(std::ostream& os) const
-{
-   os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size()  - 1 << "].";
-   for (int j = 0; j < 4; ++j) {
-      if (m_component_mask & (1 << j))
-         os << compchar[j];
-   }
-}
-
-bool GPRArray::is_equal_to(const Value& other) const
-{
-   const GPRArray& o = static_cast<const GPRArray&>(other);
-   return o.sel() == sel() &&
-         o.m_values.size() == m_values.size() &&
-         o.m_component_mask == m_component_mask;
-}
-
-uint32_t GPRArrayValue::sel() const
-{
-   return m_value->sel();
-}
-
-PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component)
-{
-   assert(index < m_values.size());
-   assert(m_component_mask & (1 << (component + m_frac)));
-
-   sfn_log << SfnLog::reg << "Create indirect register from " << *this;
-
-   PValue v = m_values[index].reg_i(component + m_frac);
-   assert(v);
-
-   sfn_log << SfnLog::reg << " ->  " << *v;
-
-   if (indirect) {
-      sfn_log << SfnLog::reg << "["  << *indirect << "]";
-      switch (indirect->type()) {
-      case Value::literal: {
-         const LiteralValue& lv = static_cast<const LiteralValue&>(*indirect);
-         v = m_values[lv.value()].reg_i(component + m_frac);
-         break;
-      }
-      case Value::gpr:  {
-         v = PValue(new GPRArrayValue(v, indirect, this));
-         sfn_log << SfnLog::reg << "(" << *v << ")";
-         break;
-      }
-      default:
-         assert(0 && !"Indirect addressing must be literal value or GPR");
-      }
-   }
-   sfn_log << SfnLog::reg <<"  -> " << *v << "\n";
-   return v;
-}
-
-void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const
-{
-   for (auto& v: m_values)
-      ev.record_read(*v.reg_i(chan), true);
-}
-
-void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const
-{
-   for (auto& v: m_values)
-      ev.record_write(*v.reg_i(chan), true);
-}
-
-void GPRArray::collect_registers(ValueMap& output) const
-{
-   for (auto& v: m_values) {
-      for (int i = 0; i < 4; ++i) {
-         auto vv = v.reg_i(i);
-         if (vv)
-            output.insert(vv);
-      }
-   }
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h
deleted file mode 100644
index 7893488..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SFN_GPRARRAY_H
-#define SFN_GPRARRAY_H
-
-#include "sfn_value.h"
-#include <vector>
-#include <array>
-
-namespace r600 {
-
-class ValuePool;
-class ValueMap;
-class LiverangeEvaluator;
-
-class GPRValue : public Value {
-public:
-   GPRValue() = default;
-   GPRValue(GPRValue&& orig) = default;
-   GPRValue(const GPRValue& orig) = default;
-
-   GPRValue(uint32_t sel, uint32_t chan, int base_offset);
-
-   GPRValue(uint32_t sel, uint32_t chan);
-
-   GPRValue& operator = (const GPRValue& orig) = default;
-   GPRValue& operator = (GPRValue&& orig) = default;
-
-   uint32_t sel() const override final;
-
-   void set_as_input(){ m_input = true; }
-   bool is_input() const {return  m_input; }
-   void set_keep_alive() { m_keep_alive = true; }
-   bool keep_alive() const {return  m_keep_alive; }
-   void set_pin_to_channel() override { m_pin_to_channel = true;}
-   bool pin_to_channel()  const { return m_pin_to_channel;}
-
-private:
-   void do_print(std::ostream& os) const override;
-   void do_print(std::ostream& os, const PrintFlags& flags) const override;
-   bool is_equal_to(const Value& other) const override;
-   uint32_t m_sel;
-   bool m_base_offset;
-   bool m_input;
-   bool m_pin_to_channel;
-   bool m_keep_alive;
-};
-
-using PGPRValue = std::shared_ptr<GPRValue>;
-
-class GPRVector : public Value {
-public:
-   using Swizzle = std::array<uint32_t,4>;
-   using Values = std::array<PValue,4>;
-   GPRVector() = default;
-   GPRVector(GPRVector&& orig) = default;
-   GPRVector(const GPRVector& orig);
-
-   GPRVector(const GPRVector& orig, const std::array<uint8_t, 4>& swizzle);
-   GPRVector(std::array<PValue,4> elms);
-   GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle);
-
-   GPRVector& operator = (const GPRVector& orig) = default;
-   GPRVector& operator = (GPRVector&& orig) = default;
-
-   void swizzle(const Swizzle& swz);
-
-   uint32_t sel() const override final;
-
-   void set_reg_i(int i, PValue reg);
-
-   unsigned chan_i(int i) const {return m_elms[i]->chan();}
-   PValue reg_i(int i) const {return m_elms[i];}
-   PValue operator [] (int i) const {return m_elms[i];}
-   PValue& operator [] (int i) {return m_elms[i];}
-
-   void pin_to_channel(int i);
-   void pin_all_to_channel();
-
-   PValue x() const {return m_elms[0];}
-   PValue y() const {return m_elms[1];}
-   PValue z() const {return m_elms[2];}
-   PValue w() const {return m_elms[3];}
-
-   Values& values() { return m_elms;}
-
-private:
-   void do_print(std::ostream& os) const override;
-   bool is_equal_to(const Value& other) const override;
-   void validate() const;
-
-   Values m_elms;
-   mutable bool m_valid;
-};
-
-
-class GPRArray : public Value
-{
-public:
-   using Pointer = std::shared_ptr<GPRArray>;
-
-   GPRArray(int base, int size, int comp_mask, int frac);
-
-   uint32_t sel() const override;
-
-   uint32_t mask() const { return m_component_mask; };
-
-   size_t size() const {return m_values.size();}
-
-   PValue get_indirect(unsigned index, PValue indirect, unsigned component);
-
-   void record_read(LiverangeEvaluator& ev, int chan)const;
-   void record_write(LiverangeEvaluator& ev, int chan)const;
-
-   void collect_registers(ValueMap& output) const;
-
-private:
-   void do_print(std::ostream& os) const override;
-
-   bool is_equal_to(const Value& other) const override;
-
-   int m_base_index;
-   int m_component_mask;
-   int m_frac;
-
-   std::vector<GPRVector> m_values;
-};
-
-using PGPRArray = GPRArray::Pointer;
-
-class GPRArrayValue :public Value {
-public:
-   GPRArrayValue(PValue value, GPRArray *array);
-   GPRArrayValue(PValue value, PValue index, GPRArray *array);
-
-   void record_read(LiverangeEvaluator& ev) const;
-   void record_write(LiverangeEvaluator& ev) const;
-
-   size_t array_size() const;
-   uint32_t sel() const override;
-
-   PValue value() {return m_value;}
-
-   void reset_value(PValue new_value);
-   void reset_addr(PValue new_addr);
-
-   Value::Pointer indirect() const {return m_addr;}
-
-private:
-
-   void do_print(std::ostream& os) const override;
-
-   bool is_equal_to(const Value& other) const override;
-
-   PValue m_value;
-   PValue m_addr;
-   GPRArray *m_array;
-};
-
-inline size_t GPRArrayValue::array_size() const
-{
-   return m_array->size();
-}
-
-inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp)
-{
-   GPRVector::Swizzle swz = {0,1,2,3};
-   for (int i = ncomp; i < 4; ++i)
-      swz[i] = 7;
-   return swz;
-}
-
-inline GPRVector::Swizzle swizzle_from_mask(unsigned mask)
-{
-   GPRVector::Swizzle swz;
-   for (int i = 0; i < 4; ++i)
-      swz[i] =  ((1 << i) & mask) ? i : 7;
-   return swz;
-}
-
-
-}
-
-#endif // SFN_GPRARRAY_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
new file mode 100644
index 0000000..74fca92
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
@@ -0,0 +1,959 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_valuefactory.h"
+#include "sfn_instr.h"
+#include "sfn_debug.h"
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include <iostream>
+#include <sstream>
+#include <queue>
+#include <algorithm>
+
+namespace r600 {
+
+using std::istringstream;
+using std::string;
+
+
+ValueFactory::ValueFactory():
+   m_next_register_index(VirtualValue::virtual_register_base),
+   m_nowrite_idx(0)
+{
+}
+
+void ValueFactory::set_virtual_register_base(int base)
+{
+   m_next_register_index = base;
+}
+
+bool ValueFactory::allocate_registers(const exec_list *registers)
+{
+   bool has_arrays = false;
+   struct  array_entry {
+      unsigned index;
+      unsigned length;
+      unsigned ncomponents;
+
+      bool operator ()(const array_entry& a, const array_entry& b) const {
+         return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
+      }
+   };
+
+   using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
+                                          array_entry>;
+
+   array_list arrays;
+
+
+   foreach_list_typed(nir_register, reg, node, registers) {
+      if (reg->num_array_elems) {
+         array_entry ae = {reg->index, reg->num_array_elems, reg->bit_size / 32 * reg->num_components};
+         arrays.push(ae);
+         has_arrays = true;
+      }
+   }
+
+   int ncomponents = 0;
+   int sel = m_next_register_index;
+   unsigned length = 0;
+
+   while (!arrays.empty()) {
+      auto a = arrays.top();
+      arrays.pop();
+
+      /* This is a bit hackish, return an id that encodes the array merge. To make sure
+       * that the mapping doesn't go wrong we have to make sure the arrays is longer than
+       * the number of instances in this arrays slot */
+      if (a.ncomponents + ncomponents > 4 ||
+          a.length > length) {
+         sel = m_next_register_index;
+         ncomponents = 0;
+         length = 0;
+      }
+
+      if (ncomponents == 0)
+         m_next_register_index += a.length;
+
+      uint32_t frac = ncomponents;
+      auto array = new LocalArray( sel, a.ncomponents, a.length, frac);
+
+      for (unsigned i = 0; i < a.ncomponents; ++i) {
+         RegisterKey key(a.index, i, vp_array);
+         m_channel_counts.inc_count(i);
+         m_registers[key] = array;
+         sfn_log << SfnLog::reg << __func__ << ": Allocate array " << key << ":" << *array << "\n";
+      }
+
+      ncomponents += a.ncomponents;
+      length = a.length;
+   }
+
+   foreach_list_typed(nir_register, reg, node, registers) {
+      if (!reg->num_array_elems) {
+         uint32_t sel = m_next_register_index++;
+         unsigned num_components = reg->num_components * reg->bit_size / 32;
+         for (auto chan = 0u; chan < num_components; ++chan) {
+            RegisterKey key(reg->index, chan, vp_register);
+            m_channel_counts.inc_count(chan);
+            m_registers[key] = new Register( sel, chan, num_components > 1 ? pin_none : pin_free);
+            sfn_log << SfnLog::reg << "allocate register " << key << ":" << *m_registers[key] << "\n";
+         }
+      }
+   }
+   return has_arrays;
+}
+
+PRegister ValueFactory::allocate_pinned_register(int sel, int chan)
+{
+   if (m_next_register_index <= sel)
+      m_next_register_index = sel + 1;
+
+   auto reg = new Register(sel, chan, pin_fully);
+   m_pinned_registers.push_back(reg);
+   return reg;
+}
+
+RegisterVec4 ValueFactory::allocate_pinned_vec4(int sel, bool is_ssa)
+{
+   if (m_next_register_index <= sel)
+      m_next_register_index = sel + 1;
+
+   RegisterVec4 retval(sel, is_ssa, {0,1,2,3}, pin_fully);
+   for (int i = 0; i < 4; ++i)
+      m_pinned_registers.push_back(retval[i]);
+   return retval;
+}
+
+
+void ValueFactory::inject_value(const nir_dest& dest, int chan, PVirtualValue value)
+{
+   assert(dest.is_ssa);
+   RegisterKey key(dest.ssa.index, chan, vp_ssa);
+   sfn_log << SfnLog::reg << "Inject value with key " << key << "\n";
+   assert(m_values.find(key) == m_values.end());
+   m_values[key] = value;
+}
+
+PRegister ValueFactory::dest(const nir_alu_dest& dst, int chan,
+                             Pin pin_channel)
+{
+   sfn_log << SfnLog::reg << "Search (ref) " << &dst << "\n";
+   return dest(dst.dest, chan, pin_channel);
+}
+
+class TranslateRegister: public RegisterVisitor {
+public:
+   void visit(VirtualValue& value) {(void)value;}
+   void visit(Register& value) {(void)value;};
+   void visit(LocalArray& value) { m_value = value.element(m_offset, m_addr, m_chan);}
+   void visit(LocalArrayValue& value) {(void)value;}
+   void visit(UniformValue& value) {(void)value;}
+   void visit(LiteralConstant& value) {(void)value;}
+   void visit(InlineConstant& value) {(void)value;}
+
+   TranslateRegister(int offset, PVirtualValue addr, int chan): m_addr(addr),
+      m_value(nullptr), m_offset(offset), m_chan(chan) {}
+
+   PVirtualValue m_addr;
+   PRegister m_value;
+   int m_offset;
+   int m_chan;
+};
+
+PRegister ValueFactory::resolve_array(nir_register *reg, nir_src *indirect,
+                                      int base_offset, int chan)
+{
+   PVirtualValue addr = nullptr;
+   auto type = reg->num_array_elems ? vp_array : vp_register;
+   RegisterKey key(reg->index, chan, type);
+   auto ireg = m_registers.find(key);
+   if (ireg == m_registers.end()) {
+      std::cerr << "Key " << key << " not found\n";
+      assert(0);
+   }
+
+   if (reg->num_array_elems) {
+
+      if (indirect)
+         addr = src(*indirect, 0);
+
+      TranslateRegister array_resolution(base_offset, addr, chan);
+
+      ireg->second->accept(array_resolution);
+      assert(array_resolution.m_value);
+      return array_resolution.m_value;
+   } else {
+      return ireg->second;
+   }
+}
+
+PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel)
+{
+   if (dst.is_ssa) {
+      return dest(dst.ssa, chan, pin_channel);
+   } else {
+      return resolve_array(dst.reg.reg, dst.reg.indirect,
+                           dst.reg.base_offset, chan);
+   }
+}
+
+void ValueFactory::allocate_const(nir_load_const_instr *load_const)
+{
+   assert(load_const->def.bit_size == 32);
+   for (int i = 0; i < load_const->def.num_components; ++i) {
+      RegisterKey key(load_const->def.index, i, vp_ssa);
+      m_values[key] = literal(load_const->value[i].i32);
+      sfn_log << SfnLog::reg << "Add const with key " << key << " as " << m_values[key] << "\n";
+   }
+}
+
+PVirtualValue ValueFactory::uniform(nir_intrinsic_instr *load_uniform, int chan)
+{
+   auto literal = nir_src_as_const_value(load_uniform->src[0]);
+   assert(literal);
+
+   int index = nir_intrinsic_base(load_uniform) +  + literal->u32 + 512;
+
+   return uniform(index, chan, 0);
+}
+
+PVirtualValue ValueFactory::uniform(uint32_t index, int chan, int kcache)
+{
+   return new UniformValue( index, chan, kcache);
+}
+
+PRegister ValueFactory::temp_register(int pinned_channel, bool is_ssa)
+{
+   int sel = m_next_register_index++;
+   int chan = (pinned_channel >= 0) ?
+            pinned_channel : m_channel_counts.least_used();
+
+   auto reg = new Register( sel, chan,
+                            pinned_channel >= 0 ? pin_chan : pin_free);
+   m_channel_counts.inc_count(chan);
+
+   reg->set_is_ssa(is_ssa);
+   m_registers[RegisterKey(sel, chan, vp_temp)] = reg;
+   return reg;
+}
+
+RegisterVec4 ValueFactory::temp_vec4(Pin pin, const RegisterVec4::Swizzle &swizzle)
+{
+   int sel = m_next_register_index++;
+
+   if (pin == pin_free)
+      pin = pin_chan;
+
+   PRegister vec4[4];
+
+   for (int i = 0; i < 4; ++i) {
+      vec4[i] = new Register( sel, swizzle[i], pin);
+      vec4[i]->set_is_ssa(true);
+      m_registers[RegisterKey(sel, swizzle[i], vp_temp)] = vec4[i];
+   }
+   return RegisterVec4(vec4[0], vec4[1], vec4[2], vec4[3], pin);
+}
+
+RegisterVec4 ValueFactory::dest_vec4(const nir_dest& dst, Pin pin)
+{
+   if (pin != pin_group && pin != pin_chgr)
+      pin = pin_chan;
+   if (dst.is_ssa) {
+      PRegister x = dest(dst, 0, pin);
+      PRegister y = dest(dst, 1, pin);
+      PRegister z = dest(dst, 2, pin);
+      PRegister w = dest(dst, 3, pin);
+      return RegisterVec4(x, y, z, w, pin);
+   } else {
+      assert(!dst.reg.indirect);
+      PRegister v[4];
+      int sel = -1;
+      for (int i = 0; i < 4; ++i) {
+         RegisterKey key(dst.reg.reg->index, i, vp_register);
+         v[i] = m_registers[key];
+         assert(sel >= 0 || v[i]);
+         if (sel < 0)
+            sel = v[i]->sel();
+
+         if (!v[i]) {
+            v[i] = m_registers[key] = new Register(sel, i, pin_group);
+         }
+      }
+      return RegisterVec4(v[0], v[1], v[2], v[3], pin);
+   }
+   unreachable("unsupported");
+}
+
+PVirtualValue ValueFactory::src(const nir_alu_src& alu_src, int chan)
+{
+   return src(alu_src.src, alu_src.swizzle[chan]);
+}
+
+PVirtualValue ValueFactory::src64(const nir_alu_src& alu_src, int chan, int comp)
+{
+   return src(alu_src.src, 2 * alu_src.swizzle[chan] + comp);
+}
+
+PVirtualValue ValueFactory::src(const nir_src& src, int chan)
+{
+   sfn_log << SfnLog::reg << "search (ref) " << (void *)&src << "\n";
+
+   if (src.is_ssa) {
+      sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan << " got ";
+      auto val = ssa_src(*src.ssa, chan);
+      sfn_log << *val << "\n";
+      return val;
+   } else {
+      sfn_log << SfnLog::reg << "search reg " << src.reg.reg->index << "\n";
+      return local_register(src.reg, chan);
+   }
+}
+
+PVirtualValue ValueFactory::src(const nir_tex_src& tex_src, int chan)
+{
+   return src(tex_src.src, chan);
+}
+
+PRegister ValueFactory::dummy_dest(unsigned chan)
+{
+   assert(chan < 4);
+   return m_dummy_dest_pinned[chan];
+}
+
+PRegister
+ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel)
+{
+   RegisterKey key(ssa.index, chan, vp_ssa);
+
+   /* dirty workaround for Cayman trans ops, because we may request
+    * the same sa reg more than once, but only write to it once.  */
+   auto ireg = m_registers.find(key);
+   if (ireg != m_registers.end())
+      return ireg->second;
+
+   auto isel = m_ssa_index_to_sel.find(ssa.index);
+   int sel;
+   if (isel != m_ssa_index_to_sel.end())
+      sel = isel->second;
+   else {
+      sel = m_next_register_index++;
+      m_ssa_index_to_sel[ssa.index] = sel;
+   }
+
+   if (pin_channel == pin_free)
+      chan = m_channel_counts.least_used();
+
+   auto vreg = new Register( sel, chan, pin_channel);
+   m_channel_counts.inc_count(chan);
+   vreg->set_is_ssa(true);
+   m_registers[key] = vreg;
+   sfn_log << SfnLog::reg << "allocate Ssa " << key << ":" << *vreg << "\n";
+   return vreg;
+}
+
+PVirtualValue ValueFactory::zero()
+{
+   return inline_const(ALU_SRC_0, 0);
+}
+
+PVirtualValue ValueFactory::one()
+{
+   return inline_const(ALU_SRC_1, 0);
+}
+
+PVirtualValue ValueFactory::one_i()
+{
+   return inline_const(ALU_SRC_1_INT, 0);
+}
+
+PRegister ValueFactory::undef(int index, int chan)
+{
+   RegisterKey key(index, chan, vp_ssa);
+   PRegister reg = new Register(m_next_register_index++, 0, pin_free);
+   reg->set_is_ssa(true);
+   m_registers[key] = reg;
+   return reg;
+}
+
+PVirtualValue
+ValueFactory::ssa_src(const nir_ssa_def& ssa, int chan)
+{
+   RegisterKey key(ssa.index, chan, vp_ssa);
+   sfn_log << SfnLog::reg << "search src with key" << key << "\n";
+
+   auto ireg = m_registers.find(key);
+   if (ireg != m_registers.end())
+      return ireg->second;
+
+   auto ival = m_values.find(key);
+   if (ival != m_values.end())
+      return ival->second;
+
+   std::cerr << "Didn't find source with key " << key << "\n";
+   unreachable("Source values should always exist");
+}
+
+PRegister ValueFactory::local_register(const nir_reg_dest& dst, int chan)
+{
+   return resolve_array(dst.reg, dst.indirect,
+                        dst.base_offset, chan);
+}
+
+PRegister ValueFactory::local_register(const nir_reg_src& src, int chan)
+{
+   return resolve_array(src.reg, src.indirect,
+                        src.base_offset, chan);
+}
+
+PVirtualValue ValueFactory::literal(uint32_t value)
+{
+   auto iv = m_literal_values.find(value);
+   if (iv != m_literal_values.end())
+      return iv->second;
+
+   auto v = new LiteralConstant( value);
+   m_literal_values[value] = v;
+   return v;
+}
+
+PInlineConstant ValueFactory::inline_const(AluInlineConstants sel, int chan)
+{
+   int hash = (sel << 3) | chan;
+   auto iv = m_inline_constants.find(hash);
+   if (iv !=  m_inline_constants.end())
+      return iv->second;
+   auto v = new InlineConstant( sel, chan);
+   m_inline_constants[hash] = v;
+   return v;
+}
+
+ std::vector<PVirtualValue, Allocator<PVirtualValue>> ValueFactory::src_vec(const nir_src& source, int components)
+{
+   std::vector<PVirtualValue, Allocator<PVirtualValue>> retval;
+   retval.reserve(components);
+   for (int i = 0; i < components;  ++i)
+      retval.push_back(src(source, i));
+   return retval;
+}
+
+std::vector<PRegister, Allocator<PRegister>>
+ValueFactory::dest_vec(const nir_dest& dst, int num_components)
+{
+   std::vector<PRegister, Allocator<PRegister>> retval;
+   retval.reserve(num_components);
+   for (int i = 0; i < num_components;  ++i)
+      retval.push_back(dest(dst, i, num_components > 1 ? pin_chan : pin_free));
+   return retval;
+}
+
+RegisterVec4 ValueFactory::src_vec4(const nir_src& source, Pin pin, const RegisterVec4::Swizzle& swz)
+{
+   auto sx = swz[0] < 4 ? src(source, swz[0])->as_register() : nullptr;
+   auto sy = swz[1] < 4 ? src(source, swz[1])->as_register() : nullptr;
+   auto sz = swz[2] < 4 ? src(source, swz[2])->as_register() : nullptr;
+   auto sw = swz[3] < 4 ? src(source, swz[3])->as_register() : nullptr;
+
+   assert(sx || sy || sz || sw);
+
+   int sel = sx ? sx->sel() : (sy ? sy->sel() : (sz ? sz->sel() : sw ? sw->sel() : -1));
+   if (sel < 0)
+      unreachable("source vector without valid components");
+
+   if (!sx) sx = new Register(sel, 7, pin);
+   if (!sy) sy = new Register(sel, 7, pin);
+   if (!sz) sz = new Register(sel, 7, pin);
+   if (!sw) sw = new Register(sel, 7, pin);
+
+   return RegisterVec4(sx, sy, sz, sw, pin);
+}
+
+static Pin
+pin_from_string(const std::string& pinstr)
+{
+   if (pinstr == "chan")
+      return pin_chan;
+   if (pinstr == "array")
+      return pin_array;
+   if (pinstr == "fully")
+      return pin_fully;
+   if (pinstr == "group")
+      return pin_group;
+   if (pinstr == "chgr")
+      return pin_chgr;
+   if (pinstr == "free")
+      return pin_free;
+   return pin_none;
+}
+
+static int chan_from_char(char chan)
+{
+   switch (chan) {
+   case 'x' : return 0;
+   case 'y' : return 1;
+   case 'z' : return 2;
+   case 'w' : return 3;
+   case '0' : return 4;
+   case '1' : return 5;
+   case '_' : return 7;
+   }
+   unreachable("Unknown swizzle char");
+}
+
+static int
+str_to_int(const string& s)
+{
+   istringstream ss(s);
+   int retval;
+   ss >> retval;
+   return retval;
+}
+
+static bool
+split_register_string(const string& s,
+                      string& index_str,
+                      string& size_str,
+                      string& swizzle_str,
+                      string& pin_str)
+{
+   int type = 0;
+   for (unsigned i = 1; i < s.length(); ++i) {
+      if (s[i] == '.' && type != 3) {
+         type = 1;
+         continue;
+      } else if (s[i] == '@' && type != 3) {
+         type = 2;
+         continue;
+      } else if (s[i] == '[') {
+         type = 3;
+         continue;
+      } else if (s[i] == ']') {
+         if (type != 3)
+            std::cerr << "s=" << s
+                      << ": type=" << type
+                      << ": i=" << i
+                      << "\n";
+         assert(type == 3);
+
+         type = 4;
+         continue;
+      }
+
+      switch (type) {
+      case 0: index_str.append(1, s[i]); break;
+      case 1: swizzle_str.append(1, s[i]); break;
+      case 2: pin_str.append(1, s[i]); break;
+      case 3: size_str.append(1, s[i]); break;
+      default:
+         unreachable("Malformed Array allocation string");
+      }
+   }
+   return true;
+}
+
+PRegister ValueFactory::dest_from_string(const std::string& s)
+{
+   assert(s.length() >= 4);
+
+   assert(strchr("ARS_", s[0]));
+
+   string index_str;
+   string size_str;
+   string swizzle_str;
+   string pin_str;
+
+   split_register_string(s, index_str, size_str,
+                         swizzle_str, pin_str);
+
+   int sel = 0;
+   if (s[0] == '_') {
+      /* Since these instructions still may use or switch to a different channel
+       * we have to create a new instance for each occurance */
+      sel = std::numeric_limits<int>::max() - m_nowrite_idx++;
+   } else {      
+      std::istringstream n(index_str);
+      n >> sel;
+   }
+
+   auto p = pin_from_string(pin_str);
+   char chan = chan_from_char(swizzle_str[0]);
+
+   EValuePool pool = vp_temp;
+   switch (s[0]) {
+   case 'A': pool = vp_array; break;
+   case 'R': pool = vp_register; break;
+   case '_': pool = vp_ignore; break;
+   case 'S': pool = vp_ssa; break;
+   default:
+   unreachable("Unknown value type");
+   }
+
+   bool is_ssa = s[0] == 'S';
+
+   RegisterKey key (sel, chan, pool);
+
+   sfn_log << SfnLog::reg << "Search register with key " << key << "\n";
+
+   auto ireg = m_registers.find(key);
+   if ( ireg == m_registers.end()) {
+      auto reg = new Register(sel, chan, p);
+      reg->set_is_ssa(is_ssa);
+      if (p == pin_fully)
+         reg->pin_live_range(true);
+      m_registers[key] = reg;
+      return reg;
+   } else  if (pool == vp_ignore) {
+      assert(ireg->second->sel() == std::numeric_limits<int>::max());
+      return ireg->second;
+   } else {
+      assert(!is_ssa || s[0] == '_');
+
+      if (size_str.length()) {
+         auto array = static_cast<LocalArray *>(ireg->second);
+         PVirtualValue addr = nullptr;
+         int offset = 0;
+         if (size_str[0] == 'S' || size_str[0] == 'R') {
+            addr = src_from_string(size_str);
+         } else {
+            istringstream num_str(size_str);
+            num_str >> offset;
+         }
+
+         return array->element(offset, addr, chan - array->frac());
+      } else
+         return ireg->second;
+   }
+}
+
+PVirtualValue ValueFactory::src_from_string(const std::string& s)
+{
+   switch (s[0]) {
+   case 'A':
+   case 'S':
+   case 'R': break;
+   case 'L': return LiteralConstant::from_string(s);
+   case 'K': return UniformValue::from_string(s);
+   case 'P': return InlineConstant::param_from_string(s);
+   case 'I': return InlineConstant::from_string(s);
+
+   default:
+      std::cerr << "'" << s << "'";
+      unreachable("Unknown register type");
+   }
+
+   assert(strchr("ARS_", s[0]));
+
+   string index_str;
+   string size_str;
+   string swizzle_str;
+   string pin_str;
+
+   split_register_string(s, index_str, size_str,
+                         swizzle_str, pin_str);
+
+   int sel = 0;
+   if (s[0] == '_') {
+      sel = std::numeric_limits<int>::max();
+   } else {
+      std::istringstream n(index_str);
+      n >> sel;
+   }
+
+   auto p = pin_from_string(pin_str);
+   char chan = chan_from_char(swizzle_str[0]);
+
+   EValuePool pool = vp_temp;
+   switch (s[0]) {
+   case 'A': pool = vp_array; break;
+   case 'R': pool = vp_register; break;
+   case '_': pool = vp_ignore; break;
+   case 'S': pool = vp_ssa; break;
+   default:
+   unreachable("Unknown value type");
+   }
+
+   RegisterKey key (sel, chan, pool);
+
+   auto ireg = m_registers.find(key);
+   if (ireg != m_registers.end()) {
+      if (pool != vp_ssa && size_str.length()) {
+         auto array = static_cast<LocalArray *>(ireg->second);
+         PVirtualValue addr = nullptr;
+         int offset = 0;
+         if (size_str[0] == 'S' || size_str[0] == 'R') {
+            addr = src_from_string(size_str);
+         } else {
+            istringstream num_str(size_str);
+            num_str >> offset;
+         }
+         return array->element(offset, addr, chan - array->frac());
+      } else {
+         return ireg->second;
+      }
+   } else  {
+      if (sel != std::numeric_limits<int>::max()) {
+         std::cerr << "register " << key << "not found \n";
+         unreachable("Source register should exist");
+      } else {
+         auto reg = new Register( sel, chan, p);
+         m_registers[key] = reg;
+         return reg;
+      }
+   }
+}
+
+RegisterVec4 ValueFactory::dest_vec4_from_string(const std::string& s,
+                                                 RegisterVec4::Swizzle& swz,
+                                                 Pin pin)
+{
+   bool is_ssa = false;
+   int sel = sel_and_szw_from_string(s, swz, is_ssa);
+
+   PRegister v[4];
+
+   for (int i = 0; i < 4; ++i) {
+      auto pool = is_ssa ? vp_ssa : vp_register;
+      if (swz[i] > 3)
+         pool = vp_ignore;
+
+      RegisterKey  key(sel, i, pool);
+      auto ireg = m_registers.find(key);
+      if (ireg != m_registers.end()) {
+         v[i] = ireg->second;
+         assert(!is_ssa || pool == vp_ignore);
+      } else {
+         v[i] = new Register( sel, i, pin);
+         v[i]->set_is_ssa(is_ssa);
+         m_registers[key] = v[i];
+      }
+   }
+   return RegisterVec4(v[0], v[1], v[2], v[3], pin);
+}
+
+RegisterVec4 ValueFactory::src_vec4_from_string(const std::string& s)
+{
+   RegisterVec4::Swizzle swz;
+   bool is_ssa = false;
+   int sel = sel_and_szw_from_string(s, swz, is_ssa);
+
+   PRegister v[4];
+
+   PRegister used_reg = nullptr;
+   for (int i = 0; i < 4; ++i) {
+      if (swz[i] < 4) {
+         RegisterKey  key(sel, swz[i], is_ssa ? vp_ssa : vp_register);
+         auto ireg = m_registers.find(key);
+         if (ireg == m_registers.end()) {
+            std::cerr << s << ": Register with key " << key << " not found\n";
+            assert(0);
+         }
+         used_reg = v[i] = ireg->second;
+      } else {
+         v[i] = nullptr;
+      }
+   }
+   sel = used_reg ? used_reg->sel() : 0;
+   Pin pin = used_reg ? used_reg->pin() : pin_group;
+
+   for (int i = 0; i < 4; ++i) {
+      if (!v[i]) {
+         v[i] = new Register( sel, swz[i], pin);
+         v[i]->set_is_ssa(is_ssa);
+      } else {
+         if (v[i]->pin() == pin_none)
+            v[i]->set_pin(pin_group);
+      }
+   }
+   return RegisterVec4(v[0], v[1], v[2], v[3], pin);
+}
+
+LocalArray *ValueFactory::array_from_string(const std::string& s)
+{
+   assert(s[0] == 'A');
+   string index_str;
+   string size_str;
+   string swizzle_str;
+   string pin_str;
+
+
+   int type = 0;
+   for (unsigned i = 1; i < s.length(); ++i) {
+      if (s[i] == '.') {
+         type = 1;
+         continue;
+      } else if (s[i] == '@') {
+         type = 2;
+         continue;
+      } else if (s[i] == '[') {
+         type = 3;
+         continue;
+      } else if (s[i] == ']') {
+         assert(type == 3);
+         type = 4;
+         continue;
+      }
+
+      switch (type) {
+      case 0: index_str.append(1, s[i]); break;
+      case 1: swizzle_str.append(1, s[i]); break;
+      case 2: pin_str.append(1, s[i]); break;
+      case 3: size_str.append(1, s[i]); break;
+      default:
+         unreachable("Malformed Array allocation string");
+      }
+   }
+
+   int sel = str_to_int(index_str);
+   int size = str_to_int(size_str);
+   int ncomp = swizzle_str.length();
+
+   if (ncomp > 4 || ncomp <= 0) {
+      std::cerr << "Error reading array from '" << s << ": ";
+      std::cerr << "index:'" << index_str << "' -> '" << sel
+                << "' size:'" << size_str << "' -> '" << size
+                << " swizzle:'" << swizzle_str << "' -> '" << ncomp << "'\n";
+      assert(0);
+   }
+
+   const char *swz = "xyzw";
+   const char *first_swz = strchr(swz, swizzle_str[0]);
+   long frac = first_swz - swz;
+   assert(frac >= 0 && frac <= 4 - ncomp);
+
+   auto array = new LocalArray( sel, ncomp, size, frac);
+
+   for (int i = 0; i < ncomp; ++i) {
+      RegisterKey key(sel, i + frac, vp_array);
+      m_registers[key] = array;
+   }
+   return array;
+}
+
+void LiveRangeMap::append_register(Register *reg)
+{
+   sfn_log << SfnLog::merge << __func__ << ": " << *reg << "\n";
+
+   auto chan = reg->chan();
+   auto& ranges = m_life_ranges[chan];
+
+   LiveRangeEntry entry(reg);
+   ranges.emplace_back(entry);
+}
+
+std::array<size_t, 4> LiveRangeMap::sizes() const
+{
+   std::array<size_t, 4> result;
+   std::transform(m_life_ranges.begin(), m_life_ranges.end(),
+                  result.begin(), [](auto lr) {return lr.size(); });
+   return result;
+}
+
+LiveRangeMap ValueFactory::prepare_live_range_map()
+{
+   LiveRangeMap result;
+
+   for (auto [key, reg] : m_registers) {
+      if (key.value.pool == vp_ignore)
+         continue;
+
+      if (key.value.pool == vp_array) {
+         if (key.value.chan == 0) {
+            auto array = static_cast<LocalArray *>(reg);
+            for (auto& a : *array)  {
+               result.append_register(a);
+            }
+         }
+      } else {
+         if (reg->chan() < 4)
+            result.append_register(reg);
+      }
+   }
+
+   for (auto r : m_pinned_registers) {
+      result.append_register(r);
+   }
+
+   for (int i = 0; i < 4; ++i) {
+      auto& comp = result.component(i);
+      std::sort(comp.begin(), comp.end(),
+                [](const LiveRangeEntry& lhs, const LiveRangeEntry& rhs) {
+         return lhs.m_register->sel() < rhs.m_register->sel();
+      });
+      for(size_t j = 0; j < comp.size(); ++j)
+         comp[j].m_register->set_index(j);
+   }
+
+   return result;
+}
+
+void ValueFactory::clear_pins()
+{
+   for (auto [key, reg] : m_registers)
+      reg->set_pin(pin_none);
+
+   for (auto reg : m_pinned_registers)
+      reg->set_pin(pin_none);
+}
+
+void ValueFactory::clear()
+{
+   m_registers.clear();
+   m_values.clear();
+   m_literal_values.clear();
+   m_inline_constants.clear();
+   m_ssa_index_to_sel.clear();
+}
+
+void ValueFactory::get_shader_info(r600_shader *sh_info)
+{
+   std::set<LocalArray*> arrays;
+
+   for (auto& [key, reg] : m_registers) {
+      if (key.value.pool == vp_array)
+         arrays.insert(static_cast<LocalArray *>(reg));
+   }
+
+   if (!arrays.empty()) {
+
+      sh_info->num_arrays = arrays.size();
+      sh_info->arrays = new r600_shader_array[arrays.size()];
+
+      for (auto& arr : arrays) {
+         sh_info->arrays->gpr_start = arr->sel();
+         sh_info->arrays->gpr_count = arr->size();
+         sh_info->arrays->comp_mask =
+               ((1 << arr->nchannels()) - 1)
+               << arr->frac();
+      }
+      sh_info->indirect_files |= 1 << TGSI_FILE_TEMPORARY;
+   }
+}
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h
new file mode 100644
index 0000000..89a9bcf
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h
@@ -0,0 +1,299 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef VALUEFACTORY_H
+#define VALUEFACTORY_H
+
+#include "sfn_virtualvalues.h"
+#include "sfn_alu_defines.h"
+
+#include "nir.h"
+
+#include <unordered_map>
+#include <cassert>
+#include <ostream>
+#include <unordered_map>
+
+struct r600_shader;
+
+namespace r600 {
+
+struct LiveRangeEntry {
+   enum EUse {
+      use_export,
+      use_unspecified
+   };
+
+   LiveRangeEntry (Register *reg): m_register(reg) {}
+   int m_start{-1};
+   int m_end{-1};
+   int m_index{-1};
+   int m_color{-1};
+   std::bitset<use_unspecified> m_use;
+   Register *m_register;
+
+   void print(std::ostream& os) const {
+      os << *m_register << "(" << m_index << ", " <<  m_color << ") ["
+         << m_start << ":" << m_end << "]";
+   }  
+};
+
+inline std::ostream& operator << (std::ostream& os, const LiveRangeEntry& lre)
+{
+   lre.print(os);
+   return os;
+}
+
+class LiveRangeMap {
+public:
+   using ChannelLiveRange = std::vector<LiveRangeEntry>;
+
+   LiveRangeEntry& operator()(int index, int chan) {
+      assert(chan  < 4);
+      return m_life_ranges[chan].at(index);
+   }
+
+   void append_register(Register *reg);
+
+   void set_life_range(const Register& reg, int start, int end) {
+      auto& entry = m_life_ranges[reg.chan()].at(reg.index());
+      entry.m_start = start;
+      entry.m_end = end;
+   }
+
+   std::array<size_t, 4> sizes() const;
+
+   ChannelLiveRange& component(int i) {
+      return m_life_ranges[i];
+   }
+
+   const ChannelLiveRange& component(int i) const {
+      return m_life_ranges[i];
+   }
+
+private:
+
+   std::array<ChannelLiveRange, 4> m_life_ranges;
+};
+
+std::ostream& operator <<  (std::ostream& os, const LiveRangeMap& lrm);
+
+bool operator == (const LiveRangeMap& lhs, const LiveRangeMap& rhs);
+
+inline bool operator != (const LiveRangeMap& lhs, const LiveRangeMap& rhs)
+{
+   return !(lhs == rhs);
+}
+
+enum EValuePool {
+   vp_ssa,
+   vp_register,
+   vp_temp,
+   vp_array,
+   vp_ignore
+};
+
+union RegisterKey {
+    struct  {
+        uint32_t index;
+        uint32_t chan : 29;
+        EValuePool pool : 3;
+    } value;
+    uint64_t hash;
+
+    RegisterKey(uint32_t index, uint32_t chan, EValuePool pool)
+    {
+        value.index = index;
+        value.chan = chan;
+        value.pool = pool;
+    }
+
+    void print(std::ostream& os) const {
+        os << "(" << value.index
+           << ", " << value.chan
+           << ", ";
+        switch (value.pool) {
+        case vp_ssa: os << "ssa"; break;
+        case vp_register: os << "reg"; break;
+        case vp_temp: os << "temp"; break;
+        case vp_array : os << "array"; break;
+        case vp_ignore : break;
+        }
+        os << ")";
+    }
+};
+
+
+inline bool operator == (const RegisterKey& lhs, const RegisterKey& rhs) {
+    return lhs.hash == rhs.hash;
+}
+
+inline std::ostream& operator << (std::ostream& os, const RegisterKey& key) {
+    key.print(os);
+    return os;
+}
+
+struct register_key_hash {
+    std::size_t operator () (const RegisterKey& key) const {
+        return key.hash;
+    }
+};
+
+class ChannelCounts {
+public:
+   void inc_count(int chan) {++m_counts[chan];}
+   int least_used() const  {
+      int least_used = 0;
+      uint32_t count = m_counts[0];
+      for (int i = 1; i < 4; ++i) {
+         if (count > m_counts[i]) {
+            count = m_counts[i];
+            least_used = i;
+         }
+      }
+      return least_used;
+   }
+   void print(std::ostream& os) const {
+      os << "CC:" << m_counts[0] << " " << m_counts[1]
+         << " " << m_counts[2] << " " << m_counts[3];
+   }
+private:
+   std::array<uint32_t, 4> m_counts{0,0,0,0};
+};
+
+inline std::ostream& operator << (std::ostream& os, const ChannelCounts& cc)
+{
+   cc.print(os);
+   return os;
+}
+
+class ValueFactory : public Allocate {
+public:
+    ValueFactory();
+
+    void clear();
+
+    ValueFactory(const ValueFactory& orig) = delete;
+    ValueFactory& operator = (const ValueFactory& orig) = delete;
+
+    void set_virtual_register_base(int base);
+
+    bool allocate_registers(const exec_list *registers);
+    PRegister allocate_pinned_register(int sel, int chan);
+    RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa);
+
+    void inject_value(const nir_dest& dest, int chan, PVirtualValue value);
+
+    std::vector<PRegister, Allocator<PRegister>>  dest_vec(const nir_dest& dest, int num_components);
+    std::vector<PRegister, Allocator<PRegister>>  dest_vector(const nir_src& src,
+                                                              const std::vector<int>& components);
+
+
+    PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel);
+    PRegister dest(const nir_dest& dest, int chan, Pin pin_channel);
+    PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel);
+
+    PRegister dummy_dest(unsigned chan);
+    PRegister temp_register(int pinned_channel = -1, bool is_ssa = true);
+    RegisterVec4 temp_vec4(Pin pin, const RegisterVec4::Swizzle& swizzle = {0,1,2,3});
+    RegisterVec4 dest_vec4(const nir_dest& dest, Pin pin);
+
+    RegisterVec4 src_vec4(const nir_src& src, Pin pin, const RegisterVec4::Swizzle &swz = {0,1,2,3});
+
+    PVirtualValue src(const nir_alu_src& alu_src, int chan);
+    PVirtualValue src64(const nir_alu_src& alu_src, int chan, int comp);
+    PVirtualValue src(const nir_src& src, int chan);
+    PVirtualValue src(const nir_tex_src& tex_src, int chan);
+    PVirtualValue literal(uint32_t value);
+    PVirtualValue uniform(nir_intrinsic_instr *load_uniform, int chan);
+    PVirtualValue uniform(uint32_t index, int chan, int kcache);
+
+    void allocate_const(nir_load_const_instr *load_const);
+
+    PRegister dest_from_string(const std::string& s);
+    RegisterVec4 dest_vec4_from_string(const std::string& s, RegisterVec4::Swizzle &swz,
+                                       Pin pin = pin_none);
+    PVirtualValue src_from_string(const std::string& s);
+    RegisterVec4 src_vec4_from_string(const std::string& s);
+
+    LocalArray *array_from_string(const std::string& s);
+
+    std::vector<PVirtualValue, Allocator<PVirtualValue>> src_vec(const nir_src& src, int components);
+
+    PInlineConstant inline_const(AluInlineConstants sel, int chan);
+
+    void get_shader_info(r600_shader *sh_info);
+
+    PRegister undef(int index, int chan);
+    PVirtualValue zero();
+    PVirtualValue one();
+    PVirtualValue one_i();
+
+    LiveRangeMap prepare_live_range_map();
+
+    void clear_pins();
+
+    int next_register_index() const { return m_next_register_index; }
+private:
+
+    PVirtualValue ssa_src(const nir_ssa_def &dest, int chan);
+
+    PRegister local_register(const nir_reg_dest& dest, int chan);
+    PRegister local_register(const nir_reg_src& dest, int chan);
+    PRegister resolve_array(nir_register *reg, nir_src *indirect,
+                            int base_offset, int chan);
+
+    int m_next_register_index;
+    int m_next_temp_channel{0};
+
+    template <typename Key, typename T>
+    using unordered_map_alloc = std::unordered_map<Key, T, std::hash<Key>, std::equal_to<Key>,
+    Allocator<std::pair<const Key, T>>>;
+
+    template <typename Key, typename T>
+    using unordered_reg_map_alloc = std::unordered_map<Key, T, register_key_hash, std::equal_to<Key>,
+    Allocator<std::pair<const Key, T>>>;
+
+    using RegisterMap = unordered_reg_map_alloc<RegisterKey, PRegister>;
+    using ROValueMap = unordered_reg_map_alloc<RegisterKey, PVirtualValue>;
+
+    RegisterMap m_registers;
+    std::vector<PRegister> m_pinned_registers;
+    ROValueMap m_values;
+    unordered_map_alloc<uint32_t, PLiteralVirtualValue> m_literal_values;
+    unordered_map_alloc<uint32_t, InlineConstant::Pointer> m_inline_constants;
+    unordered_map_alloc<uint32_t, uint32_t> m_ssa_index_to_sel;
+
+    uint32_t m_nowrite_idx;
+
+    RegisterVec4 m_dummy_dest_pinned{126, pin_chan, {0,1,2,3}};
+    ChannelCounts m_channel_counts;
+};
+
+}
+
+#endif // VALUEFACTORY_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
deleted file mode 100644
index efc9efd..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
+++ /dev/null
@@ -1,526 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018-2019 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sfn_debug.h"
-#include "sfn_value_gpr.h"
-#include "sfn_valuepool.h"
-
-#include <iostream>
-#include <queue>
-
-namespace r600 {
-
-using std::vector;
-using std::pair;
-using std::make_pair;
-using std::queue;
-
-ValuePool::ValuePool():
-   m_next_register_index(0),
-   current_temp_reg_index(0),
-   next_temp_reg_comp(4)
-{
-}
-
-PValue ValuePool::m_undef = Value::zero;
-
-GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components)
-{
-   std::array<PValue, 4> result;
-   for (int i = 0; i < 4; ++i)
-      result[i] = from_nir(dst, i < num_components ? i : 7);
-   return GPRVector(result);
-}
-
-std::vector<PValue> ValuePool::varvec_from_nir(const nir_dest& dst, int num_components)
-{
-   std::vector<PValue> result(num_components);
-   for (int i = 0; i < num_components; ++i)
-      result[i] = from_nir(dst, i);
-   return result;
-}
-
-
-std::vector<PValue> ValuePool::varvec_from_nir(const nir_src& src, int num_components)
-{
-   std::vector<PValue> result(num_components);
-   int i;
-   for (i = 0; i < num_components; ++i)
-      result[i] = from_nir(src, i);
-
-   return result;
-}
-
-
-PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
-{
-   sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ")
-           << (v.is_ssa ? v.ssa->index : v.reg.reg->index);
-
-   if (!v.is_ssa) {
-      int idx = lookup_register_index(v);
-      sfn_log << SfnLog::reg << "  -> got index " <<  idx << "\n";
-      if (idx >= 0) {
-         auto reg = lookup_register(idx, swizzled, false);
-         if (reg) {
-            if (reg->type() == Value::gpr_vector) {
-               auto& array = static_cast<GPRArray&>(*reg);
-               reg = array.get_indirect(v.reg.base_offset,
-                                        v.reg.indirect ?
-                                           from_nir(*v.reg.indirect, 0, 0) : nullptr,
-                                        component);
-            }
-            return reg;
-         }
-      }
-      assert(0 && "local registers should always be found");
-   }
-
-   unsigned index = v.ssa->index;
-   /* For undefs we use zero and let ()yet to be implemeneted dce deal with it */
-   if (m_ssa_undef.find(index) != m_ssa_undef.end())
-      return Value::zero;
-
-
-   int idx = lookup_register_index(v);
-   sfn_log << SfnLog::reg << "  -> got index " <<  idx << "\n";
-   if (idx >= 0) {
-      auto reg = lookup_register(idx, swizzled, false);
-      if (reg)
-         return reg;
-   }
-
-   auto literal_val = nir_src_as_const_value(v);
-   if (literal_val) {
-      assert(v.is_ssa);
-      switch (v.ssa->bit_size) {
-      case 1:
-         return PValue(new LiteralValue(literal_val[swizzled].b ? 0xffffffff : 0, component));
-      case 32:
-         return literal(literal_val[swizzled].u32);
-      default:
-         sfn_log << SfnLog::reg << "Unsupported bit size " << v.ssa->bit_size
-                 << " fall back to 32\n";
-         return PValue(new LiteralValue(literal_val[swizzled].u32, component));
-      }
-   }
-
-   return PValue();
-}
-
-PValue ValuePool::from_nir(const nir_src& v, unsigned component)
-{
-   return from_nir(v, component, component);
-}
-
-PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component)
-{
-   return from_nir(v.src, component, component);
-}
-
-PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component)
-{
-   return from_nir(v.src, component, v.swizzle[component]);
-}
-
-PGPRValue ValuePool::get_temp_register(int channel)
-{
-   /* Skip to next register to get the channel we want */
-   if (channel >= 0) {
-      if (next_temp_reg_comp <= channel)
-         next_temp_reg_comp = channel;
-      else
-         next_temp_reg_comp = 4;
-   }
-
-   if (next_temp_reg_comp > 3) {
-      current_temp_reg_index = allocate_temp_register();
-      next_temp_reg_comp = 0;
-   }
-   return std::make_shared<GPRValue>(current_temp_reg_index, next_temp_reg_comp++);
-}
-
-GPRVector ValuePool::get_temp_vec4(const GPRVector::Swizzle& swizzle)
-{
-   int sel = allocate_temp_register();
-   return GPRVector(sel, swizzle);
-}
-
-PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp)
-{
-   int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa):
-                          get_local_register_index(*src.reg.reg);
-
-   auto retval = lookup_register(idx, comp, false);
-   if (!retval || retval->type() != Value::gpr || retval->type() != Value::gpr_array_value)
-      retval = create_register(idx, comp);
-   return retval;
-}
-
-PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component)
-{
-   //assert(v->write_mask & (1 << component));
-   return from_nir(v.dest, component);
-}
-
-int ValuePool::lookup_register_index(const nir_dest& dst)
-{
-   return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa):
-                       get_local_register_index(*dst.reg.reg);
-}
-
-int ValuePool::lookup_register_index(const nir_src& src) const
-{
-   int index = 0;
-
-   index = src.is_ssa ?
-              get_ssa_register_index(*src.ssa) :
-              get_local_register_index(*src.reg.reg);
-
-   sfn_log << SfnLog::reg << " LIDX:" << index;
-
-   auto r = m_register_map.find(index);
-   if (r == m_register_map.end()) {
-      return -1;
-   }
-   return static_cast<int>(r->second.index);
-}
-
-
-int ValuePool::allocate_temp_register()
-{
-   return m_next_register_index++;
-}
-
-
-PValue ValuePool::from_nir(const nir_dest& v, unsigned component)
-{
-   int idx = lookup_register_index(v);
-   sfn_log << SfnLog::reg << __func__  << ": ";
-   if (v.is_ssa)
-      sfn_log << "ssa_" << v.ssa.index;
-   else
-      sfn_log << "r" << v.reg.reg->index;
-   sfn_log << " -> " << idx << "\n";
-
-   auto retval = lookup_register(idx, component, false);
-   if (!retval)
-      retval = create_register(idx, component);
-
-   if (retval->type() == Value::gpr_vector) {
-      assert(!v.is_ssa);
-      auto& array = static_cast<GPRArray&>(*retval);
-      retval = array.get_indirect(v.reg.base_offset,
-                                  v.reg.indirect ?
-                                  from_nir(*v.reg.indirect, 0, 0) : nullptr,
-                                  component);
-   }
-
-   return retval;
-}
-
-ValueMap ValuePool::get_temp_registers() const
-{
-   ValueMap result;
-
-   for (auto& v : m_registers) {
-      if (v.second->type() == Value::gpr)
-         result.insert(v.second);
-      else if (v.second->type() == Value::gpr_vector) {
-         auto& array = static_cast<GPRArray&>(*v.second);
-         array.collect_registers(result);
-      }
-   }
-   return result;
-}
-
-static const char swz[] = "xyzw01?_";
-
-PValue ValuePool::create_register(unsigned sel, unsigned swizzle)
-{
-   sfn_log << SfnLog::reg
-           <<"Create register " << sel  << '.' << swz[swizzle] << "\n";
-   auto retval = PValue(new GPRValue(sel, swizzle));
-   m_registers[(sel << 3) + swizzle] = retval;
-   return retval;
-}
-
-bool ValuePool::inject_register(unsigned sel, unsigned swizzle,
-                                const PValue& reg, bool map)
-{
-   uint32_t ssa_index = sel;
-
-   if (map) {
-      auto pos = m_ssa_register_map.find(sel);
-      if (pos == m_ssa_register_map.end())
-         ssa_index = m_next_register_index++;
-      else
-         ssa_index = pos->second;
-   }
-
-   sfn_log << SfnLog::reg
-           << "Inject register " << sel  << '.' << swz[swizzle]
-           << " at index " <<  ssa_index << " ...";
-
-   if (map)
-      m_ssa_register_map[sel] = ssa_index;
-
-   allocate_with_mask(ssa_index, swizzle, true);
-
-   unsigned idx = (ssa_index << 3) + swizzle;
-   auto p = m_registers.find(idx);
-   if ( (p != m_registers.end()) && *p->second != *reg) {
-      std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n";
-      assert(0);
-      return false;
-   }
-   sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n";
-   m_registers[idx] = reg;
-
-   if (m_next_register_index <= ssa_index)
-      m_next_register_index = ssa_index + 1;
-   return true;
-}
-
-
-PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle,
-                                  bool required)
-{
-
-   PValue retval;
-   sfn_log << SfnLog::reg
-           << "lookup register " << sel  << '.' << swz[swizzle] << "("
-           << ((sel << 3) + swizzle) << ")...";
-
-
-   auto reg = m_registers.find((sel << 3) + swizzle);
-   if (reg != m_registers.end()) {
-      sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n";
-      retval = reg->second;
-   } else if (swizzle == 7) {
-      PValue retval = create_register(sel, swizzle);
-      sfn_log << SfnLog::reg << " -> Created " << *retval << "\n";
-   } else if (required) {
-      sfn_log << SfnLog::reg << "Register (" << sel << ", "
-              << swizzle << ") not found but required\n";
-      assert(0 && "Unallocated register value requested\n");
-   }
-   sfn_log << SfnLog::reg << " -> Not required and not  allocated\n";
-   return retval;
-}
-
-unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa)
-{
-   sfn_log << SfnLog::reg << __func__ << ": search dst ssa "
-           << ssa.index;
-
-   auto pos = m_ssa_register_map.find(ssa.index);
-   if (pos == m_ssa_register_map.end()) {
-      sfn_log << SfnLog::reg << " Need to allocate ...";
-      allocate_ssa_register(ssa);
-      pos = m_ssa_register_map.find(ssa.index);
-      assert(pos != m_ssa_register_map.end());
-   }
-   sfn_log << SfnLog::reg << "... got " << pos->second << "\n";
-   return pos->second;
-}
-
-unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const
-{
-   sfn_log << SfnLog::reg << __func__ << ": search ssa "
-           << ssa.index;
-
-   auto pos = m_ssa_register_map.find(ssa.index);
-   sfn_log << SfnLog::reg << " got " << pos->second<< "\n";
-   if (pos == m_ssa_register_map.end()) {
-      sfn_log << SfnLog::reg << __func__ << ": ssa register "
-              << ssa.index << " lookup failed\n";
-      return -1;
-   }
-   return pos->second;
-}
-
-unsigned ValuePool::get_local_register_index(const nir_register& reg)
-{
-   unsigned index = reg.index | 0x80000000;
-
-   auto pos = m_ssa_register_map.find(index);
-   if (pos == m_ssa_register_map.end()) {
-      allocate_local_register(reg);
-      pos = m_ssa_register_map.find(index);
-      assert(pos != m_ssa_register_map.end());
-   }
-   return pos->second;
-}
-
-unsigned ValuePool::get_local_register_index(const nir_register& reg) const
-{
-   unsigned index = reg.index | 0x80000000;
-   auto pos = m_ssa_register_map.find(index);
-   if (pos == m_ssa_register_map.end()) {
-      sfn_log << SfnLog::err << __func__ << ": local register "
-              << reg.index << " lookup failed";
-      return -1;
-   }
-   return pos->second;
-}
-
-void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa)
-{
-   sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index
-           << " as " << m_next_register_index << "\n";
-   int index = m_next_register_index++;
-   m_ssa_register_map[ssa.index] = index;
-   allocate_with_mask(index, 0xf, true);
-}
-
-void ValuePool::allocate_arrays(array_list& arrays)
-{
-   int ncomponents = 0;
-   int current_index = m_next_register_index;
-   unsigned instance = 0;
-
-   while (!arrays.empty()) {
-      auto a = arrays.top();
-      arrays.pop();
-
-      /* This is a bit hackish, return an id that encodes the array merge. To make sure
-       * that the mapping doesn't go wrong we have to make sure the arrays is longer than
-       * the number of instances in this arrays slot */
-      if (a.ncomponents + ncomponents > 4 ||
-          a.length < instance) {
-         current_index = m_next_register_index;
-         ncomponents = 0;
-         instance = 0;
-      }
-
-      if (ncomponents == 0)
-         m_next_register_index += a.length;
-
-      uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents;
-
-      PGPRArray array = PGPRArray(new GPRArray(current_index, a.length, mask, ncomponents));
-
-      m_reg_arrays.push_back(array);
-
-      sfn_log << SfnLog::reg << "Add array at "<< current_index
-              << " of size " << a.length << " with " << a.ncomponents
-              << " components, mask " << mask << "\n";
-
-      m_ssa_register_map[a.index | 0x80000000] = current_index + instance;
-
-      for (unsigned  i = 0; i < a.ncomponents; ++i)
-         m_registers[((current_index  + instance) << 3) + i] = array;
-
-      VRec next_reg = {current_index + instance, mask, mask};
-      m_register_map[current_index + instance] = next_reg;
-
-      ncomponents += a.ncomponents;
-      ++instance;
-   }
-}
-
-void ValuePool::allocate_local_register(const nir_register& reg)
-{
-   int index = m_next_register_index++;
-   m_ssa_register_map[reg.index | 0x80000000] = index;
-   allocate_with_mask(index, 0xf, true);
-
-   /* Create actual register and map it */;
-   for (int i = 0; i < 4; ++i) {
-      int k = (index << 3) + i;
-      m_registers[k] = std::make_shared<GPRValue>(index, i);
-   }
-}
-
-void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays)
-{
-   sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index
-           << " as " << m_next_register_index << "\n";
-
-   if (reg.num_array_elems) {
-      array_entry ae = {reg.index, reg.num_array_elems, reg.num_components};
-      arrays.push(ae);
-   }
-   else
-      allocate_local_register(reg);
-}
-
-bool ValuePool::create_undef(nir_ssa_undef_instr* instr)
-{
-   m_ssa_undef.insert(instr->def.index);
-   return true;
-}
-
-int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc)
-{
-   int retval;
-   VRec next_register = { index, mask };
-
-   sfn_log << SfnLog::reg << (pre_alloc ? "Pre-alloc" : "Allocate")
-           << " register (" << index << ", " << mask << ")\n";
-   retval = index;
-   auto r = m_register_map.find(index);
-
-   if (r != m_register_map.end()) {
-      if ((r->second.mask & next_register.mask) &&
-          !(r->second.pre_alloc_mask & next_register.mask)) {
-         std::cerr << "r600 ERR: register ("
-                   << index << ", " << mask
-                   << ") already allocated as (" << r->second.index << ", "
-                   << r->second.mask << ", " << r->second.pre_alloc_mask
-                   << ") \n";
-         retval = -1;
-      } else {
-         r->second.mask |= next_register.mask;
-         if (pre_alloc)
-            r->second.pre_alloc_mask |= next_register.mask;
-         retval = r->second.index;
-      }
-   } else  {
-      if (pre_alloc)
-         next_register.pre_alloc_mask = mask;
-      m_register_map[index] = next_register;
-      retval = next_register.index;
-   }
-
-   sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R"
-           << retval << "\n";
-
-   return retval;
-}
-
-PValue ValuePool::literal(uint32_t value)
-{
-   auto l = m_literals.find(value);
-   if (l != m_literals.end())
-      return l->second;
-
-   m_literals[value] = PValue(new LiteralValue(value));
-   return m_literals[value];
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/src/gallium/drivers/r600/sfn/sfn_valuepool.h
deleted file mode 100644
index fa1e550..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_valuepool.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/* -*- mesa-c++  -*-
- *
- * Copyright (c) 2018 Collabora LTD
- *
- * Author: Gert Wollny <gert.wollny@collabora.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef SFN_VALUEPOOL_H
-#define SFN_VALUEPOOL_H
-
-#include "sfn_value.h"
-#include "sfn_value_gpr.h"
-
-#include <set>
-#include <queue>
-
-namespace r600 {
-
-using LiteralBuffer = std::map<unsigned, const nir_load_const_instr *>;
-
-class ValueMap {
-public:
-   void insert(const PValue& v) {
-      auto idx = index_from(v->sel(), v->chan());
-      m_map[idx] = v;
-   }
-   PValue get_or_inject(uint32_t index, uint32_t chan) {
-      auto idx = index_from(index, chan);
-      auto v = m_map.find(idx);
-      if (v == m_map.end()) {
-         insert(PValue(new GPRValue(index, chan)));
-         v = m_map.find(idx);
-      }
-      return v->second;
-   }
-   std::map<uint32_t, PValue>::const_iterator begin() const {return m_map.begin();}
-   std::map<uint32_t, PValue>::const_iterator end() const {return m_map.end();}
-
-private:
-   uint32_t index_from(uint32_t index, uint32_t chan) {
-      return (index << 3) + chan;
-   }
-   std::map<uint32_t, PValue> m_map;
-};
-
-/** \brief Class to keep track of registers, uniforms, and literals
- * This class holds the references to the uniforms and the literals
- * and is responsible for allocating the registers.
- */
-class ValuePool
-{
-public:
-
-   struct  array_entry {
-      unsigned index;
-      unsigned length;
-      unsigned ncomponents;
-
-      bool operator ()(const array_entry& a, const array_entry& b) const {
-         return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
-      }
-   };
-
-   using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
-                                          array_entry>;
-
-   ValuePool();
-
-
-   GPRVector vec_from_nir(const nir_dest& dst, int num_components);
-
-   std::vector<PValue> varvec_from_nir(const nir_dest& src, int num_components);
-   std::vector<PValue> varvec_from_nir(const nir_src& src, int num_components);
-
-   PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
-
-   PValue from_nir(const nir_src& v, unsigned component);
-   /** Get a register that is used as source register in an ALU instruction
-    * The PValue holds one componet as specified. If the register refers to
-    * a GPR it must already have been allocated, uniforms and literals on
-    * the other hand might be pre-loaded.
-    */
-   PValue from_nir(const nir_alu_src& v, unsigned component);
-
-   /** Get a register that is used as source register in an Texture instruction
-    * The PValue holds one componet as specified.
-    */
-   PValue from_nir(const nir_tex_src& v, unsigned component);
-
-   /** Allocate a register that is used as destination register in an ALU
-    * instruction. The PValue holds one componet as specified.
-    */
-   PValue from_nir(const nir_alu_dest& v, unsigned component);
-
-   /** Allocate a register that is used as destination register in any
-    * instruction. The PValue holds one componet as specified.
-    */
-   PValue from_nir(const nir_dest& v, unsigned component);
-
-
-   /** Inject a register into a given ssa index position
-    * This is used to redirect loads from system values and vertex attributes
-    * that are already loaded into registers */
-   bool inject_register(unsigned sel, unsigned swizzle, const PValue &reg, bool map);
-
-   /** Reserve space for a local register */
-   void allocate_local_register(const nir_register& reg);
-   void allocate_local_register(const nir_register &reg, array_list& arrays);
-
-   void allocate_arrays(array_list& arrays);
-
-
-   void increment_reserved_registers() {
-      ++m_next_register_index;
-   }
-
-   void set_reserved_registers(unsigned rr) {
-      m_next_register_index =rr;
-   }
-
-   /** Reserve a undef register, currently it uses (0,7),
-    * \todo should be eliminated in the final pass
-    */
-   bool create_undef(nir_ssa_undef_instr* instr);
-
-   /** Create a new register with the given index and store it in the
-    * lookup map
-    */
-   PValue create_register_from_nir_src(const nir_src& sel, int comp);
-
-   ValueMap get_temp_registers() const;
-
-   PValue lookup_register(unsigned sel, unsigned swizzle, bool required);
-
-   size_t register_count() const {return m_next_register_index;}
-
-   PValue literal(uint32_t value);
-
-   PGPRValue get_temp_register(int channel = -1);
-
-   GPRVector get_temp_vec4(const GPRVector::Swizzle &swizzle = {0,1,2,3});
-
-protected:
-   std::vector<PGPRArray> m_reg_arrays;
-
-private:
-
-   /** Get the register index mapped from the NIR code to the r600 ir
-    * \param index NIR index of register
-    * \returns r600 ir inxex
-    */
-   int lookup_register_index(const nir_src& src) const;
-
-   /** Get the register index mapped from the NIR code to the r600 ir
-    * \param index NIR index of register
-    * \returns r600 ir inxex
-    */
-   int lookup_register_index(const nir_dest& dst);
-
-   /** Allocate a register that is is needed for lowering an instruction
-    * that requires complex calculations,
-    */
-   int allocate_temp_register();
-
-
-   PValue create_register(unsigned index, unsigned swizzle);
-
-   unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa);
-
-   unsigned get_ssa_register_index(const nir_ssa_def& ssa) const;
-
-   unsigned get_local_register_index(const nir_register& reg);
-
-   unsigned get_local_register_index(const nir_register& reg) const;
-
-   void allocate_ssa_register(const nir_ssa_def& ssa);
-
-   void allocate_array(const nir_register& reg);
-
-
-   /** Allocate a register index with the given component mask.
-    * If one of the components is already been allocated the function
-    * will signal an error bz returning -1, otherwise a register index is
-    * returned.
-    */
-   int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc);
-
-   /** search for a new register with the given index in the
-    * lookup map.
-    * \param sel register sel value
-    * \param swizzle register component, can also be 4,5, and 7
-    * \param required true: in debug mode assert when register doesn't exist
-    *                 false: return nullptr on failure
-    */
-
-   std::set<unsigned> m_ssa_undef;
-
-   std::map<unsigned, unsigned> m_ssa_register_map;
-
-   std::map<unsigned, PValue> m_registers;
-
-   static PValue m_undef;
-
-   struct VRec {
-      unsigned index;
-      unsigned mask;
-      unsigned pre_alloc_mask;
-   };
-   std::map<unsigned, VRec> m_register_map;
-
-   unsigned m_next_register_index;
-
-
-   std::map<uint32_t, PValue> m_literals;
-
-   int current_temp_reg_index;
-   int next_temp_reg_comp;
-};
-
-}
-
-#endif // SFN_VALUEPOOL_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
deleted file mode 100644
index ff49216..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
+++ /dev/null
@@ -1,535 +0,0 @@
-#include "sfn_vertexstageexport.h"
-
-#include "sfn_shaderio.h"
-
-namespace r600 {
-
-using std::priority_queue;
-
-VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
-   m_proc(proc),
-   m_cur_clip_pos(1)
-{
-
-}
-
-VertexStageExportBase::~VertexStageExportBase()
-{
-
-}
-
-bool VertexStageExportBase::do_process_outputs(nir_variable *output)
-{
-   return true;
-}
-
-void VertexStageExportBase::emit_shader_start()
-{
-
-}
-
-void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr)
-{
-
-}
-
-bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr)
-{
-   auto index = nir_src_as_const_value(instr->src[1]);
-   assert(index && "Indirect outputs not supported");
-
-   const store_loc store_info  = {
-      nir_intrinsic_component(instr),
-      nir_intrinsic_io_semantics(instr).location,
-      (unsigned)nir_intrinsic_base(instr) + index->u32,
-      0
-   };
-
-   return do_store_output(store_info, instr);
-}
-
-VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
-                                               const pipe_stream_output_info *so_info,
-                                               r600_pipe_shader *pipe_shader, const r600_shader_key &key):
-   VertexStageWithOutputInfo(proc),
-   m_last_param_export(nullptr),
-   m_last_pos_export(nullptr),
-   m_num_clip_dist(0),
-   m_enabled_stream_buffers_mask(0),
-   m_so_info(so_info),
-   m_pipe_shader(pipe_shader),
-   m_key(key)
-{
-}
-
-bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output)
-{
-   if (output->data.location == VARYING_SLOT_COL0 ||
-       output->data.location == VARYING_SLOT_COL1 ||
-       (output->data.location >= VARYING_SLOT_VAR0 &&
-       output->data.location <= VARYING_SLOT_VAR31) ||
-       (output->data.location >= VARYING_SLOT_TEX0 &&
-        output->data.location <= VARYING_SLOT_TEX7) ||
-       output->data.location == VARYING_SLOT_BFC0 ||
-       output->data.location == VARYING_SLOT_BFC1 ||
-       output->data.location == VARYING_SLOT_CLIP_VERTEX ||
-       output->data.location == VARYING_SLOT_CLIP_DIST0 ||
-       output->data.location == VARYING_SLOT_CLIP_DIST1 ||
-       output->data.location == VARYING_SLOT_POS ||
-       output->data.location == VARYING_SLOT_PSIZ ||
-       output->data.location == VARYING_SLOT_FOGC ||
-       output->data.location == VARYING_SLOT_LAYER ||
-       output->data.location == VARYING_SLOT_EDGE ||
-       output->data.location == VARYING_SLOT_VIEWPORT
-       ) {
-
-      r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
-      auto semantic = r600_get_varying_semantic(output->data.location);
-      io.name = semantic.first;
-      io.sid = semantic.second;
-
-      m_proc.evaluate_spi_sid(io);
-      io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
-                      << output->data.location_frac;
-      ++m_proc.sh_info().noutput;
-
-      if (output->data.location == VARYING_SLOT_PSIZ ||
-          output->data.location == VARYING_SLOT_EDGE ||
-          output->data.location == VARYING_SLOT_LAYER) // VIEWPORT?
-            m_cur_clip_pos = 2;
-
-      if (output->data.location != VARYING_SLOT_POS &&
-          output->data.location != VARYING_SLOT_EDGE &&
-          output->data.location != VARYING_SLOT_PSIZ &&
-          output->data.location != VARYING_SLOT_CLIP_VERTEX)
-         m_param_driver_locations.push(output->data.driver_location);
-
-      return true;
-   }
-   return false;
-}
-
-bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
-{
-   switch (store_info.location) {
-   case VARYING_SLOT_PSIZ:
-      m_proc.sh_info().vs_out_point_size = 1;
-      m_proc.sh_info().vs_out_misc_write = 1;
-      FALLTHROUGH;
-   case VARYING_SLOT_POS:
-      return emit_varying_pos(store_info, instr);
-   case VARYING_SLOT_EDGE: {
-      std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
-      return emit_varying_pos(store_info, instr, &swizzle_override);
-   }
-   case VARYING_SLOT_VIEWPORT: {
-      std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
-      return emit_varying_pos(store_info, instr, &swizzle_override) &&
-            emit_varying_param(store_info, instr);
-   }
-   case VARYING_SLOT_CLIP_VERTEX:
-      return emit_clip_vertices(store_info, instr);
-   case VARYING_SLOT_CLIP_DIST0:
-   case VARYING_SLOT_CLIP_DIST1:
-      m_num_clip_dist += 4;
-      return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr);
-   case VARYING_SLOT_LAYER: {
-      m_proc.sh_info().vs_out_misc_write = 1;
-      m_proc.sh_info().vs_out_layer = 1;
-      std::array<uint32_t, 4> swz = {7,7,0,7};
-      return emit_varying_pos(store_info, instr, &swz) &&
-            emit_varying_param(store_info, instr);
-   }
-   case VARYING_SLOT_VIEW_INDEX:
-      return emit_varying_pos(store_info, instr) &&
-            emit_varying_param(store_info, instr);
-
-   default:
-         return emit_varying_param(store_info, instr);
-   }
-
-   fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
-           store_info.location);
-   return false;
-}
-
-bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr,
-                                              std::array<uint32_t, 4> *swizzle_override)
-{
-   std::array<uint32_t,4> swizzle;
-   uint32_t write_mask = 0;
-
-   if (swizzle_override) {
-      swizzle = *swizzle_override;
-      for (int i = 0; i < 4; ++i) {
-         if (swizzle[i] < 6)
-            write_mask |= 1 << i;
-      }
-   } else {
-      write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
-      for (int i = 0; i < 4; ++i)
-         swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
-   }
-
-   m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
-
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle);
-   m_proc.set_output(store_info.driver_location, value.sel());
-
-   int export_slot = 0;
-
-   switch (store_info.location) {
-   case VARYING_SLOT_EDGE: {
-      m_proc.sh_info().vs_out_misc_write = 1;
-      m_proc.sh_info().vs_out_edgeflag = 1;
-      m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
-      m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
-      m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf;
-   }
-      FALLTHROUGH;
-   case VARYING_SLOT_PSIZ:
-   case VARYING_SLOT_LAYER:
-      export_slot = 1;
-      break;
-   case VARYING_SLOT_VIEWPORT:
-      m_proc.sh_info().vs_out_misc_write = 1;
-      m_proc.sh_info().vs_out_viewport = 1;
-      export_slot = 1;
-      break;
-   case VARYING_SLOT_POS:
-      break;
-   case VARYING_SLOT_CLIP_DIST0:
-   case VARYING_SLOT_CLIP_DIST1:
-      export_slot = m_cur_clip_pos++;
-      break;
-   default:
-      sfn_log << SfnLog::err << __func__ << "Unsupported location "
-              << store_info.location << "\n";
-      return false;
-   }
-
-   m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
-   m_proc.emit_export_instruction(m_last_pos_export);
-   m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr());
-   return true;
-}
-
-bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr)
-{
-   assert(store_info.driver_location < m_proc.sh_info().noutput);
-   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n";
-
-   int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
-   std::array<uint32_t,4> swizzle;
-   for (int i = 0; i < 4; ++i)
-      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
-
-   //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
-
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true);
-   m_proc.sh_info().output[store_info.driver_location].gpr = value.sel();
-
-   /* This should use the registers!! */
-   m_proc.set_output(store_info.driver_location, value.sel());
-
-   m_last_param_export = new ExportInstruction(param_id(store_info.driver_location),
-                                               value, ExportInstruction::et_param);
-   m_proc.emit_export_instruction(m_last_param_export);
-   m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr());
-   return true;
-}
-
-bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr)
-{
-   m_proc.sh_info().cc_dist_mask = 0xff;
-   m_proc.sh_info().clip_dist_write = 0xff;
-
-   m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3});
-   m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex);
-
-   for (int i = 0; i < 4; ++i)
-      m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i;
-
-   GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
-
-   for (int i = 0; i < 8; i++) {
-      int oreg = i >> 2;
-      int ochan = i & 3;
-      AluInstruction *ir = nullptr;
-      for (int j = 0; j < 4; j++) {
-         ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j),
-                                 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
-                                 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
-         m_proc.emit_instruction(ir);
-      }
-      ir->set_flag(alu_last_instr);
-   }
-
-   m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
-   m_proc.emit_export_instruction(m_last_pos_export);
-
-   m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
-   m_proc.emit_export_instruction(m_last_pos_export);
-
-   return true;
-}
-
-VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc):
-   VertexStageExportBase(proc),
-   m_current_param(0)
-{
-
-}
-
-void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr)
-{
-   auto location = nir_intrinsic_io_semantics(instr).location;
-   auto driver_location = nir_intrinsic_base(instr);
-   auto index = nir_src_as_const_value(instr->src[1]);
-   assert(index);
-
-   unsigned noutputs = driver_location + index->u32 + 1;
-   if (m_proc.sh_info().noutput < noutputs)
-      m_proc.sh_info().noutput = noutputs;
-
-   r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32];
-   auto semantic = r600_get_varying_semantic(location + index->u32);
-   io.name = semantic.first;
-   io.sid = semantic.second;
-   m_proc.evaluate_spi_sid(io);
-   io.write_mask = nir_intrinsic_write_mask(instr);
-
-   if (location == VARYING_SLOT_PSIZ ||
-       location == VARYING_SLOT_EDGE ||
-       location == VARYING_SLOT_LAYER) // VIEWPORT?
-      m_cur_clip_pos = 2;
-
-   if (location != VARYING_SLOT_POS &&
-       location != VARYING_SLOT_EDGE &&
-       location != VARYING_SLOT_PSIZ &&
-       location != VARYING_SLOT_CLIP_VERTEX) {
-      m_param_driver_locations.push(driver_location + index->u32);
-   }
-}
-
-unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location)
-{
-   auto param_loc = m_param_map.find(driver_location);
-   assert(param_loc != m_param_map.end());
-   return param_loc->second;
-}
-
-void VertexStageWithOutputInfo::emit_shader_start()
-{
-   while (!m_param_driver_locations.empty()) {
-      auto loc = m_param_driver_locations.top();
-      m_param_driver_locations.pop();
-      m_param_map[loc] = m_current_param++;
-   }
-}
-
-unsigned VertexStageWithOutputInfo::current_param() const
-{
-   return m_current_param;
-}
-
-void VertexStageExportForFS::finalize_exports()
-{
-   if (m_key.vs.as_gs_a) {
-      PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
-      GPRVector primid({m_proc.primitive_id(), o,o,o});
-      m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param);
-      m_proc.emit_export_instruction(m_last_param_export);
-      int i;
-      i = m_proc.sh_info().noutput++;
-      auto& io = m_proc.sh_info().output[i];
-      io.name = TGSI_SEMANTIC_PRIMID;
-      io.sid = 0;
-      io.gpr = 0;
-      io.interpolate = TGSI_INTERPOLATE_CONSTANT;
-      io.write_mask = 0x1;
-      io.spi_sid = m_key.vs.prim_id_out;
-      m_proc.sh_info().vs_as_gs_a = 1;
-   }
-
-   if (m_so_info && m_so_info->num_outputs)
-      emit_stream(-1);
-
-   m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
-
-   if (!m_last_param_export) {
-      GPRVector value(0,{7,7,7,7});
-      m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
-      m_proc.emit_export_instruction(m_last_param_export);
-   }
-   m_last_param_export->set_last();
-
-   if (!m_last_pos_export) {
-      GPRVector value(0,{7,7,7,7});
-      m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
-      m_proc.emit_export_instruction(m_last_pos_export);
-   }
-   m_last_pos_export->set_last();
-}
-
-bool VertexStageExportForFS::emit_stream(int stream)
-{
-   assert(m_so_info);
-   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
-           R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
-           return false;
-   }
-   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
-           if (m_so_info->output[i].output_buffer >= 4) {
-                   R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
-                            m_so_info->output[i].output_buffer);
-                   return false;
-           }
-   }
-   const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
-   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
-   std::vector<GPRVector> tmp(m_so_info->num_outputs);
-
-   /* Initialize locations where the outputs are stored. */
-   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
-      if (stream != -1 && stream != m_so_info->output[i].stream)
-         continue;
-
-      sfn_log << SfnLog::instr << "Emit stream " << i
-              << " with register index " << m_so_info->output[i].register_index << "  so_gpr:";
-
-
-      so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
-
-      if (!so_gpr[i]) {
-         sfn_log << SfnLog::err << "\nERR: register index "
-                 << m_so_info->output[i].register_index
-                 << " doesn't correspond to an output register\n";
-         return false;
-      }
-      start_comp[i] = m_so_info->output[i].start_component;
-      /* Lower outputs with dst_offset < start_component.
-       *
-       * We can only output 4D vectors with a write mask, e.g. we can
-       * only output the W component at offset 3, etc. If we want
-       * to store Y, Z, or W at buffer offset 0, we need to use MOV
-       * to move it to X and output X. */
-      if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
-
-         GPRVector::Swizzle swizzle =  {0,1,2,3};
-         for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
-            swizzle[j] = 7;
-         tmp[i] = m_proc.get_temp_vec4(swizzle);
-
-         int sc = m_so_info->output[i].start_component;
-         AluInstruction *alu = nullptr;
-         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
-            alu = new AluInstruction(op1_mov, tmp[i][j], so_gpr[i]->reg_i(j + sc), {alu_write});
-            m_proc.emit_instruction(alu);
-         }
-         if (alu)
-            alu->set_flag(alu_last_instr);
-
-         start_comp[i] = 0;
-         so_gpr[i] = &tmp[i];
-      }
-      sfn_log << SfnLog::instr <<  *so_gpr[i] << "\n";
-   }
-
-   /* Write outputs to buffers. */
-   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
-      sfn_log << SfnLog::instr << "Write output buffer " << i
-              << " with register index " << m_so_info->output[i].register_index << "\n";
-
-      StreamOutIntruction *out_stream =
-            new StreamOutIntruction(*so_gpr[i],
-                                    m_so_info->output[i].num_components,
-                                    m_so_info->output[i].dst_offset - start_comp[i],
-                                    ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
-                                    m_so_info->output[i].output_buffer,
-                                    m_so_info->output[i].stream);
-      m_proc.emit_export_instruction(out_stream);
-      m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
-   }
-   return true;
-}
-
-
-VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
-                                               const r600_shader *gs_shader):
-   VertexStageWithOutputInfo(proc),
-   m_num_clip_dist(0),
-   m_gs_shader(gs_shader)
-{
-
-}
-
-bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
-{
-   int ring_offset = -1;
-   const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location];
-
-   sfn_log << SfnLog::io << "check output " << store_info.driver_location
-           << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
-   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
-      auto& in_io = m_gs_shader->input[k];
-      sfn_log << SfnLog::io << "  against  " <<  k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
-
-      if (in_io.name == out_io.name &&
-          in_io.sid == out_io.sid) {
-         ring_offset = in_io.ring_offset;
-         break;
-      }
-   }
-
-   if (store_info.location == VARYING_SLOT_VIEWPORT) {
-      m_proc.sh_info().vs_out_viewport = 1;
-      m_proc.sh_info().vs_out_misc_write = 1;
-      return true;
-   }
-
-   if (ring_offset == -1) {
-      sfn_log << SfnLog::err << "VS defines output at "
-              << store_info.driver_location << "name=" << out_io.name
-              << " sid=" << out_io.sid << " that is not consumed as GS input\n";
-      return true;
-   }
-
-   uint32_t write_mask =  (1 << instr->num_components) - 1;
-
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask,
-         swizzle_from_comps(instr->num_components), true);
-
-   auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
-                                      ring_offset >> 2, 4, PValue());
-   m_proc.emit_export_instruction(ir);
-
-   m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask;
-   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
-       store_info.location == VARYING_SLOT_CLIP_DIST1)
-      m_num_clip_dist += 4;
-
-   return true;
-}
-
-void VertexStageExportForGS::finalize_exports()
-{
-
-}
-
-VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
-   VertexStageExportBase(proc)
-{
-}
-
-bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
-{
-   return true;
-}
-
-void VertexStageExportForES::finalize_exports()
-{
-
-}
-
-}
diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
deleted file mode 100644
index 46aee80..0000000
--- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef VERTEXSTAGEEXPORT_H
-#define VERTEXSTAGEEXPORT_H
-
-#include "sfn_shader_base.h"
-#include <queue>
-
-namespace r600 {
-
-class VertexStage : public ShaderFromNirProcessor {
-public:
-   using ShaderFromNirProcessor::ShaderFromNirProcessor;
-
-   virtual PValue primitive_id() = 0;
-};
-
-class VertexStageExportBase
-{
-public:
-   VertexStageExportBase(VertexStage& proc);
-   virtual ~VertexStageExportBase();
-   virtual void finalize_exports() = 0;
-   virtual bool do_process_outputs(nir_variable *output);
-
-   virtual void emit_shader_start();
-
-   virtual void scan_store_output(nir_intrinsic_instr* instr);
-   bool store_output(nir_intrinsic_instr* instr);
-protected:
-
-   struct store_loc {
-      unsigned frac;
-      unsigned location;
-      unsigned driver_location;
-      int data_loc;
-   };
-   virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0;
-
-   VertexStage& m_proc;
-   int m_cur_clip_pos;
-   GPRVector m_clip_vertex;
-};
-
-
-class VertexStageWithOutputInfo: public VertexStageExportBase
-{
-protected:
-   VertexStageWithOutputInfo(VertexStage& proc);
-   void scan_store_output(nir_intrinsic_instr* instr) override;
-   void emit_shader_start() override;
-   bool do_process_outputs(nir_variable *output) override;
-protected:
-   unsigned param_id(unsigned driver_location);
-   unsigned current_param() const;
-private:
-   std::priority_queue<unsigned, std::vector<unsigned>, std::greater<unsigned> > m_param_driver_locations;
-   std::map<unsigned, unsigned> m_param_map;
-   unsigned m_current_param;
-};
-
-
-class VertexStageExportForFS : public VertexStageWithOutputInfo
-{
-public:
-   VertexStageExportForFS(VertexStage& proc,
-                          const pipe_stream_output_info *so_info,
-                          r600_pipe_shader *pipe_shader,
-                          const r600_shader_key& key);
-
-   void finalize_exports() override;
-private:
-   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
-
-   bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr);
-   bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr,
-                         std::array<uint32_t, 4> *swizzle_override = nullptr);
-   bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr);
-   bool emit_stream(int stream);
-
-   ExportInstruction *m_last_param_export;
-   ExportInstruction *m_last_pos_export;
-
-   int m_num_clip_dist;
-   int m_enabled_stream_buffers_mask;
-   const pipe_stream_output_info *m_so_info;
-   r600_pipe_shader *m_pipe_shader;
-   const r600_shader_key& m_key;
-
-
-};
-
-class VertexStageExportForGS : public VertexStageWithOutputInfo
-{
-public:
-   VertexStageExportForGS(VertexStage& proc,
-                          const r600_shader *gs_shader);
-   void finalize_exports() override;
-
-private:
-   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
-   unsigned m_num_clip_dist;
-   const r600_shader *m_gs_shader;
-};
-
-class VertexStageExportForES : public VertexStageExportBase
-{
-public:
-   VertexStageExportForES(VertexStage& proc);
-   void finalize_exports() override;
-private:
-   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
-};
-
-
-}
-
-#endif // VERTEXSTAGEEXPORT_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
new file mode 100644
index 0000000..9b96d60
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
@@ -0,0 +1,1072 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_virtualvalues.h"
+#include "sfn_alu_defines.h"
+#include "sfn_valuefactory.h"
+#include "sfn_instr.h"
+#include "sfn_debug.h"
+
+#include "util/macros.h"
+
+#include <ostream>
+#include <iostream>
+#include <iomanip>
+
+namespace r600 {
+
+std::ostream& operator << (std::ostream& os, Pin pin)
+{
+#define PRINT_PIN(X) case pin_ ## X : os << #X; break
+   switch (pin) {
+   PRINT_PIN(chan);
+   PRINT_PIN(array);
+   PRINT_PIN(fully);
+   PRINT_PIN(group);
+   PRINT_PIN(chgr);
+   PRINT_PIN(free);
+   case pin_none:
+   default:
+      ;
+   }
+#undef PRINT_PIN
+   return os;
+}
+
+VirtualValue::VirtualValue(int sel, int chan, Pin pin):
+   m_sel(sel), m_chan(chan), m_pins(pin)
+{
+#if __cpp_exceptions >= 199711L
+   ASSERT_OR_THROW(m_sel < virtual_register_base || pin != pin_fully, "Register is virtual but pinned to sel");
+#endif
+}
+
+bool VirtualValue::ready(int block, int index) const
+{
+   (void)block;
+   (void)index;
+   return true;
+}
+
+bool VirtualValue::is_virtual() const
+{
+   return m_sel >= virtual_register_base;
+}
+
+class ValueComparer: public ConstRegisterVisitor {
+public:
+   ValueComparer();
+   ValueComparer(const Register *value);
+   ValueComparer(const LocalArray *value);
+   ValueComparer(const LocalArrayValue *value);
+   ValueComparer(const UniformValue *value);
+   ValueComparer(const LiteralConstant *value);
+   ValueComparer(const InlineConstant *value);
+
+   void visit(const Register& other) override;
+   void visit(const LocalArray& other) override;
+   void visit(const LocalArrayValue& other) override;
+   void visit(const UniformValue& value) override;
+   void visit(const LiteralConstant& other) override;
+   void visit(const InlineConstant& other) override;
+
+   bool m_result;
+private:
+   const Register *m_register;
+   const LocalArray *m_array;
+   const LocalArrayValue *m_array_value;
+   const UniformValue *m_uniform_value;
+   const LiteralConstant *m_literal_value;
+   const InlineConstant *m_inline_constant;
+};
+
+class ValueCompareCreater: public ConstRegisterVisitor {
+public:
+   void visit(const Register& value) { compare = ValueComparer(&value);}
+   void visit(const LocalArray& value) {compare = ValueComparer(&value);}
+   void visit(const LocalArrayValue& value) {compare = ValueComparer(&value);}
+   void visit(const UniformValue& value) {compare = ValueComparer(&value);}
+   void visit(const LiteralConstant& value) {compare = ValueComparer(&value);}
+   void visit(const InlineConstant& value) {compare = ValueComparer(&value);}
+
+   ValueComparer compare;
+};
+
+VirtualValue::Pointer VirtualValue::from_string(const std::string& s)
+{
+   switch (s[0]) {
+   case 'S':
+   case 'R': return Register::from_string(s);
+   case 'L': return LiteralConstant::from_string(s);
+   case 'K': return UniformValue::from_string(s);
+   case 'P': return InlineConstant::param_from_string(s);
+   case 'I': return InlineConstant::from_string(s);
+
+   default:
+      std::cerr << "'" << s << "'";
+      unreachable("Unknown register type");
+   }
+}
+
+bool VirtualValue::equal_to(const VirtualValue& other) const
+{
+   bool result = m_sel == other.m_sel &&
+         m_chan == other.m_chan &&
+         m_pins == other.m_pins;
+
+   if (result) {
+      ValueCompareCreater comp_creater;
+      accept(comp_creater);
+      other.accept(comp_creater.compare);
+      result &= comp_creater.compare.m_result;
+   }
+
+   return result;
+}
+
+VirtualValue::Pointer VirtualValue::get_addr() const
+{
+   class GetAddressRegister: public ConstRegisterVisitor {
+   public:
+      void visit(const VirtualValue& value) {(void)value;}
+      void visit(const Register& value) {(void)value;};
+      void visit(const LocalArray& value) {(void)value;}
+      void visit(const LocalArrayValue& value) {m_result = value.addr();}
+      void visit(const UniformValue& value) {(void)value;}
+      void visit(const LiteralConstant& value) {(void)value;}
+      void visit(const InlineConstant& value) {(void)value;}
+
+      GetAddressRegister() : m_result(nullptr) {}
+
+      PVirtualValue m_result;
+   };
+   GetAddressRegister get_addr;
+   accept(get_addr);
+   return get_addr.m_result;
+}
+
+Register::Register(int sel, int chan, Pin pin):
+   VirtualValue(sel, chan, pin)
+{
+}
+
+void Register::add_parent(Instr *instr)
+{
+   m_parents.insert(instr);
+   instr->add_use();
+   add_parent_to_array(instr);
+}
+
+void Register::add_parent_to_array(Instr *instr)
+{
+   (void)instr;
+}
+
+void Register::del_parent(Instr *instr)
+{
+   m_parents.erase(instr);
+   instr->dec_use();
+   del_parent_from_array(instr);
+}
+
+void Register::del_parent_from_array(Instr *instr)
+{
+   (void)instr;
+}
+
+
+void Register::add_use(Instr *instr)
+{
+   const auto& [itr, inserted] = m_uses.insert(instr);  {}
+
+   if (inserted) {
+      for (auto& p: m_parents)
+         p->add_use();
+   }
+}
+
+void Register::del_use(Instr *instr)
+{
+   sfn_log << SfnLog::opt << "Del use of " << *this << " in " << *instr << "\n";
+   if (m_uses.find(instr) != m_uses.end()) {
+      m_uses.erase(instr);
+      if (is_ssa())
+         for (auto& p: m_parents)
+            p->dec_use();
+   }
+}
+
+bool Register::ready(int block, int index) const
+{
+   for (auto p : m_parents) {
+      if (p->block_id() <= block) {
+         if (p->index() < index && !p->is_scheduled()) {
+            return false;
+         }
+      }
+   }
+   return true;
+}
+
+void Register::accept(RegisterVisitor& visitor)
+{
+   visitor.visit(*this);
+}
+
+void Register::accept(ConstRegisterVisitor& visitor) const
+{
+   visitor.visit(*this);
+}
+
+
+void Register::pin_live_range(bool start, bool end)
+{
+   m_pin_start = start;
+   m_pin_end = end;
+}
+
+void Register::set_is_ssa(bool value)
+{
+   m_is_ssa = value;
+}
+
+void Register::print(std::ostream& os) const
+{
+   os << (m_is_ssa ? "S" : "R") << sel() << "." << chanchar[chan()];
+
+   if (pin() !=  pin_none)
+      os << "@" << pin();
+}
+
+Register::Pointer Register::from_string(const std::string &s)
+{
+   std::string numstr;
+   char chan = 0;
+   std::string pinstr;
+
+   assert(s[0] == 'R' || s[0] == '_' || s[0] == 'S' );
+
+   int type = 0;
+   for (unsigned i = 1; i < s.length(); ++i) {
+      if (s[i] == '.') {
+         type = 1;
+         continue;
+      } else if (s[i] == '@') {
+         type = 2;
+         continue;
+      }
+
+      switch (type) {
+      case 0: numstr.append(1, s[i]); break;
+      case 1: chan = s[i]; break;
+      case 2: pinstr.append(1, s[i]); break;
+      default:
+         unreachable("Malformed register string");
+      }
+   }
+
+   int sel;
+   if (s[0] != '_') {
+      std::istringstream n(numstr);
+      n >> sel;
+   } else {
+      sel = std::numeric_limits<int>::max();
+   }
+
+   auto p = pin_none;
+   if (pinstr == "chan")
+      p = pin_chan;
+   else if (pinstr == "array")
+      p = pin_array;
+   else if (pinstr == "fully")
+      p = pin_fully;
+   else if (pinstr == "group")
+      p = pin_group;
+   else if (pinstr == "chgr")
+      p = pin_chgr;
+   else if (pinstr == "free")
+      p = pin_free;
+
+   switch (chan) {
+   case 'x' : chan = 0; break;
+   case 'y' : chan = 1; break;
+   case 'z' : chan = 2; break;
+   case 'w' : chan = 3; break;
+   case '0' : chan = 4; break;
+   case '1' : chan = 5; break;
+   case '_' : chan = 7; break;
+   }
+
+   auto reg = new Register( sel, chan, p);
+   reg->set_is_ssa(s[0] == 'S');
+   if (p == pin_fully || p == pin_array)
+      reg->pin_live_range(true);
+   return reg;
+}
+
+RegisterVec4::RegisterVec4():
+   m_sel(-1),
+   m_swz({7,7,7,7}),
+   m_values({nullptr, nullptr, nullptr, nullptr})
+{
+}
+
+RegisterVec4::RegisterVec4(int sel, bool is_ssa, const Swizzle& swz, Pin pin):
+   m_sel(sel),
+   m_swz(swz)
+{
+   for (int i = 0; i < 4; ++i) {
+      m_values[i] = new Element( *this, new Register(m_sel, swz[i], pin));
+      m_values[i]->value()->set_is_ssa(is_ssa);
+   }
+}
+
+RegisterVec4::RegisterVec4(const RegisterVec4& orig):
+   m_sel(orig.m_sel),
+   m_swz(orig.m_swz)
+{
+   for (int i = 0; i < 4; ++i)
+      m_values[i] = new Element(*this, orig.m_values[i]->value());
+}
+
+RegisterVec4::RegisterVec4(PRegister x, PRegister y, PRegister z, PRegister w, Pin pin)
+{
+   PRegister dummy = nullptr;
+
+   if (x) {
+      m_sel = x->sel();
+   } else if (y) {
+      m_sel = y->sel();
+   } else if (z) {
+      m_sel = z->sel();
+   } else if (w) {
+      m_sel = w->sel();
+   } else
+      m_sel = 0;
+
+   if (!(x && y && z && w))
+      dummy = new Register (m_sel, 7, pin_none);
+
+   m_values[0] = new Element(*this,  x ? x : dummy);
+   m_values[1] = new Element(*this,  y ? y : dummy);
+   m_values[2] = new Element(*this,  z ? z : dummy);
+   m_values[3] = new Element(*this,  w ? w : dummy);
+
+   for (int i = 0; i < 4; ++i) {
+      if (m_values[0]->value()->pin() == pin_fully) {
+         pin = pin_fully;
+         break;
+      }
+   }
+
+   for (int i = 0; i < 4; ++i) {
+      switch (m_values[i]->value()->pin()) {
+      case pin_none:
+      case pin_free:
+         m_values[i]->value()->set_pin(pin);
+      break;
+      case pin_chan:
+         if (pin == pin_group)
+            m_values[i]->value()->set_pin(pin_chgr);
+      break;
+      default:
+         ;
+      }
+
+      m_swz[i] = m_values[i]->value()->chan();
+      assert(m_values[i]->value()->sel() == m_sel);
+   }
+}
+
+void RegisterVec4::add_use(Instr *instr)
+{
+   for (auto& r: m_values) {
+      if (r->value()->chan() < 4)
+          r->value()->add_use(instr);
+   }
+}
+
+void RegisterVec4::del_use(Instr *instr)
+{
+   for (auto& r: m_values) {
+      r->value()->del_use(instr);
+   }
+}
+
+bool RegisterVec4::has_uses() const
+{
+   for (auto& r: m_values) {
+      if (r->value()->has_uses())
+         return true;
+   }
+   return false;
+}
+
+
+int RegisterVec4::sel() const
+{
+   int comp = 0;
+   while (comp < 4 && m_values[comp]->value()->chan() > 3)
+      ++comp;
+   return m_values[comp < 4 ? comp : 0]->value()->sel();
+}
+
+bool RegisterVec4::ready(int block_id, int index) const
+{
+   for (int i = 0; i < 4; ++i) {
+      if (m_values[i]->value()->chan() < 4) {
+         if (!m_values[i]->value()->ready(block_id, index))
+            return false;
+      }
+   }
+   return true;
+}
+
+void RegisterVec4::print(std::ostream& os) const
+{
+   os << (m_values[0]->value()->is_ssa() ? 'S' : 'R') << sel() << ".";
+   for (int i = 0; i < 4; ++i)
+      os << VirtualValue::chanchar[m_swz[i]];
+}
+
+bool operator == (const RegisterVec4& lhs, const RegisterVec4& rhs)
+{
+   for (int i = 0; i < 4; ++i) {
+      assert(lhs[i]);
+      assert(rhs[i]);
+      if (!lhs[i]->equal_to(*rhs[i])) {
+         return false;
+      }
+   }
+   return true;
+}
+
+RegisterVec4::Element::Element(const RegisterVec4& parent, int chan):
+   m_parent(parent),
+   m_value(new Register(parent.m_sel, chan, pin_none))
+{
+}
+
+RegisterVec4::Element::Element(const RegisterVec4& parent, PRegister value):
+   m_parent(parent),
+   m_value(value)
+{
+}
+
+LiteralConstant::LiteralConstant(uint32_t value):
+   VirtualValue(ALU_SRC_LITERAL, -1, pin_none),
+   m_value(value)
+{
+}
+
+void LiteralConstant::accept(RegisterVisitor& vistor)
+{
+   vistor.visit(*this);
+}
+
+void LiteralConstant::accept(ConstRegisterVisitor& vistor) const
+{
+   vistor.visit(*this);
+}
+
+void LiteralConstant::print(std::ostream& os) const
+{
+   os << "L[0x" << std::hex << m_value << std::dec << "]";
+}
+
+LiteralConstant::Pointer LiteralConstant::from_string(const std::string& s)
+{
+   if (s[1] != '[')
+      return nullptr;
+
+   std::string numstr;
+   for (unsigned i = 2; i < s.length(); ++i) {
+      if (s[i] == ']')
+         break;
+
+      if (isxdigit(s[i]))
+         numstr.append(1, s[i]);
+      if (s[i] == 'x')
+         continue;
+   }
+
+   std::istringstream n(numstr);
+
+   uint32_t num;
+   n >> std::hex >> num;
+   return new LiteralConstant( num);
+}
+
+
+// Inline constants usually don't care about the channel but
+// ALU_SRC_PV should be pinned, but we only emit these constants
+// very late, and based on the real register they replace
+InlineConstant::InlineConstant(int sel, int chan):
+   VirtualValue(sel, chan, pin_none)
+{
+}
+
+void InlineConstant::accept(RegisterVisitor& vistor)
+{
+   vistor.visit(*this);
+}
+
+void InlineConstant::accept(ConstRegisterVisitor& vistor) const
+{
+   vistor.visit(*this);
+}
+
+void InlineConstant::print(std::ostream& os) const
+{
+   auto ivalue = alu_src_const.find(static_cast<AluInlineConstants>(sel()));
+   if (ivalue != alu_src_const.end()) {
+      os << "I[" << ivalue->second.descr<< "]";
+      if (ivalue->second.use_chan)
+         os << "." << chanchar[chan()];
+   } else if (sel() >= ALU_SRC_PARAM_BASE &&
+              sel() <  ALU_SRC_PARAM_BASE + 32 ) {
+      os << "Param"
+          << sel() - ALU_SRC_PARAM_BASE
+          << "." << chanchar[chan()];
+   } else {
+      unreachable("Unknown inline constant");
+   }
+}
+
+std::map<std::string, std::pair<AluInlineConstants, bool>> InlineConstant::s_opmap;
+
+InlineConstant::Pointer InlineConstant::from_string(const std::string& s)
+{
+   std::string namestr;
+   char chan = 0;
+
+   ASSERT_OR_THROW(s[1] == '[', "inline const not started with '['");
+
+   unsigned i = 2;
+   while (i < s.length()) {
+      if (s[i] == ']')
+         break;
+      namestr.append(1, s[i]);
+      ++i;
+   }
+
+   ASSERT_OR_THROW(s[i] == ']', "inline const not closed with ']'");
+
+   auto entry = s_opmap.find(namestr);
+   AluInlineConstants value = ALU_SRC_UNKNOWN;
+   bool use_chan = false;
+
+   if (entry == s_opmap.end())  {
+      for (auto& [opcode, descr] : alu_src_const) {
+         if (namestr == descr.descr) {
+            value = opcode;
+            use_chan = descr.use_chan;
+            s_opmap[namestr] = std::make_pair(opcode, use_chan);
+
+            break;
+         }
+      }
+   } else {
+      value = entry->second.first;
+      use_chan = entry->second.second;
+   }
+
+   ASSERT_OR_THROW(value != ALU_SRC_UNKNOWN, "Unknwon inline constant was given");
+
+   if (use_chan) {
+      ASSERT_OR_THROW(s[i + 1] == '.', "inline const channel not started with '.'");
+      switch (s[i + 2]) {
+      case 'x': chan = 0; break;
+      case 'y': chan = 1; break;
+      case 'z': chan = 2; break;
+      case 'w': chan = 3; break;
+      case '0': chan = 4; break;
+      case '1': chan = 5; break;
+      case '_': chan = 7; break;
+      default:
+         ASSERT_OR_THROW(0, "invalied inline const channel ");
+      }
+   }
+   return new InlineConstant( value, chan);
+}
+
+InlineConstant::Pointer InlineConstant::param_from_string(const std::string& s)
+{
+   assert(s.substr(0, 5) == "Param");
+
+   int param = 0;
+   int i = 5;
+   while (isdigit(s[i])) {
+      param *= 10;
+      param +=  s[i] - '0';
+      ++i;
+   }
+
+   int chan = 7;
+   assert(s[i] == '.');
+   switch (s[i+1]) {
+   case 'x': chan = 0; break;
+   case 'y': chan = 1; break;
+   case 'z': chan = 2; break;
+   case 'w': chan = 3; break;
+   default:
+      unreachable("unsupported channel char");
+   }
+
+   return new InlineConstant( ALU_SRC_PARAM_BASE + param, chan);
+}
+
+UniformValue::UniformValue(int sel, int chan, int kcache_bank):
+   VirtualValue(sel, chan, pin_none),
+   m_kcache_bank(kcache_bank),
+   m_buf_addr(nullptr)
+{
+}
+
+UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr):
+   VirtualValue(sel, chan, pin_none),
+   m_kcache_bank(0),
+   m_buf_addr(buf_addr)
+{
+}
+
+void UniformValue::accept(RegisterVisitor& vistor)
+{
+   vistor.visit(*this);
+}
+
+void UniformValue::accept(ConstRegisterVisitor& vistor) const
+{
+   vistor.visit(*this);
+}
+
+PVirtualValue UniformValue::buf_addr() const
+{
+   return m_buf_addr;
+}
+
+void UniformValue::print(std::ostream& os) const
+{
+   os << "KC" << m_kcache_bank;
+   if (m_buf_addr) {
+      os << "[" << *m_buf_addr
+         << "]";
+   }
+   os << "[" << (sel() - 512) << "]." << chanchar[chan()];
+}
+
+bool UniformValue::equal_buf_and_cache(const UniformValue& other) const
+{
+   bool result = m_kcache_bank == other.m_kcache_bank;
+   if (result) {
+      if (m_buf_addr && other.m_buf_addr) {
+         result = m_buf_addr->equal_to(other);
+      } else {
+         result = !m_buf_addr && !other.m_buf_addr;
+      }
+   }
+   return result;
+}
+
+
+UniformValue::Pointer UniformValue::from_string(const std::string& s)
+{
+   assert(s[1] == 'C');
+   std::istringstream is(s.substr(2));
+   int bank;
+   char c;
+   is >> bank;
+   is >> c;
+
+   assert(c == '[');
+
+   int index;
+   is >> index;
+
+   is >> c;
+   assert(c == ']');
+   is >> c;
+   assert(c == '.');
+
+   is >> c;
+   int chan = 0;
+   switch (c) {
+   case 'x': chan = 0; break;
+   case 'y': chan = 1; break;
+   case 'z': chan = 2; break;
+   case 'w': chan = 3; break;
+   default:
+      unreachable("Unknown channle when reading uniform");
+   }
+   return new UniformValue(index + 512, chan, bank);
+}
+
+LocalArray::LocalArray(int base_sel, int nchannels, int size, int frac):
+   Register(base_sel, nchannels, pin_array),
+   m_base_sel(base_sel),
+   m_nchannels(nchannels),
+   m_size(size),
+   m_values(size * nchannels),
+   m_frac(frac)
+{
+   assert(nchannels <= 4);
+   assert(nchannels + frac <= 4);
+
+   sfn_log << SfnLog::reg << "Allocate array A" <<  base_sel << "("
+           << size << ", " << frac << ", " << nchannels << ")\n";
+
+   for (int c = 0; c < nchannels; ++c) {
+      for (unsigned i = 0; i < m_size; ++i) {
+         PRegister reg = new Register( base_sel + i, c + frac, pin_array);
+         m_values[m_size * c + i] = new LocalArrayValue(reg, *this);
+
+         /* Pin the array register on the start, because currently we don't
+          * don't track the first write to an array element as write to all
+          * array elements, and it seems that the one can not just use registers
+          * that are not written to in an array for other purpouses */
+         m_values[m_size * c + i]->pin_live_range(true);
+      }
+   }
+}
+
+void LocalArray::accept(RegisterVisitor& vistor)
+{
+   vistor.visit(*this);
+}
+
+void LocalArray::accept(ConstRegisterVisitor& vistor) const
+{
+   vistor.visit(*this);
+}
+
+void LocalArray::print(std::ostream& os) const
+{
+   os << "A" << m_base_sel << "[0 " << ":" << m_values.size() << "].";
+   for (unsigned i = 0; i < m_nchannels; ++i) {
+      os << chanchar[i];
+   }
+}
+
+
+size_t LocalArray::size() const
+{
+   return m_size;
+}
+
+uint32_t LocalArray::nchannels() const
+{
+   return m_nchannels;
+}
+
+PRegister LocalArray::element(size_t offset, PVirtualValue indirect, uint32_t chan)
+{
+   ASSERT_OR_THROW(offset < m_size, "Array: index out of range");
+   ASSERT_OR_THROW(chan < m_nchannels, "Array: channel out of range");
+
+   sfn_log << SfnLog::reg << "Request element A" << m_base_sel << "["  << offset;
+   if (indirect)
+      sfn_log   << "+" << *indirect;
+   sfn_log << SfnLog::reg << "]\n";
+
+   if (indirect) {
+      class ResolveDirectArrayElement: public ConstRegisterVisitor {
+      public:
+         void visit(const Register& value) {(void) value;};
+         void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");}
+         void visit(const LocalArrayValue& value) {(void) value;}
+         void visit(const UniformValue& value) {(void)value;}
+         void visit(const LiteralConstant& value) {offset = value.value(); is_contant = true;}
+         void visit(const InlineConstant& value) {(void)value;}
+
+         ResolveDirectArrayElement(): offset(0), is_contant(false) {}
+
+         int offset;
+         bool is_contant;
+      } addr;
+
+      // If the address os a literal constant then update the offset
+      // and don't access the value indirectly
+      indirect->accept(addr);
+      if (addr.is_contant) {
+         offset += addr.offset;
+         indirect = nullptr;
+         ASSERT_OR_THROW(offset < m_size, "Array: indirect constant index out of range");
+      }
+   }
+
+   LocalArrayValue *reg = m_values[m_size * chan + offset];
+   if (indirect) {
+      reg = new LocalArrayValue( reg, indirect, *this);
+      m_values_indirect.push_back(reg);
+   }
+
+   sfn_log << SfnLog::reg << "  got " << *reg << "\n";
+   return reg;
+}
+
+bool LocalArray::ready_for_direct(int block, int index, int chan) const
+{
+   if (!Register::ready(block, index))
+      return false;
+
+   /* For direct access to an array value we also have to take indirect
+    * writes on the same channels into account */
+   for (LocalArrayValue *e : m_values_indirect) {
+      if (e->chan() == chan && !e->Register::ready(block, index)) {
+         return false;
+      }
+   }
+
+   return true;
+}
+
+bool LocalArray::ready_for_indirect(int block, int index, int chan) const
+{
+   int offset = (chan - m_frac) * m_size;
+   for (unsigned i = 0; i < m_size; ++i) {
+      if (!m_values[offset + i]->Register::ready(block, index))
+         return false;
+   }
+
+   return ready_for_direct(block, index, chan);
+}
+
+
+LocalArrayValue::LocalArrayValue(PRegister reg, PVirtualValue index,
+                                 LocalArray& array):
+   Register(reg->sel(), reg->chan(), pin_array),
+   m_addr(index),
+   m_array(array)
+{
+}
+
+const Register& LocalArray::operator ()(size_t idx, size_t chan) const
+{
+   return *m_values[m_size  * (chan - m_frac) + idx];
+}
+
+LocalArrayValue::LocalArrayValue(PRegister reg, LocalArray& array):
+   LocalArrayValue(reg, nullptr, array)
+{
+
+}
+
+
+PVirtualValue LocalArrayValue::addr() const
+{
+   return m_addr;
+}
+
+const LocalArray& LocalArrayValue::array() const
+{
+   return m_array;
+}
+
+
+void LocalArrayValue::forward_del_use(Instr *instr)
+{
+   if (m_addr && m_addr->as_register())
+      m_addr->as_register()->del_use(instr);
+}
+
+void LocalArrayValue::forward_add_use(Instr *instr)
+{
+   if (m_addr && m_addr->as_register())
+      m_addr->as_register()->add_use(instr);
+}
+
+void LocalArrayValue::accept(RegisterVisitor& vistor)
+{
+   vistor.visit(*this);
+}
+
+void LocalArrayValue::accept(ConstRegisterVisitor& vistor) const
+{
+   vistor.visit(*this);
+}
+
+void LocalArrayValue::add_parent_to_array(Instr *instr)
+{
+   m_array.add_parent(instr);
+}
+
+void LocalArrayValue::del_parent_from_array(Instr *instr)
+{
+   m_array.del_parent(instr);
+}
+
+void LocalArrayValue::print(std::ostream& os) const
+{
+   int offset = sel() - m_array.sel();
+   os << "A" << m_array.sel() << "[";
+   if ( offset > 0 && m_addr)
+      os << offset << "+" << *m_addr;
+   else if (m_addr)
+      os << *m_addr;
+   else
+      os << offset;
+   os << "]." << chanchar[chan()];
+}
+
+bool LocalArrayValue::ready(int block, int index) const
+{   
+   return m_addr ?
+         (m_array.ready_for_indirect(block, index, chan()) && m_addr->ready(block, index)):
+            m_array.ready_for_direct(block, index, chan());
+}
+
+ValueComparer::ValueComparer() :
+   m_result(false),
+   m_register(nullptr),
+   m_array(nullptr),
+   m_array_value(nullptr),
+   m_uniform_value(nullptr),
+   m_literal_value(nullptr),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const Register *value):
+   m_result(false),
+   m_register(value),
+   m_array(nullptr),
+   m_array_value(nullptr),
+   m_uniform_value(nullptr),
+   m_literal_value(nullptr),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const LocalArray *value):
+   m_result(false),
+   m_register(nullptr),
+   m_array(value),
+   m_array_value(nullptr),
+   m_uniform_value(nullptr),
+   m_literal_value(nullptr),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const LocalArrayValue *value):
+   m_result(false),
+   m_register(nullptr),
+   m_array(nullptr),
+   m_array_value(value),
+   m_uniform_value(nullptr),
+   m_literal_value(nullptr),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const UniformValue *value):
+   m_result(false),
+   m_register(nullptr),
+   m_array(nullptr),
+   m_array_value(nullptr),
+   m_uniform_value(value),
+   m_literal_value(nullptr),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const LiteralConstant *value):
+   m_result(false),
+   m_register(nullptr),
+   m_array(nullptr),
+   m_array_value(nullptr),
+   m_uniform_value(nullptr),
+   m_literal_value(value),
+   m_inline_constant(nullptr)
+{}
+
+ValueComparer::ValueComparer(const InlineConstant *value):
+   m_result(false),
+   m_register(nullptr),
+   m_array(nullptr),
+   m_array_value(nullptr),
+   m_uniform_value(nullptr),
+   m_literal_value(nullptr),
+   m_inline_constant(value)
+{}
+
+void ValueComparer::visit(const Register& other)
+{
+   (void)other;
+   m_result = !!m_register;
+};
+
+void ValueComparer::visit(const LocalArray& other)
+{
+   m_result = false;
+   if (m_array) {
+      m_result = m_array->size() == other.size() &&
+            m_array->nchannels() == other.nchannels();
+   }
+};
+
+void ValueComparer::visit(const LocalArrayValue& other)
+{
+   m_result = false;
+   if (m_array_value) {
+      m_result = m_array_value->array().equal_to(other.array());
+      if (m_result) {
+         auto my_addr = m_array_value->addr();
+         auto other_addr = other.addr();
+         if (my_addr && other_addr) {
+            m_result = my_addr->equal_to(*other_addr);
+         } else {
+            m_result = !my_addr && !other_addr;
+         }
+      }
+   }
+};
+
+void ValueComparer::visit(const UniformValue& value)
+{
+   m_result = false;
+   if (m_uniform_value) {
+      m_result = m_uniform_value->kcache_bank() == value.kcache_bank();
+      if (m_result) {
+         auto my_buf_addr = m_uniform_value->buf_addr();
+         auto other_buf_addr = value.buf_addr();
+         if (my_buf_addr && other_buf_addr) {
+            m_result = my_buf_addr->equal_to(*other_buf_addr);
+         } else {
+            m_result = !my_buf_addr && !other_buf_addr;
+         }
+      }
+   }
+};
+
+void ValueComparer::visit(const LiteralConstant& other)
+{
+   m_result = m_literal_value && (m_literal_value->value() == other.value());
+};
+
+void ValueComparer::visit(const InlineConstant& other)
+{
+   (void)other;
+   m_result = !!m_inline_constant;
+};
+
+
+} // namespace r600
diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
new file mode 100644
index 0000000..25fd958
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
@@ -0,0 +1,460 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "sfn_memorypool.h"
+#include "sfn_alu_defines.h"
+#include <memory>
+#include <vector>
+#include <iosfwd>
+#include <map>
+#include <set>
+#include <cassert>
+
+#if __cpp_exceptions >= 199711L
+#include <exception>
+#define ASSERT_OR_THROW(EXPR, ERROR) if (!(EXPR))  throw std::invalid_argument(ERROR)
+#else
+#define ASSERT_OR_THROW(EXPR, ERROR) if (!(EXPR)) unreachable(ERROR)
+#endif
+
+namespace r600 {
+
+enum Pin {
+   pin_none,
+   pin_chan,
+   pin_array,
+   pin_group,
+   pin_chgr,
+   pin_fully,
+   pin_free
+};
+
+std::ostream& operator << (std::ostream& os, Pin pin);
+
+class Register;
+class RegisterVisitor;
+class ConstRegisterVisitor;
+class Instr;
+class InlineConstant;
+class LiteralConstant;
+class UniformValue;
+
+using InstructionSet = std::set<Instr *, std::less<Instr *>,  Allocator<Instr *>>;
+
+class VirtualValue : public Allocate {
+public:
+
+   static const uint32_t virtual_register_base = 1024;
+   static const uint32_t clause_temp_registers = 2;
+   static const uint32_t gpr_register_end = 128 - 2 * clause_temp_registers;
+   static const uint32_t clause_temp_register_begin = gpr_register_end;
+   static const uint32_t clause_temp_register_end = 128;
+
+   static const uint32_t uniforms_begin = 512;
+   static const uint32_t uniforms_end = 640;
+
+   using Pointer = R600_POINTER_TYPE(VirtualValue);
+
+   VirtualValue(int sel, int chan, Pin pin);
+   VirtualValue(const VirtualValue& orig) = default;
+
+   int sel() const { return m_sel; }
+   int chan() const { return m_chan;}
+   Pin pin() const { return m_pins;};
+   bool is_virtual() const;
+
+   void set_pin(Pin p) { m_pins = p;}
+
+
+   virtual void accept(RegisterVisitor& vistor) = 0;
+   virtual void accept(ConstRegisterVisitor& vistor) const = 0;
+   virtual void print(std::ostream& os) const = 0;
+
+   bool equal_to(const VirtualValue& other) const;
+   Pointer get_addr() const;
+
+   static Pointer from_string(const std::string& s);
+
+   virtual Register *as_register() { return nullptr;}
+   virtual InlineConstant * as_inline_const() { return nullptr;}
+   virtual LiteralConstant *as_literal() { return nullptr;}
+   virtual UniformValue *as_uniform() { return nullptr;}
+   virtual bool ready(int block, int index) const;
+
+   static constexpr char chanchar[9] = "xyzw01?_";
+
+protected:
+   void do_set_chan(int c) {m_chan = c;}
+   void set_sel_internal(int sel) { m_sel = sel; }
+
+private:
+   uint32_t m_sel;
+   int m_chan;
+   Pin m_pins;
+};
+using PVirtualValue = VirtualValue::Pointer;
+
+
+inline std::ostream& operator << (std::ostream& os, const VirtualValue& val)
+{
+    val.print(os);
+    return os;
+}
+
+inline bool operator == (const VirtualValue& lhs, const VirtualValue& rhs)
+{
+    return lhs.equal_to(rhs);
+}
+
+struct LiveRange {
+   LiveRange(): start(-1), end(-1), is_pinned(false) {}
+   LiveRange(int s, int e): start(s), end(e), is_pinned(false) {}
+   int start;
+   int end;
+   int is_pinned;
+};
+
+class Register : public VirtualValue {
+public:
+    using Pointer = R600_POINTER_TYPE(Register);
+
+    Register(int sel, int chan, Pin pin);
+    void accept(RegisterVisitor& vistor) override;
+    void accept(ConstRegisterVisitor& vistor) const override;
+    void print(std::ostream& os) const override;
+
+    int live_start_pinned() const { return m_pin_start;}
+    int live_end_pinned() const { return m_pin_end;}
+
+    void pin_live_range(bool start, bool end = false);
+
+    static Pointer from_string(const std::string& s);
+
+    Register *as_register() override { return this;}
+
+    void set_is_ssa(bool value);
+
+    bool is_ssa() const { return m_is_ssa;}
+
+    void add_parent(Instr *instr);
+    void del_parent(Instr *instr);
+    const InstructionSet& parents() const {return m_parents;}
+
+    bool ready(int block, int index) const override;
+
+    const InstructionSet& uses() const {return m_uses;}
+    void add_use(Instr *instr);
+    void del_use(Instr *instr);
+    bool has_uses() const {return !m_uses.empty() || pin() == pin_array;}
+    void set_chan(int c) {do_set_chan(c);}
+
+    virtual VirtualValue *addr() const { return nullptr;}
+
+    int index() const {return m_index;}
+    void set_index(int idx) {m_index = idx;}
+
+    void set_sel(int new_sel) { set_sel_internal(new_sel); m_is_ssa = false;}
+
+private:
+    Register(const Register& orig) = delete;
+    Register(const Register&& orig) = delete;
+    Register& operator = (const Register& orig) = delete;
+    Register& operator = (Register&& orig) = delete;
+
+    virtual void forward_del_use(Instr *instr) {(void)instr;}
+    virtual void forward_add_use(Instr *instr) {(void)instr;}
+    virtual void add_parent_to_array(Instr *instr);
+    virtual void del_parent_from_array(Instr *instr);
+
+    InstructionSet m_parents;
+    InstructionSet m_uses;
+
+    int m_index{-1};
+
+    bool m_is_ssa {false};
+    bool m_pin_start {false};
+    bool m_pin_end {false};
+};
+using PRegister = Register::Pointer;
+
+inline std::ostream& operator << (std::ostream& os, const Register& val)
+{
+    val.print(os);
+    return os;
+}
+
+class InlineConstant : public VirtualValue {
+public:
+    using Pointer = R600_POINTER_TYPE(InlineConstant);
+
+    InlineConstant(int sel, int chan = 0);    
+
+    void accept(RegisterVisitor& vistor) override;
+    void accept(ConstRegisterVisitor& vistor) const override;
+    void print(std::ostream& os) const override;
+    static Pointer from_string(const std::string& s);
+	 static Pointer param_from_string(const std::string& s);
+
+    InlineConstant * as_inline_const() override { return this;}
+private:
+    InlineConstant(const InlineConstant& orig) = default;
+    static std::map<std::string, std::pair<AluInlineConstants, bool>> s_opmap;
+
+};
+using PInlineConstant = InlineConstant::Pointer;
+
+inline std::ostream& operator << (std::ostream& os, const InlineConstant& val)
+{
+    val.print(os);
+    return os;
+}
+
+class RegisterVec4 {
+public:
+	using Swizzle = std::array<uint8_t, 4>;
+   RegisterVec4();
+   RegisterVec4(int sel, bool is_ssa = false, const Swizzle& swz = {0,1,2,3}, Pin pin = pin_group);
+   RegisterVec4(PRegister x, PRegister y, PRegister z, PRegister w, Pin pin);
+
+   RegisterVec4(const RegisterVec4& orig);
+
+   RegisterVec4(RegisterVec4&& orig) = default;
+   RegisterVec4& operator = (RegisterVec4& orig) = default;
+   RegisterVec4& operator = (RegisterVec4&& orig) = default;
+
+   void add_use(Instr *instr);
+   void del_use(Instr *instr);
+   bool has_uses() const;
+
+   int sel() const;
+   void print(std::ostream& os) const;
+
+   class Element  : public Allocate {
+   public:
+      Element(const RegisterVec4& parent, int chan);
+      Element(const RegisterVec4& parent, PRegister value);
+      PRegister value() { return m_value; }
+      void set_value(PRegister reg) { m_value = reg;}
+   private:
+      const RegisterVec4& m_parent;
+      PRegister m_value;
+	};
+
+	friend class Element;
+
+	PRegister operator [] (int i) const {
+           return m_values[i]->value();
+	}
+
+        PRegister operator [] (int i) {
+           return m_values[i]->value();
+        }
+
+        void set_value(int i, PRegister reg) {
+           assert(reg->sel() == m_sel);
+           m_swz[i] = reg->chan();
+           m_values[i]->set_value(reg);
+        }
+
+        bool ready(int block_id, int index) const;
+private:
+        int m_sel;
+        Swizzle m_swz;
+        std::array<R600_POINTER_TYPE(Element), 4> m_values;
+};
+
+bool operator == (const RegisterVec4& lhs, const RegisterVec4& rhs);
+
+inline bool operator != (const RegisterVec4& lhs, const RegisterVec4& rhs)
+{
+   return !(lhs == rhs);
+}
+
+inline std::ostream& operator << (std::ostream& os, const RegisterVec4& val)
+{
+    val.print(os);
+    return os;
+}
+
+
+class LiteralConstant : public VirtualValue {
+public:
+   using Pointer = R600_POINTER_TYPE(LiteralConstant);
+
+   LiteralConstant(uint32_t value);
+   void accept(RegisterVisitor& vistor) override;
+   void accept(ConstRegisterVisitor& vistor) const override;
+   void print(std::ostream& os) const override;
+   uint32_t value() const {return m_value;}
+   static Pointer from_string(const std::string& s);
+   LiteralConstant *as_literal() override { return this;}
+
+private:
+   LiteralConstant(const LiteralConstant& orig) = default;
+   uint32_t m_value;
+};
+using PLiteralVirtualValue = LiteralConstant::Pointer;
+
+
+class UniformValue : public VirtualValue {
+public:
+   using Pointer = R600_POINTER_TYPE(UniformValue);
+
+   UniformValue(int sel, int chan, int kcache_bank = 0);
+   UniformValue(int sel, int chan, PVirtualValue buf_addr);
+
+   void accept(RegisterVisitor& vistor) override;
+   void accept(ConstRegisterVisitor& vistor) const override;
+   void print(std::ostream& os) const override;
+   int kcache_bank() const { return m_kcache_bank; }
+   PVirtualValue buf_addr() const;
+   UniformValue *as_uniform() override { return this;}
+
+   bool equal_buf_and_cache(const UniformValue& other) const;
+   static Pointer from_string(const std::string& s);
+
+private:
+   int m_kcache_bank;
+   PVirtualValue m_buf_addr;
+};
+using PUniformVirtualValue = UniformValue::Pointer;
+
+inline std::ostream& operator << (std::ostream& os, const UniformValue& val)
+{
+    val.print(os);
+    return os;
+}
+
+class LocalArrayValue;
+class LocalArray : public Register {
+public:
+   using Pointer = R600_POINTER_TYPE(LocalArray);
+   using Values = std::vector<LocalArrayValue *, Allocator<LocalArrayValue *> >;
+
+   LocalArray(int base_sel, int nchannels, int size, int frac = 0);
+   void accept(RegisterVisitor& vistor) override;
+   void accept(ConstRegisterVisitor& vistor) const override;
+   void print(std::ostream& os) const override;
+   bool ready_for_direct(int block, int index, int chan) const;
+   bool ready_for_indirect(int block, int index, int chan) const;
+
+   PRegister element(size_t offset, PVirtualValue indirect, uint32_t chan);
+
+   size_t size() const;
+   uint32_t nchannels() const;
+   uint32_t frac() const { return m_frac;}
+
+   void add_parent_to_elements(Instr *instr);
+
+   const Register& operator ()(size_t idx, size_t chan) const;
+
+   Values::iterator begin() { return m_values.begin();}
+   Values::iterator end() { return m_values.end();}
+
+private:
+    uint32_t m_base_sel;
+    uint32_t m_nchannels;
+    size_t m_size;
+    Values m_values;
+    Values m_values_indirect;
+    int m_frac;
+};
+
+inline std::ostream& operator << (std::ostream& os, const LocalArray & val)
+{
+    val.print(os);
+    return os;
+}
+
+class LocalArrayValue : public Register {
+public:
+    using Pointer = R600_POINTER_TYPE(LocalArrayValue);
+
+    LocalArrayValue(PRegister reg, LocalArray& array);
+    LocalArrayValue(PRegister reg, PVirtualValue index, LocalArray &array);
+
+    void accept(RegisterVisitor& vistor) override;
+    void accept(ConstRegisterVisitor& vistor) const override;
+    void print(std::ostream& os) const override;
+    bool ready(int block, int index) const override;
+
+    VirtualValue *addr() const override;
+    const LocalArray& array() const;
+private:
+    void forward_del_use(Instr *instr) override;
+    void forward_add_use(Instr *instr) override;
+    void add_parent_to_array(Instr *instr) override;
+    void del_parent_from_array(Instr *instr) override;
+
+    PVirtualValue m_addr;
+    LocalArray& m_array;
+};
+
+inline std::ostream& operator << (std::ostream& os, const LocalArrayValue& val)
+{
+    val.print(os);
+    return os;
+}
+
+template <typename T>
+bool sfn_value_equal(const T* lhs, const T* rhs)
+{
+   if (lhs) {
+      if (!rhs) return
+            false;
+      if ( !lhs->equal_to(*rhs))
+         return false;
+   } else {
+      if (rhs)
+         return false;
+   }
+   return true;
+}
+
+class RegisterVisitor {
+public:
+    virtual void visit(Register& value) = 0;
+    virtual void visit(LocalArray& value) = 0;
+    virtual void visit(LocalArrayValue& value) = 0;
+    virtual void visit(UniformValue& value) = 0;
+    virtual void visit(LiteralConstant& value) = 0;
+    virtual void visit(InlineConstant& value) = 0;
+};
+
+class ConstRegisterVisitor {
+public:
+    virtual void visit(const Register& value) = 0;
+    virtual void visit(const LocalArray& value) = 0;
+    virtual void visit(const LocalArrayValue& value) = 0;
+    virtual void visit(const UniformValue& value) = 0;
+    virtual void visit(const LiteralConstant& value) = 0;
+    virtual void visit(const InlineConstant& value) = 0;
+};
+
+}
+
diff --git a/src/gallium/drivers/r600/sfn/tests/meson.build b/src/gallium/drivers/r600/sfn/tests/meson.build
new file mode 100644
index 0000000..e256548
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/meson.build
@@ -0,0 +1,37 @@
+
+r600_test_lib = static_library('r600_test', 'sfn_test_shaders.cpp',
+   cpp_args: '-std=c++17',
+   include_directories : [ inc_src, inc_mapi, inc_mesa, inc_include,
+                           inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
+                           inc_gallium_drivers, ],
+   dependencies : [idep_gtest, dep_thread, dep_llvm, idep_nir,
+                     idep_nir_headers],
+
+)
+
+r600_test_dep = declare_dependency(
+    include_directories : [ inc_src, inc_mapi, inc_mesa, inc_include,
+                            inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
+                            inc_gallium_drivers, ],
+    link_with : [ libr600, libmesa, libgalliumvl,  libgallium, libradeonwinsys, r600_test_lib],
+    dependencies : [idep_gtest, dep_thread, dep_llvm, idep_nir,
+                    idep_nir_headers]
+)
+
+
+
+if with_tests
+   foreach t : ['valuefactory', 'value', 'instr', 'instrfromstring', 'liverange',
+                'optimizer', 'shaderfromstring' ]
+   test(
+       t,
+       executable('test-@0@-r600-sfn'.format(t),
+                  ['sfn_@0@_test.cpp'.format(t)],
+                  dependencies : [r600_test_dep],
+                  cpp_args: '-std=c++17'
+                  ),
+       suite : ['r600']
+    )
+    endforeach
+endif
+
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp
new file mode 100644
index 0000000..227f577
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_instr_test.cpp
@@ -0,0 +1,798 @@
+
+#include "../sfn_instr_alugroup.h"
+#include "../sfn_instr_export.h"
+#include "../sfn_instr_fetch.h"
+#include "../sfn_instr_lds.h"
+#include "../sfn_instr_tex.h"
+
+#include "gtest/gtest.h"
+
+using namespace r600;
+
+using std::vector;
+
+class InstrTest : public ::testing::Test
+{
+   void SetUp() override {
+      init_pool();
+   }
+
+   void TearDown() override {
+      release_pool();
+   }
+protected:
+   void check(const Instr& lhs,const Instr& rhs) const {
+      EXPECT_EQ(lhs, rhs);
+   }
+};
+
+
+TEST_F(InstrTest, test_alu_barrier)
+{
+   AluInstr alu(op0_group_barrier);
+
+   EXPECT_FALSE(alu.has_alu_flag(alu_write));
+   EXPECT_EQ(alu.opcode(), op0_group_barrier);
+
+   EXPECT_EQ(alu.dest_chan(), 0);
+
+   EXPECT_EQ(alu, alu);
+}
+
+
+TEST_F(InstrTest, test_alu_uni_op_mov)
+{
+   AluInstr alu(op1_mov,
+                new Register( 128, 2, pin_none),
+                new Register( 129, 0, pin_chan), {alu_write});
+
+   EXPECT_TRUE(alu.has_alu_flag(alu_write));
+
+   EXPECT_FALSE(alu.has_alu_flag(alu_last_instr));
+   EXPECT_FALSE(alu.end_group());
+   EXPECT_FALSE(alu.has_alu_flag(alu_op3));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src0_abs));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg));
+
+   EXPECT_EQ(alu.opcode(), op1_mov);
+
+   EXPECT_EQ(alu.dest_chan(), 2);
+   auto dest = alu.dest();
+
+   ASSERT_TRUE(dest);
+   EXPECT_EQ(dest->sel(), 128);
+   EXPECT_EQ(dest->chan(), 2);
+   EXPECT_EQ(dest->pin(), pin_none);
+
+   auto src0 = alu.psrc(0);
+   ASSERT_TRUE(src0);
+
+   EXPECT_EQ(src0->sel(), 129);
+   EXPECT_EQ(src0->chan(), 0);
+   EXPECT_EQ(src0->pin(), pin_chan);
+
+   EXPECT_EQ(alu.n_sources(), 1);
+
+   EXPECT_FALSE(alu.psrc(1));
+   EXPECT_FALSE(alu.psrc(2));
+
+   alu.set_alu_flag(alu_src0_abs);
+   EXPECT_TRUE(alu.has_alu_flag(alu_src0_abs));
+
+   alu.set_alu_flag(alu_src0_neg);
+   EXPECT_TRUE(alu.has_alu_flag(alu_src0_neg));
+
+}
+
+TEST_F(InstrTest, test_alu_op2)
+{
+   AluInstr alu(op2_add,
+                new Register( 130, 1, pin_none),
+                new Register( 129, 2, pin_chan),
+                new Register( 129, 3, pin_none),
+                {alu_write, alu_last_instr});
+
+   EXPECT_TRUE(alu.has_alu_flag(alu_write));
+
+   EXPECT_TRUE(alu.has_alu_flag(alu_last_instr));
+   EXPECT_FALSE(alu.has_alu_flag(alu_op3));
+
+   EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src1_neg));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src2_neg));
+
+   EXPECT_FALSE(alu.has_alu_flag(alu_src0_rel));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src1_rel));
+   EXPECT_FALSE(alu.has_alu_flag(alu_src2_rel));
+
+   EXPECT_EQ(alu.opcode(), op2_add);
+
+   EXPECT_EQ(alu.dest_chan(), 1);
+   auto dest = alu.dest();
+
+   ASSERT_TRUE(dest);
+   EXPECT_EQ(dest->sel(), 130);
+   EXPECT_EQ(dest->chan(), 1);
+   EXPECT_EQ(dest->pin(), pin_none);
+
+   EXPECT_EQ(alu.n_sources(), 2);
+
+   auto src0 = alu.psrc(0);
+   ASSERT_TRUE(src0);
+
+   EXPECT_EQ(src0->sel(), 129);
+   EXPECT_EQ(src0->chan(), 2);
+   EXPECT_EQ(src0->pin(), pin_chan);
+
+   auto src1 = alu.psrc(1);
+   ASSERT_TRUE(src1);
+
+   EXPECT_EQ(src1->sel(), 129);
+   EXPECT_EQ(src1->chan(), 3);
+   EXPECT_EQ(src1->pin(), pin_none);
+
+   EXPECT_FALSE(alu.psrc(2));
+   EXPECT_EQ(alu, alu);
+}
+
+TEST_F(InstrTest, test_alu_op3)
+{
+   AluInstr alu(op3_cnde,
+                new Register( 130, 1, pin_none),
+                new Register( 129, 2, pin_chan),
+                new Register( 129, 3, pin_none),
+                new Register( 131, 1, pin_none),
+                {alu_write, alu_last_instr});
+
+   EXPECT_TRUE(alu.has_alu_flag(alu_write));
+   EXPECT_TRUE(alu.has_alu_flag(alu_last_instr));
+   EXPECT_TRUE(alu.end_group());
+   EXPECT_TRUE(alu.has_alu_flag(alu_op3));
+
+   EXPECT_EQ(alu.opcode(), op3_cnde);
+
+   EXPECT_EQ(alu.dest_chan(), 1);
+   auto dest = alu.dest();
+
+   ASSERT_TRUE(dest);
+   EXPECT_EQ(dest->sel(), 130);
+   EXPECT_EQ(dest->chan(), 1);
+   EXPECT_EQ(dest->pin(), pin_none);
+
+   EXPECT_EQ(alu.n_sources(), 3);
+
+   auto src0 = alu.psrc(0);
+   ASSERT_TRUE(src0);
+
+   EXPECT_EQ(src0->sel(), 129);
+   EXPECT_EQ(src0->chan(), 2);
+   EXPECT_EQ(src0->pin(), pin_chan);
+
+   auto src1 = alu.psrc(1);
+   ASSERT_TRUE(src1);
+
+   EXPECT_EQ(src1->sel(), 129);
+   EXPECT_EQ(src1->chan(), 3);
+   EXPECT_EQ(src1->pin(), pin_none);
+
+   auto src2 = alu.psrc(2);
+   ASSERT_TRUE(src2);
+
+   EXPECT_EQ(src2->sel(), 131);
+   EXPECT_EQ(src2->chan(), 1);
+   EXPECT_EQ(src2->pin(), pin_none);
+
+   EXPECT_EQ(alu, alu);
+}
+
+TEST_F(InstrTest, test_alu_op1_comp)
+{
+   auto r128z = new Register( 128, 2, pin_none);
+   auto r128zc = new Register( 128, 2, pin_chan);
+   auto r128y = new Register( 128, 1, pin_none);
+   auto r129x = new Register( 129, 0, pin_none);
+   auto r129xc = new Register( 129, 0, pin_chan);
+   auto r129y = new Register( 129, 1, pin_none);
+   auto r130x = new Register( 130, 0, pin_none);
+
+
+   AluInstr alu1(op1_mov, r128z, r129x, {alu_write});
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128y, r129x, {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129xc, {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129y, {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r130x, {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128z, r129x, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu1, AluInstr(op1_flt_to_int, r128z, r129x, {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op1_mov, r128zc, r129x, {alu_write}));
+
+   EXPECT_EQ(alu1, alu1);
+}
+
+TEST_F(InstrTest, test_alu_op2_comp)
+{
+   auto r128x = new Register( 128, 0, pin_none);
+   auto r128y = new Register( 128, 1, pin_none);
+   auto r128z = new Register( 128, 2, pin_none);
+
+   AluInstr alu1(op2_add, r128z, r128x, r128y, {alu_write});
+
+   EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 129, 2, pin_none), {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 128, 0, pin_none), {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op2_add, r128z, r128x, new Register( 128, 1, pin_chan), {alu_write}));
+}
+
+TEST_F(InstrTest, test_alu_op3_comp)
+{
+   auto r128x = new Register( 128, 0, pin_none);
+   auto r128y = new Register( 128, 1, pin_none);
+   auto r128z = new Register( 128, 2, pin_none);
+
+   AluInstr alu1(op3_muladd, r128z, r128x, r128y, r128y, {alu_write});
+
+   EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 129, 2, pin_none), {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 128, 0, pin_none), {alu_write}));
+   EXPECT_NE(alu1, AluInstr(op3_muladd, r128z, r128x, r128y, new Register( 128, 1, pin_chan), {alu_write}));
+}
+
+TEST_F(InstrTest, test_alu_op3_ne)
+{
+   auto R130x =  new Register( 130, 0, pin_none);
+   auto R130y =  new Register( 130, 1, pin_none);
+   auto R130z =  new Register( 130, 2, pin_none);
+   auto R131z =  new Register( 131, 2, pin_none);
+   auto R131w =  new Register( 131, 3, pin_none);
+
+   AluInstr alu(op3_cnde, R130x, R130y, R131z, R131w, {alu_write, alu_last_instr});
+
+   EXPECT_NE(alu, AluInstr(op3_muladd, R130x, R130y, R131z, R131w, {alu_write, alu_last_instr}));
+
+   EXPECT_NE(alu, AluInstr(op3_cnde, R130z, R130y, R131z, R131w, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130z, R131z, R131w, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R130z, R131w, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R130z, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op3_cnde, R130x, R130y, R131z, R131w, {alu_write}));
+
+   AluInstr alu_cf_changes = alu;
+   alu_cf_changes.set_cf_type(cf_alu_push_before);
+
+   EXPECT_NE(alu, alu_cf_changes);
+
+   AluInstr alu_bs_changes = alu;
+   alu_bs_changes.set_bank_swizzle(alu_vec_021);
+
+   EXPECT_NE(alu, alu_bs_changes);
+};
+
+
+TEST_F(InstrTest, test_alu_op1_ne)
+{
+   auto R130x =  new Register( 130, 0, pin_none);
+   auto R130y =  new Register( 130, 1, pin_none);
+   auto R130z =  new Register( 130, 2, pin_none);
+
+   AluInstr alu(op1_mov, R130x, R130y, {alu_write, alu_last_instr});
+
+   EXPECT_NE(alu, AluInstr(op1_cos, R130x, R130y, {alu_write, alu_last_instr}));
+
+   EXPECT_NE(alu, AluInstr(op1_mov, R130z, R130y, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op1_mov, R130x, R130z, {alu_write, alu_last_instr}));
+   EXPECT_NE(alu, AluInstr(op1_mov, R130x, R130y, {alu_last_instr}));
+
+   AluInstr alu_cf_changes = alu;
+   alu_cf_changes.set_cf_type(cf_alu_push_before);
+
+   EXPECT_NE(alu, alu_cf_changes);
+
+   AluInstr alu_bs_changes = alu;
+   alu_bs_changes.set_bank_swizzle(alu_vec_021);
+
+   EXPECT_NE(alu, alu_bs_changes);
+};
+
+TEST_F(InstrTest, test_alu_dot4_grouped)
+{
+   auto R130x =  new Register( 130, 0, pin_none);
+   auto R130y =  new Register( 130, 1, pin_none);
+   auto R130z =  new Register( 130, 2, pin_none);
+   auto R130w =  new Register( 130, 3, pin_none);
+
+   auto R131x =  new Register( 131, 0, pin_none);
+   auto R131y =  new Register( 131, 1, pin_none);
+   auto R131z =  new Register( 131, 2, pin_none);
+   auto R131w =  new Register( 131, 3, pin_none);
+
+   auto R132x =  new Register( 132, 0, pin_chan);
+   auto R132y =  new Register( 132, 1, pin_chan);
+   auto R132z =  new Register( 132, 2, pin_chan);
+   auto R132w =  new Register( 132, 3, pin_chan);
+
+   AluInstr::SrcValues src({R130x, R130y, R130z, R130w,
+                            R131x, R131y, R131z, R131w});
+
+   AluInstr alu(op2_dot4_ieee, R132x, src, {alu_write, alu_last_instr}, 4);
+
+   EXPECT_NE(alu, AluInstr(op1_cos, R130x, R130y, {alu_write, alu_last_instr}));
+   EXPECT_EQ(alu, alu);
+
+   ValueFactory vf;
+   auto group = alu.split(vf);
+   group->fix_last_flag();
+   ASSERT_TRUE(group);
+
+   auto i = group->begin();
+   EXPECT_NE(i, group->end());
+   ASSERT_TRUE(*i);
+   check(**i, AluInstr(op2_dot4_ieee, R132x, R130x, R130y, {alu_write}));
+   ++i;
+   EXPECT_NE(i, group->end());
+   ASSERT_TRUE(*i);
+   check(**i, AluInstr(op2_dot4_ieee, R132y, R130z, R130w, {}));
+   ++i;
+   EXPECT_NE(i, group->end());
+   ASSERT_TRUE(*i);
+   check(**i, AluInstr(op2_dot4_ieee, R132z, R131x, R131y, {}));
+   ++i;
+   EXPECT_NE(i, group->end());
+   ASSERT_TRUE(*i);
+   check(**i, AluInstr(op2_dot4_ieee, R132w, R131z, R131w, {alu_last_instr}));
+   ++i;
+   EXPECT_NE(i, group->end());
+   ASSERT_FALSE(*i);
+   ++i;
+   EXPECT_EQ(i, group->end());
+};
+
+
+
+
+#ifdef __cpp_exceptions
+TEST_F(InstrTest, test_alu_wrong_source_count)
+{
+   EXPECT_THROW(AluInstr(op3_cnde,
+                         new Register( 130, 1, pin_none),
+                         new Register( 129, 2, pin_chan),
+                         new Register( 129, 3, pin_none),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+
+   EXPECT_THROW(AluInstr(op3_cnde,
+                         new Register( 130, 1, pin_none),
+                         new Register( 129, 2, pin_chan),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+
+   EXPECT_THROW(AluInstr(op1_mov,
+                         new Register( 130, 1, pin_none),
+                         new Register( 129, 2, pin_chan),
+                         new Register( 129, 2, pin_chan),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+
+   EXPECT_THROW(AluInstr(op2_add,
+                         new Register( 130, 1, pin_none),
+                         new Register( 129, 2, pin_chan),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+
+   EXPECT_THROW(AluInstr(op2_add,
+                         new Register( 130, 1, pin_none),
+                         new Register( 129, 2, pin_chan),
+                         new Register( 129, 2, pin_chan),
+                         new Register( 129, 2, pin_chan),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+}
+
+TEST_F(InstrTest, test_alu_write_no_dest)
+{
+   EXPECT_THROW(AluInstr(op2_add,
+                         nullptr,
+                         new Register( 129, 2, pin_chan),
+                         new Register( 129, 2, pin_chan),
+                         {alu_write, alu_last_instr}), std::invalid_argument);
+}
+
+#endif
+
+TEST_F(InstrTest, test_tex_basic)
+{
+   TexInstr tex(TexInstr::sample,
+                RegisterVec4(129),
+                {0,1,2,3},
+                RegisterVec4(130),
+                1,
+                17);
+
+   EXPECT_EQ(tex.opcode(), TexInstr::sample);
+
+   auto& dst = tex.dst();
+   auto& src = tex.src();
+
+   for (int i = 0; i < 4; ++i) {
+      EXPECT_EQ(*dst[i], Register(129, i, pin_group));
+      EXPECT_EQ(*src[i], Register(130, i, pin_group));
+      EXPECT_EQ(tex.dest_swizzle(i), i);
+   }
+
+   EXPECT_EQ(tex.sampler_id(), 1);
+   EXPECT_EQ(tex.resource_id(), 17);
+
+   EXPECT_TRUE(tex.end_group());
+
+   for (int i = 0; i < 3; ++i)
+      EXPECT_EQ(tex.get_offset(i), 0);
+
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::x_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized));
+
+   tex.set_tex_flag(TexInstr::x_unnormalized);
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized));
+
+   tex.set_tex_flag(TexInstr::y_unnormalized);
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized));
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::y_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized));
+
+   tex.set_tex_flag(TexInstr::z_unnormalized);
+   tex.set_tex_flag(TexInstr::w_unnormalized);
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::x_unnormalized));
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::y_unnormalized));
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::z_unnormalized));
+   EXPECT_TRUE(tex.has_tex_flag(TexInstr::w_unnormalized));
+
+   EXPECT_EQ(tex.inst_mode(), 0);
+
+   EXPECT_FALSE(tex.sampler_offset());
+
+   tex.set_dest_swizzle({4, 7, 0, 1});
+   EXPECT_EQ(tex.dest_swizzle(0), 4);
+   EXPECT_EQ(tex.dest_swizzle(1), 7);
+   EXPECT_EQ(tex.dest_swizzle(2), 0);
+   EXPECT_EQ(tex.dest_swizzle(3), 1);
+
+   tex.set_dest_swizzle({7, 2, 5, 0});
+   EXPECT_EQ(tex.dest_swizzle(0), 7);
+   EXPECT_EQ(tex.dest_swizzle(1), 2);
+   EXPECT_EQ(tex.dest_swizzle(2), 5);
+   EXPECT_EQ(tex.dest_swizzle(3), 0);
+
+   tex.set_offset(0, 2);
+   tex.set_offset(1, -1);
+   tex.set_offset(2, 3);
+
+   EXPECT_EQ(tex.get_offset(0), 4);
+   EXPECT_EQ(tex.get_offset(1), -2);
+   EXPECT_EQ(tex.get_offset(2), 6);
+
+}
+
+TEST_F(InstrTest, test_tex_gather4)
+{
+   TexInstr tex(TexInstr::gather4,
+                RegisterVec4(131),
+                {0,1,2,3},
+                RegisterVec4(132),
+                2,
+                19);
+
+   EXPECT_EQ(tex.opcode(), TexInstr::gather4);
+
+   auto& dst = tex.dst();
+   auto& src = tex.src();
+
+   for (int i = 0; i < 4; ++i) {
+      EXPECT_EQ(*dst[i], Register(131, i, pin_group));
+      EXPECT_EQ(*src[i], Register(132, i, pin_group));
+      EXPECT_EQ(tex.dest_swizzle(i), i);
+   }
+
+   EXPECT_EQ(tex.sampler_id(), 2);
+   EXPECT_EQ(tex.resource_id(), 19);
+
+   for (int i = 0; i < 3; ++i)
+      EXPECT_EQ(tex.get_offset(i), 0);
+
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::x_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::y_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::z_unnormalized));
+   EXPECT_FALSE(tex.has_tex_flag(TexInstr::w_unnormalized));
+
+   tex.set_gather_comp(2);
+   EXPECT_EQ(tex.inst_mode(), 2);
+
+}
+
+TEST_F(InstrTest, test_tex_neq)
+{
+   TexInstr tex_ref(TexInstr::sample,
+                    RegisterVec4(129),
+                    {0,1,2,3},
+                    RegisterVec4(130),
+                    1,
+                    17);
+   EXPECT_EQ(tex_ref, tex_ref);
+
+
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample_c, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(130), {0,1,2,3}, RegisterVec4(130), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(130), {0,1,2,3}, RegisterVec4(130), 1, 17));
+
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {7,1,2,3}, RegisterVec4(130), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,7,2,3}, RegisterVec4(130), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,7,3}, RegisterVec4(130), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,7}, RegisterVec4(130), 1, 17));
+
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {7,1,2,3}), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,7,2,3}), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,1,7,3}), 1, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130, false, {0,1,2,7}), 1, 17));
+
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 2, 17));
+   EXPECT_NE(tex_ref, TexInstr(TexInstr::sample, RegisterVec4(129), {0,1,2,3}, RegisterVec4(130), 1, 18));
+
+   /*
+   auto tex_with_sampler_offset = tex_ref;
+   tex_with_sampler_offset.set_sampler_offset(new LiteralConstant( 2));
+   EXPECT_NE(tex_ref, tex_with_sampler_offset);
+
+   auto tex_cmp1 = tex_ref;
+   EXPECT_EQ(tex_ref, tex_cmp1);
+
+   tex_cmp1.set_tex_flag(TexInstr::x_unnormalized); EXPECT_NE(tex_ref, tex_cmp1);
+   auto tex_cmp2 = tex_ref; tex_cmp2.set_tex_flag(TexInstr::y_unnormalized); EXPECT_NE(tex_ref, tex_cmp2);
+   auto tex_cmp3 = tex_ref; tex_cmp3.set_tex_flag(TexInstr::z_unnormalized); EXPECT_NE(tex_ref, tex_cmp3);
+   auto tex_cmp4 = tex_ref; tex_cmp4.set_tex_flag(TexInstr::w_unnormalized); EXPECT_NE(tex_ref, tex_cmp4);
+
+   for (int i = 0; i < 3; ++i) {
+      auto tex_ofs = tex_ref;
+      tex_ofs.set_offset(i, 1);
+      EXPECT_NE(tex_ref, tex_ofs);
+   }
+
+   for (int i = 0; i < 4; ++i) {
+      auto tex_swz = tex_ref;
+      RegisterVec4::Swizzle dst_swz = {0,1,2,3};
+      dst_swz[i] = 7;
+      tex_swz.set_dest_swizzle(dst_swz);
+      EXPECT_NE(tex_ref, tex_swz);
+   }
+
+   auto tex_cmp_mode = tex_ref;
+   tex_cmp_mode.set_inst_mode(1);
+   EXPECT_NE(tex_ref, tex_cmp_mode);*/
+}
+
+
+TEST_F(InstrTest, test_export_basic)
+{
+   ExportInstr exp0(ExportInstr::param, 60, RegisterVec4(200));
+
+   EXPECT_EQ(exp0.export_type(), ExportInstr::param);
+   EXPECT_EQ(exp0.location(), 60);
+   EXPECT_EQ(exp0.value(), RegisterVec4(200));
+   EXPECT_FALSE(exp0.is_last_export());
+
+   ExportInstr exp1(ExportInstr::param, 60, RegisterVec4(200));
+   exp1.set_is_last_export(true);
+   EXPECT_TRUE(exp1.is_last_export());
+
+   EXPECT_EQ(exp0, exp0);
+   EXPECT_NE(exp0, exp1);
+
+   ExportInstr exp2(ExportInstr::pos, 60, RegisterVec4(200));
+   EXPECT_EQ(exp2.export_type(), ExportInstr::pos);
+   EXPECT_NE(exp0, exp2);
+
+   ExportInstr exp3(ExportInstr::param, 61, RegisterVec4(200));
+   EXPECT_EQ(exp3.location(), 61);
+   EXPECT_NE(exp0, exp3);
+
+   ExportInstr exp4(ExportInstr::param, 60, RegisterVec4(201));
+   EXPECT_EQ(exp4.value(), RegisterVec4(201));
+   EXPECT_NE(exp0, exp4);
+
+   EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {7,1,2,3})));
+   EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,7,2,3})));
+   EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,1,7,3})));
+   EXPECT_NE(exp0, ExportInstr(ExportInstr::param, 60, RegisterVec4(200, false, {0,1,2,7})));
+}
+
+
+TEST_F(InstrTest, test_fetch_basic)
+{
+   FetchInstr fetch(vc_fetch,
+                    RegisterVec4(200),
+                    {0,2,1,3},
+                    new Register( 201, 2, pin_none),
+                    0,
+                    vertex_data,
+                    fmt_8,
+                    vtx_nf_norm,
+                    vtx_es_none,
+                    1,
+                    nullptr);
+
+
+
+   EXPECT_EQ(fetch.opcode(), vc_fetch);
+   EXPECT_EQ(fetch.dst(), RegisterVec4(200));
+   EXPECT_EQ(fetch.dest_swizzle(0), 0);
+   EXPECT_EQ(fetch.dest_swizzle(1), 2);
+   EXPECT_EQ(fetch.dest_swizzle(2), 1);
+   EXPECT_EQ(fetch.dest_swizzle(3), 3);
+
+   EXPECT_EQ(fetch.src(), Register(201, 2, pin_none));
+   EXPECT_EQ(fetch.src_offset(), 0);
+
+   EXPECT_EQ(fetch.resource_id(), 1);
+   EXPECT_FALSE(fetch.resource_offset());
+
+   EXPECT_EQ(fetch.fetch_type(), vertex_data);
+   EXPECT_EQ(fetch.data_format(), fmt_8);
+   EXPECT_EQ(fetch.num_format(), vtx_nf_norm);
+   EXPECT_EQ(fetch.endian_swap(), vtx_es_none);
+
+   EXPECT_EQ(fetch.mega_fetch_count(), 0);
+   EXPECT_EQ(fetch.array_base(), 0);
+   EXPECT_EQ(fetch.array_size(), 0);
+   EXPECT_EQ(fetch.elm_size(), 0);
+
+   for (int i = 0; i < FetchInstr::unknown; ++i) {
+      EXPECT_FALSE(fetch.has_fetch_flag(static_cast<FetchInstr::EFlags>(i)));
+   }
+
+   EXPECT_NE(fetch,  FetchInstr(vc_get_buf_resinfo, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none, 1, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(201),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{1,2,0,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 200, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                8, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, instance_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8_8, vtx_nf_norm, vtx_es_none,
+                                1, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_int, vtx_es_none,
+                                1, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_8in16,
+                                1, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                2, nullptr));
+
+   EXPECT_NE(fetch,  FetchInstr(vc_fetch, RegisterVec4(200),{0,2,1,3},
+                                new Register( 201, 2, pin_none),
+                                0, vertex_data,
+                                fmt_8, vtx_nf_norm, vtx_es_none,
+                                1, new Register( 1000, 0, pin_none)));
+
+   auto fetch1 = fetch;
+   fetch1.set_mfc(31);
+   EXPECT_NE(fetch1, fetch);
+   EXPECT_EQ(fetch1.mega_fetch_count(), 31);
+   EXPECT_TRUE(fetch1.has_fetch_flag(static_cast<FetchInstr::EFlags>(FetchInstr::is_mega_fetch)));
+
+   auto fetch2 = fetch;
+   fetch2.set_array_base(32);
+   EXPECT_NE(fetch, fetch2);
+   EXPECT_EQ(fetch2.array_base(), 32);
+
+   auto fetch3 = fetch;
+   fetch3.set_array_size(16);
+   EXPECT_NE(fetch, fetch3);
+   EXPECT_EQ(fetch3.array_size(), 16);
+
+   auto fetch4 = fetch;
+   fetch4.set_element_size(3);
+   EXPECT_NE(fetch, fetch4);
+   EXPECT_EQ(fetch4.elm_size(), 3);
+}
+
+TEST_F(InstrTest, test_fetch_basic2)
+{
+   FetchInstr fetch(vc_get_buf_resinfo,
+                    RegisterVec4(201),
+                    {0,1,3,4},
+                    new Register( 202, 3, pin_none),
+                    1,
+                    no_index_offset,
+                    fmt_32_32,
+                    vtx_nf_int,
+                    vtx_es_8in16,
+                    3,
+                    new Register( 300, 1, pin_none));
+
+
+   EXPECT_EQ(fetch.opcode(), vc_get_buf_resinfo);
+   EXPECT_EQ(fetch.dst(), RegisterVec4(201));
+   EXPECT_EQ(fetch.dest_swizzle(0), 0);
+   EXPECT_EQ(fetch.dest_swizzle(1), 1);
+   EXPECT_EQ(fetch.dest_swizzle(2), 3);
+   EXPECT_EQ(fetch.dest_swizzle(3), 4);
+
+   EXPECT_EQ(fetch.src(), Register(202, 3, pin_none));
+   EXPECT_EQ(fetch.src_offset(), 1);
+
+   EXPECT_EQ(fetch.resource_id(), 3);
+   EXPECT_EQ(*fetch.resource_offset(), Register(300, 1, pin_none));
+
+   EXPECT_EQ(fetch.fetch_type(), no_index_offset);
+   EXPECT_EQ(fetch.data_format(), fmt_32_32);
+   EXPECT_EQ(fetch.num_format(), vtx_nf_int);
+   EXPECT_EQ(fetch.endian_swap(), vtx_es_8in16);
+
+   EXPECT_EQ(fetch.mega_fetch_count(), 0);
+   EXPECT_EQ(fetch.array_base(), 0);
+   EXPECT_EQ(fetch.array_size(), 0);
+   EXPECT_EQ(fetch.elm_size(), 0);
+
+   for (int i = 0; i < FetchInstr::unknown; ++i) {
+      EXPECT_FALSE(fetch.has_fetch_flag(static_cast<FetchInstr::EFlags>(i)));
+   }
+
+   auto fetch1 = fetch;
+   fetch1.set_mfc(15);
+   EXPECT_NE(fetch1, fetch);
+   EXPECT_EQ(fetch1.mega_fetch_count(), 15);
+   EXPECT_TRUE(fetch1.has_fetch_flag(static_cast<FetchInstr::EFlags>(FetchInstr::is_mega_fetch)));
+
+   auto fetch2 = fetch;
+   fetch2.set_array_base(128);
+   EXPECT_NE(fetch, fetch2);
+   EXPECT_EQ(fetch2.array_base(), 128);
+
+   auto fetch3 = fetch;
+   fetch3.set_array_size(8);
+   EXPECT_NE(fetch, fetch3);
+   EXPECT_EQ(fetch3.array_size(), 8);
+
+   auto fetch4 = fetch;
+   fetch4.set_element_size(1);
+   EXPECT_NE(fetch, fetch4);
+   EXPECT_EQ(fetch4.elm_size(), 1);
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp
new file mode 100644
index 0000000..df6a9ac
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_instrfromstring_test.cpp
@@ -0,0 +1,728 @@
+
+#include "../sfn_instrfactory.h"
+
+#include "../sfn_instr_alu.h"
+#include "../sfn_instr_export.h"
+#include "../sfn_instr_fetch.h"
+#include "../sfn_instr_lds.h"
+#include "../sfn_instr_mem.h"
+#include "../sfn_instr_tex.h"
+
+#include "gtest/gtest.h"
+#include <sstream>
+
+namespace r600 {
+
+using std::istringstream;
+using std::ostringstream;
+using std::string;
+
+class TestInstrFromString : public ::testing::Test
+{
+public:
+   TestInstrFromString();
+
+   PInst from_string(const std::string& s);
+
+protected:
+   void add_dest_from_string(const char *init);
+   void add_dest_vec4_from_string(const char *init);
+
+   void check(const Instr& eval, const Instr& expect);
+   void check(const string& init, const Instr& expect);
+
+   InstrFactory m_instr_factory;
+
+};
+
+TEST_F(TestInstrFromString, test_alu_mov)
+{
+   add_dest_from_string("R1999.x");
+
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 0, pin_none),
+                   {alu_write, alu_last_instr});
+
+   check("ALU MOV R2000.y : R1999.x {WL}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_lds_read_ret)
+{
+   add_dest_from_string("R1999.x");
+
+   AluInstr expect(DS_OP_READ_RET,
+                   {new Register( 1999, 0, pin_none)}, {});
+
+   check("ALU LDS READ_RET __.x : R1999.x {}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_mov_literal)
+{
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new LiteralConstant( 0x10),
+                   {alu_write, alu_last_instr});
+
+   check("ALU MOV R2000.y : L[0x10] {WL}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_mov_neg)
+{
+   add_dest_from_string("R1999.x");
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 0, pin_none),
+                   {alu_write, alu_last_instr, alu_src0_neg});
+
+   check("ALU MOV R2000.y : -R1999.x {WL}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_mov_abs)
+{
+   add_dest_from_string("R1999.x");
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 0, pin_none),
+                   {alu_write, alu_last_instr, alu_src0_abs});
+
+   check("ALU MOV R2000.y : |R1999.x| {WL}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_mov_neg_abs)
+{
+   add_dest_from_string("R1999.x");
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 0, pin_none),
+                   {alu_write, alu_src0_neg, alu_src0_abs});
+   check("ALU MOV R2000.y : -|R1999.x| {W}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_add)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+
+   AluInstr expect(op2_add,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr});
+   check("ALU ADD __.y : R1999.w R1998.z {L}", expect);
+
+}
+
+TEST_F(TestInstrFromString, test_alu_add_clmap)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_add,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr, alu_dst_clamp});
+   check("ALU ADD CLAMP __.y : R1999.w R1998.z {L}", expect);
+
+}
+
+TEST_F(TestInstrFromString, test_alu_add_neg2)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_add,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr, alu_src1_neg});
+   check("ALU ADD __.y : R1999.w -R1998.z {L}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_sete_update_pref)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_sete,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr, alu_src1_neg, alu_update_pred});
+   check("ALU SETE __.y : R1999.w -R1998.z {LP}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_sete_update_pref_empty_dest)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_sete,
+                   new Register( 2000, 0, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr, alu_update_pred});
+   check("ALU SETE __.x : R1999.w R1998.z {LP}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_setne_update_exec)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_setne,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_last_instr, alu_src1_neg, alu_update_exec});
+   check("ALU SETNE __.y : R1999.w -R1998.z {LE}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_add_abs2)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_add,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_write, alu_last_instr, alu_src1_abs});
+   check("ALU ADD R2000.y : R1999.w |R1998.z| {WL}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_add_abs2_neg2)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   AluInstr expect(op2_add,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   {alu_write, alu_last_instr, alu_src1_abs, alu_src1_neg});
+   check("ALU ADD R2000.y : R1999.w -|R1998.z| {WL}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_muladd)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   add_dest_from_string("R2000.y");
+   AluInstr expect(op3_muladd_ieee,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   new Register( 2000, 1, pin_none),
+                   {alu_write, alu_last_instr});
+   check("ALU MULADD_IEEE R2000.y : R1999.w R1998.z R2000.y {WL}", expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_muladd_neg3)
+{
+   add_dest_from_string("R1998.z");
+   add_dest_from_string("R1999.w");
+   add_dest_from_string("R2000.y");
+   AluInstr expect(op3_muladd_ieee,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 3, pin_none),
+                   new Register( 1998, 2, pin_none),
+                   new Register( 2000, 1, pin_none),
+                   {alu_last_instr, alu_src2_neg});
+   check("ALU MULADD_IEEE __.y : R1999.w R1998.z -R2000.y {L}", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_mov_bs)
+{
+   add_dest_from_string("R1999.x");
+   for (auto& [expect_bs, str] : AluInstr::bank_swizzle_map) {
+      auto init = std::string("ALU MOV R2000.y : R1999.x {WL} ")  + str;
+
+      AluInstr expect(op1_mov,
+                      new Register( 2000, 1, pin_none),
+                      new Register( 1999, 0, pin_none),
+                      {alu_write, alu_last_instr});
+      expect.set_bank_swizzle(expect_bs);
+
+      check(init, expect);
+   }
+}
+
+TEST_F(TestInstrFromString, test_alu_dot4_ieee)
+{
+   add_dest_from_string("R199.x");
+   add_dest_from_string("R199.y");
+   add_dest_from_string("R199.z");
+   add_dest_from_string("R199.w");
+   add_dest_from_string("R198.x");
+   add_dest_from_string("R198.y");
+   add_dest_from_string("R198.z");
+   add_dest_from_string("R198.w");
+   auto init = std::string("ALU DOT4_IEEE R2000.y : R199.x R198.w + R199.y R198.z + R199.z R198.y + R199.w R198.x {WL}");
+
+   AluInstr expect(op2_dot4_ieee,
+                   new Register( 2000, 1, pin_none),
+                   {new Register( 199, 0, pin_none),
+                    new Register( 198, 3, pin_none),
+                    new Register( 199, 1, pin_none),
+                    new Register( 198, 2, pin_none),
+                    new Register( 199, 2, pin_none),
+                    new Register( 198, 1, pin_none),
+                    new Register( 199, 3, pin_none),
+                    new Register( 198, 0, pin_none)},
+                   {alu_write, alu_last_instr}, 4);
+
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_alu_mov_cf)
+{
+   add_dest_from_string("R1999.x");
+   for (auto& [expect_cf, str] : AluInstr::cf_map) {
+      auto init = std::string("ALU MOV R2000.y : R1999.x {WL} ")  + str;
+
+      AluInstr expect(op1_mov,
+                      new Register( 2000, 1, pin_none),
+                      new Register( 1999, 0, pin_none),
+                      {alu_write, alu_last_instr});
+      expect.set_cf_type(expect_cf);
+
+      check(init, expect);
+   }
+}
+
+TEST_F(TestInstrFromString, test_alu_interp_xy)
+{
+   add_dest_from_string("R0.y@fully");
+   auto init = std::string("ALU INTERP_ZW R1024.z@chan : R0.y@fully Param0.z {W} VEC_210");
+
+   AluInstr expect(op2_interp_zw,
+                   new Register( 1024, 2, pin_chan),
+                   new Register( 0, 1, pin_fully),
+                   new InlineConstant( ALU_SRC_PARAM_BASE, 2),
+                   {alu_write});
+   expect.set_bank_swizzle(alu_vec_210);
+
+   check(init, expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_interp_xy_no_write)
+{
+   add_dest_from_string("R0.x@fully");
+   auto init = std::string("ALU INTERP_XY __.x@chan : R0.x@fully Param0.z {} VEC_210");
+
+   AluInstr expect(op2_interp_xy,
+                   new Register( 1024, 0, pin_chan),
+                   new Register( 0, 0, pin_fully),
+                   new InlineConstant( ALU_SRC_PARAM_BASE, 2),
+                   {});
+   expect.set_bank_swizzle(alu_vec_210);
+
+   check(init, expect);
+}
+
+
+TEST_F(TestInstrFromString, test_alu_mov_cf_bs)
+{
+   add_dest_from_string("R1999.x");
+   auto init = std::string("ALU MOV R2000.y : R1999.x {WL} VEC_210 POP_AFTER");
+   AluInstr expect(op1_mov,
+                   new Register( 2000, 1, pin_none),
+                   new Register( 1999, 0, pin_none),
+                   {alu_write, alu_last_instr});
+   expect.set_cf_type(cf_alu_pop_after);
+   expect.set_bank_swizzle(alu_vec_210);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_sample_basic)
+{
+   add_dest_vec4_from_string("R2000.xyzw");
+   auto init = std::string("TEX SAMPLE R1000.xyzw : R2000.xyzw RID:10 SID:1 NNNN");
+   TexInstr expect(TexInstr::sample, RegisterVec4(1000), {0,1,2,3}, RegisterVec4(2000), 1, 10);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_ld_basic)
+{
+   add_dest_vec4_from_string("R2002.xyzw");
+   auto init = std::string("TEX LD R1001.xyzw : R2002.xyzw RID:27 SID:7 NNNN");
+   TexInstr expect(TexInstr::ld, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 7, 27);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_sample_with_offset)
+{
+   add_dest_vec4_from_string("R2002.xyzw");
+   auto init = std::string("TEX SAMPLE R1001.xyzw : R2002.xyzw RID:27 SID:2 OX:1 OY:-2 OZ:5 NNNN");
+
+   TexInstr expect(TexInstr::sample, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 2, 27);
+   expect.set_offset(0, 1);
+   expect.set_offset(1, -2);
+   expect.set_offset(2, 5);
+
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_gather4_x)
+{
+   add_dest_vec4_from_string("R2002.xyzw");
+   auto init = std::string("TEX GATHER4 R1001.xyzw : R2002.xyzw RID:7 SID:27 MODE:0 NNNN");
+   TexInstr expect(TexInstr::gather4, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_gather4_y)
+{
+   add_dest_vec4_from_string("R2002.xyzw");
+   auto init = std::string("TEX GATHER4 R1001.xyzw : R2002.xyzw RID:7 SID:27 MODE:1 NNNN");
+   TexInstr expect(TexInstr::gather4, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7);
+   expect.set_gather_comp(1);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_tex_sampler_with_offset)
+{
+   add_dest_vec4_from_string("R2002.xyzw");
+   auto init = std::string("TEX SAMPLE R1001.xyzw : R2002.xyzw RID:7 SID:27 SO:R200.z NNNN");
+   TexInstr expect(TexInstr::sample, RegisterVec4(1001), {0,1,2,3}, RegisterVec4(2002), 27, 7);
+   expect.set_sampler_offset(new Register( 200, 2, pin_none));
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_export_param_60)
+{
+   add_dest_vec4_from_string("R1001.xyzw");
+
+   ExportInstr expect(ExportInstr::param, 60, RegisterVec4(1001));
+   check("EXPORT PARAM 60 R1001.xyzw", expect);
+}
+
+TEST_F(TestInstrFromString, test_export_pos_61)
+{
+   add_dest_from_string("R1002.y@group");
+
+   ExportInstr expect(ExportInstr::pos, 61, RegisterVec4(1002, false, {1, 4, 5, 7}));
+   check("EXPORT POS 61 R1002.y01_", expect);
+}
+
+TEST_F(TestInstrFromString, test_export_last_pixel_0)
+{
+   add_dest_vec4_from_string("R1002.xyzw");
+
+   ExportInstr expect(ExportInstr::pixel, 0, RegisterVec4(1002, false, {2, 3, 0, 1}));
+   expect.set_is_last_export(true);
+   check("EXPORT_DONE PIXEL 0 R1002.zwxy", expect);
+}
+
+
+TEST_F(TestInstrFromString, test_fetch_basic)
+{
+   add_dest_from_string("R201.z");
+
+   FetchInstr expect(vc_fetch,
+                     RegisterVec4(1002),
+                     {0,4,5,1},
+                     new Register( 201, 2, pin_none),
+                     0,
+                     vertex_data,
+                     fmt_8,
+                     vtx_nf_norm,
+                     vtx_es_none,
+                     1,
+                     nullptr);
+   expect.set_mfc(31);
+   expect.set_element_size(3);
+   check("VFETCH R1002.x01y : R201.z RID:1 VERTEX FMT(8,UNORM) MFC:31 ES:3", expect);
+}
+
+TEST_F(TestInstrFromString, test_query_buffer_size)
+{
+   QueryBufferSizeInstr expect(RegisterVec4(1002),RegisterVec4::Swizzle({0,1,2,3}), 1);
+   check("GET_BUF_RESINFO R1002.xyzw : RID:1", expect);
+
+   FetchInstr expect_fetch(vc_get_buf_resinfo,
+                           RegisterVec4(1002),RegisterVec4::Swizzle({0,1,2,3}),
+                           new Register( 0, 7, pin_fully),
+                           0,
+                           no_index_offset,
+                           fmt_32_32_32_32,
+                           vtx_nf_norm,
+                           vtx_es_none,
+                           1,
+                           nullptr);
+   expect_fetch.set_fetch_flag(FetchInstr::format_comp_signed);
+   check("GET_BUF_RESINFO R1002.xyzw : RID:1", expect_fetch);
+}
+
+TEST_F(TestInstrFromString, test_load_from_buffer)
+{
+   add_dest_from_string("R201.x");
+   add_dest_from_string("R202.x");
+   string init = "LOAD_BUF R200.xzwy : R201.x + 16b RID:10 + R202.x";
+   LoadFromBuffer expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}),
+                         new Register( 201, 0, pin_none), 16, 10,
+                         new Register( 202, 0, pin_none), fmt_32_32_32_32_float);
+   check(init, expect);
+
+   auto instr = from_string(init);
+   FetchInstr expect_fetch(vc_fetch,
+                           RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}),
+                           new Register( 201, 0, pin_none),
+                           16,
+                           no_index_offset,
+                           fmt_32_32_32_32_float,
+                           vtx_nf_scaled,
+                           vtx_es_none,
+                           10,
+                           new Register( 202, 0, pin_none));
+   expect_fetch.set_fetch_flag(FetchInstr::format_comp_signed);
+   expect_fetch.set_mfc(16);
+   check(*instr, expect_fetch);
+}
+
+TEST_F(TestInstrFromString, test_load_from_scratch)
+{
+
+   add_dest_from_string("R201.x");
+   string init = "READ_SCRATCH R200.xzwy : R201.x SIZE:20 ES:3";
+
+   LoadFromScratch expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}),
+                          new Register( 201, 0, pin_none), 20);
+   check(init, expect);
+
+   FetchInstr expect_fetch(vc_read_scratch,
+                           RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}),
+                           new Register( 201, 0, pin_none),
+                           0,
+                           no_index_offset,
+                           fmt_32_32_32_32,
+                           vtx_nf_int,
+                           vtx_es_none,
+                           0,
+                           nullptr);
+   expect_fetch.set_element_size(3);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::mfc);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::fmt);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::ftype);
+   expect_fetch.set_fetch_flag(FetchInstr::EFlags::uncached);
+   expect_fetch.set_fetch_flag(FetchInstr::EFlags::indexed);
+   expect_fetch.set_fetch_flag(FetchInstr::EFlags::wait_ack);
+   expect_fetch.set_array_size(19);
+
+   check(init, expect_fetch);
+}
+
+TEST_F(TestInstrFromString, test_write_scratch_to_offset)
+{
+   add_dest_vec4_from_string("R1.xyzw");
+   string init = "WRITE_SCRATCH 20 R1.xyzw AL:4 ALO:16";
+   WriteScratchInstr expect(RegisterVec4(1), 20, 4, 16, 0xf);
+   check(init, expect);
+
+   add_dest_vec4_from_string("R2.xyzw");
+   string init2 = "WRITE_SCRATCH 10 R2.xy_w AL:8 ALO:8";
+   WriteScratchInstr expect2(RegisterVec4(2), 10, 8, 8, 0xb);
+   check(init2, expect2);
+}
+
+TEST_F(TestInstrFromString, test_write_scratch_to_index)
+{
+   add_dest_vec4_from_string("R1.xyzw");
+   add_dest_from_string("R3.x");
+   string init = "WRITE_SCRATCH @R3.x[10] R1.xyzw AL:4 ALO:16";
+   WriteScratchInstr expect(RegisterVec4(1), new Register(3, 0, pin_none), 4, 16, 0xf, 10);
+   check(init, expect);
+
+   add_dest_vec4_from_string("R2.xyzw");
+   add_dest_from_string("R4.x");
+   string init2 = "WRITE_SCRATCH @R4.x[20] R2.xy__ AL:4 ALO:16";
+   WriteScratchInstr expect2(RegisterVec4(2), new Register(4, 0, pin_none), 4, 16, 0x3, 20);
+   check(init2, expect2);
+
+
+}
+
+
+
+TEST_F(TestInstrFromString, test_load_from_scratch_fixed_offset)
+{
+   string init = "READ_SCRATCH R200.xzwy : L[0xA] SIZE:40 ES:3";
+
+   LoadFromScratch expect(RegisterVec4(200), RegisterVec4::Swizzle({0,2,3,1}),
+                          new LiteralConstant( 10), 40);
+   check(init, expect);
+
+   FetchInstr expect_fetch(vc_read_scratch,
+                           RegisterVec4(200),RegisterVec4::Swizzle({0,2,3,1}),
+                           new Register( 0, 7, pin_none),
+                           0,
+                           no_index_offset,
+                           fmt_32_32_32_32,
+                           vtx_nf_int,
+                           vtx_es_none,
+                           0,
+                           nullptr);
+   expect_fetch.set_element_size(3);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::mfc);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::fmt);
+   expect_fetch.set_print_skip(FetchInstr::EPrintSkip::ftype);
+   expect_fetch.set_fetch_flag(FetchInstr::EFlags::uncached);
+   expect_fetch.set_fetch_flag(FetchInstr::EFlags::wait_ack);
+   expect_fetch.set_array_base(10);
+   expect_fetch.set_array_size(39);
+
+   check(init, expect_fetch);
+}
+
+
+TEST_F(TestInstrFromString, test_lds_read_3_values)
+{
+   add_dest_from_string("R5.x@free");
+   add_dest_from_string("R5.y@free");
+   add_dest_from_string("R5.z@free");
+
+   auto init = "LDS_READ [ R10.x@free R11.x@free R12.x@free ] : [ R5.x@free R5.y@free R5.z@free ]";
+
+   std::vector<PRegister, Allocator<PRegister>> dests(3);
+   std::vector<PVirtualValue, Allocator<PVirtualValue>> srcs(3);
+
+   for (int i = 0; i < 3; ++i) {
+      dests[i] = new Register(10 + i, 0, pin_free);
+      srcs[i] = new Register(5, i, pin_free);
+   }
+
+   LDSReadInstr expect(dests, srcs);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_lds_read_2_values)
+{
+   add_dest_from_string("R5.x@free");
+   add_dest_from_string("R5.y@free");
+
+   auto init = "LDS_READ [ R11.x@free R12.x@free ] : [ R5.x@free R5.y@free ]";
+
+   std::vector<PRegister, Allocator<PRegister>> dests(2);
+   std::vector<PVirtualValue, Allocator<PVirtualValue>> srcs(2);
+
+   for (int i = 0; i < 2; ++i) {
+      dests[i] = new Register(11 + i, 0, pin_free);
+      srcs[i] = new Register(5, i, pin_free);
+   }
+
+   LDSReadInstr expect(dests, srcs);
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_lds_write_1_value)
+{
+   auto init = "LDS WRITE __.x [ R1.x ] : R2.y";
+   add_dest_from_string("R1.x");
+   add_dest_from_string("R2.y");
+
+   LDSAtomicInstr expect(DS_OP_WRITE, nullptr,
+                         new Register(1, 0, pin_none),
+                         {new Register(2, 1, pin_none)});
+
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_lds_write_2_value)
+{
+   auto init = "LDS WRITE2 __.x [ R1.x ] : R2.y KC0[1].z";
+
+   add_dest_from_string("R1.x");
+   add_dest_from_string("R2.y");
+
+   LDSAtomicInstr expect(DS_OP_WRITE2, nullptr,
+                         new Register(1, 0, pin_none),
+                         {new Register(2, 1, pin_none),
+                          new UniformValue(513, 2, 0) });
+
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_lds_write_atomic_add_ret)
+{
+   auto init = "LDS ADD_RET R7.y [ R1.x ] : R2.y";
+
+   add_dest_from_string("R1.x");
+   add_dest_from_string("R2.y");
+
+   LDSAtomicInstr expect(DS_OP_ADD_RET,
+                         new Register(7, 1, pin_none),
+                         new Register(1, 0, pin_none),
+                         {new Register(2, 1, pin_none)});
+
+   check(init, expect);
+}
+
+TEST_F(TestInstrFromString, test_lds_write_atomic_add)
+{
+   auto init = "LDS ADD __.x [ R1.x ] : R2.y";
+
+   add_dest_from_string("R1.x");
+   add_dest_from_string("R2.y");
+
+   LDSAtomicInstr expect(DS_OP_ADD,
+                         nullptr,
+                         new Register(1, 0, pin_none),
+                         {new Register(2, 1, pin_none)});
+
+   check(init, expect);
+}
+
+
+TEST_F(TestInstrFromString, test_writeTF)
+{
+   auto init = "WRITE_TF R1.xyzw";
+
+   add_dest_vec4_from_string("R1.xyzw");
+
+   WriteTFInstr expect(RegisterVec4(1, true, {0,1,2,3}, pin_group));
+
+   check(init, expect);
+}
+
+TestInstrFromString::TestInstrFromString()
+{
+
+}
+
+PInst TestInstrFromString::from_string(const std::string& s)
+{
+   return m_instr_factory.from_string(s, 0);
+}
+
+void TestInstrFromString::check(const Instr& eval, const Instr& expect)
+{
+   EXPECT_EQ(eval, expect);
+}
+
+void TestInstrFromString::check(const string& init, const Instr& expect)
+{
+   auto instr = from_string(init);
+   ASSERT_TRUE(instr);
+   EXPECT_EQ(*instr, expect);
+
+   ostringstream os;
+   instr->print(os);
+   EXPECT_EQ(os.str(), init);
+}
+
+void TestInstrFromString::add_dest_from_string(const char *init)
+{
+   m_instr_factory.value_factory().dest_from_string(init);
+}
+
+void TestInstrFromString::add_dest_vec4_from_string(const char *init)
+{
+   RegisterVec4::Swizzle dummy;
+   m_instr_factory.value_factory().dest_vec4_from_string(init, dummy);
+}
+
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp
new file mode 100644
index 0000000..f12f600
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp
@@ -0,0 +1,217 @@
+#include "../sfn_shader.h"
+#include "../sfn_liverangeevaluator.h"
+#include "sfn_test_shaders.h"
+
+#include "gtest/gtest.h"
+#include <sstream>
+
+#include "../sfn_liverangeevaluator.h"
+
+namespace r600 {
+
+using std::ostringstream;
+
+class LiveRangeTests : public TestShader {
+
+protected:
+
+   void check(const char *shader, LiveRangeMap& expect);
+
+};
+
+using SimpleTest = testing::Test;
+
+TEST_F(SimpleTest, SimpleLiveRangeMapTest)
+{
+   LiveRangeMap a;
+   LiveRangeMap b;
+
+   EXPECT_EQ(a, b);
+
+   Register r1x(1, 0, pin_none);
+   a.append_register(&r1x);
+   r1x.set_index(0);
+   a.set_life_range(r1x, 0, 1);
+
+   EXPECT_NE(a, b);
+
+   b.append_register(&r1x);
+   b.set_life_range(r1x, 0, 1);
+   EXPECT_EQ(a, b);
+
+   Register r2x(2, 0, pin_none);
+   a.append_register(&r2x);
+   r2x.set_index(0);
+   a.set_life_range(r2x, 0, 2);
+
+   EXPECT_NE(a, b);
+
+   b.append_register(&r2x);
+   b.set_life_range(r2x, 0, 2);
+   EXPECT_EQ(a, b);
+
+   a.set_life_range(r2x, 1, 2);
+   EXPECT_NE(a, b);
+
+   b.set_life_range(r2x, 1, 2);
+   EXPECT_EQ(a, b);
+
+   a.set_life_range(r2x, 0, 1);
+   EXPECT_NE(a, b);
+}
+
+TEST_F(LiveRangeTests, SimpleAssignments)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r0 = vf.dest_vec4_from_string("S0.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+
+   expect.set_life_range(*r1x, 2, 3);
+   for (int i = 0; i < 4; ++i)
+      expect.set_life_range(*r0[i], 1, 3);
+
+   check(red_triangle_fs_expect_from_nir, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleAdd)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+   Register *r0x = vf.dest_from_string("S0.x@free");
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_none);
+   Register *r3x = vf.dest_from_string("S3.x@free");
+   RegisterVec4 r4 = vf.dest_vec4_from_string("S4.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*r0x, 1, 4);
+   expect.set_life_range(*r1x, 2, 3);
+   expect.set_life_range(*r3x, 4, 5);
+
+   expect.set_life_range(*r2[0], 3, 4);
+   for (int i = 1; i < 4; ++i)
+      expect.set_life_range(*r2[i], 3, 5);
+
+   for (int i = 0; i < 4; ++i)
+      expect.set_life_range(*r4[i], 5, 6);
+
+   check(add_add_1_expect_from_nir, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleAInterpolation)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+   Register *r0x = vf.dest_from_string("R0.x@fully");
+   r0x->pin_live_range(true, false);
+   Register *r0y = vf.dest_from_string("R0.y@fully");
+   r0y->pin_live_range(true, false);
+
+   Register *r1x = vf.dest_from_string("S1.x@free");
+   RegisterVec4 r2 = vf.dest_vec4_from_string("S2.xyzw", dummy, pin_chan);
+
+   Register *r3x = vf.dest_from_string("S3.x");
+   Register *r3y = vf.dest_from_string("S3.y");
+   Register *r3z = vf.dest_from_string("S3.z");
+
+   Register *r4x = vf.dest_from_string("S4.x");
+   Register *r4y = vf.dest_from_string("S4.y");
+
+   RegisterVec4 r5 = vf.dest_vec4_from_string("S5.xy_w", dummy, pin_group);
+   RegisterVec4 r6 = vf.dest_vec4_from_string("S6.xyzw", dummy, pin_group);
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*r0x, 0, 3);
+   expect.set_life_range(*r0y, 0, 3);
+
+   expect.set_life_range(*r1x, 1, 2);
+
+   expect.set_life_range(*r2[0], 3, 4);
+   expect.set_life_range(*r2[1], 3, 4);
+   expect.set_life_range(*r2[2], 2, 3);
+   expect.set_life_range(*r2[3], 2, 4);
+
+   expect.set_life_range(*r3x, 4, 5);
+   expect.set_life_range(*r3y, 4, 5);
+   expect.set_life_range(*r3z, 4, 6);
+
+   expect.set_life_range(*r4x, 5, 6);
+   expect.set_life_range(*r4y, 5, 6);
+
+   expect.set_life_range(*r5[0], 6, 7);
+   expect.set_life_range(*r5[1], 6, 7);
+   expect.set_life_range(*r5[3], 6, 7);
+
+
+   expect.set_life_range(*r6[0], 7, 8);
+   expect.set_life_range(*r6[1], 7, 8);
+   expect.set_life_range(*r6[2], 7, 8);
+   expect.set_life_range(*r6[3], 7, 8);
+
+   check(basic_interpolation_translated_1, expect);
+}
+
+TEST_F(LiveRangeTests, SimpleArrayAccess)
+{
+   RegisterVec4::Swizzle dummy;
+
+   ValueFactory vf;
+
+   auto array = vf.array_from_string("A0[2].xy");
+
+   auto s1 = vf.dest_from_string("S1.x");
+   auto s2x = vf.dest_from_string("S2.x");
+   auto s2y = vf.dest_from_string("S2.y");
+
+   auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_group);
+
+
+   LiveRangeMap expect = vf.prepare_live_range_map();
+
+   expect.set_life_range(*array->element(0, nullptr, 0), 0, 4);
+   expect.set_life_range(*array->element(0, nullptr, 1), 0, 4);
+   expect.set_life_range(*array->element(1, nullptr, 0), 0, 4);
+
+   expect.set_life_range(*array->element(1, nullptr, 1), 0, 4);
+
+   expect.set_life_range(*s1, 2, 3);
+
+   expect.set_life_range(*s2x, 4, 5);
+   expect.set_life_range(*s2y, 4, 5);
+
+   expect.set_life_range(*s3[0], 5, 6);
+   expect.set_life_range(*s3[1], 5, 6);
+
+   check(shader_with_dest_array2_scheduled, expect);
+}
+
+void LiveRangeTests::check(const char *shader, LiveRangeMap& expect)
+{
+   auto sh = from_string(shader);
+   ASSERT_TRUE(sh);
+
+   LiveRangeEvaluator eval;
+
+   LiveRangeMap eval_map = eval.run(*sh);
+
+   ostringstream eval_str;
+   eval_str << eval_map;
+
+   ostringstream expect_str;
+   expect_str << expect;
+
+   EXPECT_EQ(eval_str.str(), expect_str.str());
+
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp
new file mode 100644
index 0000000..b14bec1
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp
@@ -0,0 +1,300 @@
+
+#include "sfn_test_shaders.h"
+#include "../sfn_shader.h"
+#include "../sfn_optimizer.h"
+#include "../sfn_ra.h"
+#include "../sfn_scheduler.h"
+
+using namespace r600;
+using std::ostringstream;
+
+class TestShaderFromNir : public TestShader {
+
+protected:
+
+   void check(Shader *s, const char *expect_str);
+   void ra_check(Shader *s, const char *expect_str);
+};
+
+
+TEST_F(TestShaderFromNir, SimpleDCE)
+{
+   auto sh = from_string(red_triangle_fs_expect_from_nir);
+   dead_code_elimination(*sh);
+
+   check(sh, red_triangle_fs_expect_from_nir_dce);
+}
+
+
+TEST_F(TestShaderFromNir, CopyPropagationForwardBackward)
+{
+   auto sh = from_string(add_add_1_expect_from_nir);
+   copy_propagation_fwd(*sh);
+   check(sh, add_add_1_expect_from_nir_copy_prop_fwd);
+}
+
+TEST_F(TestShaderFromNir, CopyPropagationForwardDCE)
+{
+   auto sh = from_string(add_add_1_expect_from_nir);
+   copy_propagation_fwd(*sh);
+   dead_code_elimination(*sh);
+   check(sh, add_add_1_expect_from_nir_copy_prop_fwd_dce);
+}
+
+TEST_F(TestShaderFromNir, CopyPropagationBackwardDCE)
+{
+   auto sh = from_string(add_add_1_expect_from_nir_copy_prop_fwd_dce);
+   copy_propagation_backward(*sh);
+   dead_code_elimination(*sh);
+   check(sh, add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd);
+}
+
+
+TEST_F(TestShaderFromNir, FullOPtimize)
+{
+   auto sh = from_string(basic_interpolation_orig);
+
+   bool progress;
+
+   do {
+      progress = false;
+      progress |= copy_propagation_fwd(*sh);
+      progress |= dead_code_elimination(*sh);
+      progress |= copy_propagation_backward(*sh);
+      progress |= dead_code_elimination(*sh);
+   } while (progress);
+
+   check(sh, basic_interpolation_expect_from_nir_opt);
+}
+
+TEST_F(TestShaderFromNir, CombinePinFlags)
+{
+   auto sh = from_string(shader_group_chan_pin_to_combine);
+
+   bool progress;
+
+   do {
+      progress = false;
+      progress |= copy_propagation_fwd(*sh);
+      progress |= dead_code_elimination(*sh);
+      progress |= copy_propagation_backward(*sh);
+      progress |= dead_code_elimination(*sh);
+   } while (progress);
+
+   check(sh, shader_group_chan_pin_combined);
+}
+
+
+TEST_F(TestShaderFromNir, FullOPtimizeLoop)
+{
+   auto sh = from_string(vs_nexted_loop_from_nir_expect);
+
+   optimize(*sh);
+
+   check(sh, vs_nexted_loop_from_nir_expect_opt);
+}
+TEST_F(TestShaderFromNir, OptimizeWithDestArrayValue)
+{
+   auto sh = from_string(shader_with_dest_array);
+
+   optimize(*sh);
+
+   check(sh, shader_with_dest_array_opt_expect);
+}
+
+TEST_F(TestShaderFromNir, ScheduleOPtimizedWithDestArrayValue)
+{
+   auto sh = from_string(shader_with_dest_array_opt_expect);
+   check(schedule(sh), shader_with_dest_array_opt_scheduled);
+}
+
+TEST_F(TestShaderFromNir, ScheduleWithArrayWriteAndRead)
+{
+   auto sh = from_string(shader_with_dest_array2);
+   check(schedule(sh), shader_with_dest_array2_scheduled);
+}
+
+TEST_F(TestShaderFromNir, RA_with_dest_array)
+{
+   auto sh = from_string(shader_with_dest_array2_scheduled);
+
+   auto lrm = r600::LiveRangeEvaluator().run(*sh);
+   EXPECT_TRUE(r600::register_allocation(lrm));
+
+
+   ra_check(sh, shader_with_dest_array2_scheduled_ra);
+}
+
+TEST_F(TestShaderFromNir, RA_with_chan_group)
+{
+   auto sh = from_string(shader_group_chan_pin_combined_sheduled);
+
+   auto lrm = r600::LiveRangeEvaluator().run(*sh);
+   EXPECT_TRUE(r600::register_allocation(lrm));
+   ra_check(sh, shader_group_chan_pin_combined_sheduled_ra);
+}
+
+
+TEST_F(TestShaderFromNir, TES_opt)
+{
+   auto sh = from_string(tes_pre_op);
+
+   optimize(*sh);
+
+   check(sh, tes_optimized);
+}
+
+TEST_F(TestShaderFromNir, TES_scheduled)
+{
+   auto sh = from_string(tes_optimized_pre_sched);
+
+   check(schedule(sh), tes_optimized_sched);
+}
+
+
+/*
+TEST_F(TestShaderFromNir, ShaderClone)
+{
+   auto sh = from_string(red_triangle_fs_expect_from_nir);
+
+   auto sh_cloned = sh->clone();
+
+   MemoryPool::instance().push();
+   dead_code_elimination(*sh);
+
+   check(sh, red_triangle_fs_expect_from_nir_dce);
+
+   check(sh_cloned, red_triangle_fs_expect_from_nir);
+
+   MemoryPool::instance().pop();
+   check(sh, red_triangle_fs_expect_from_nir_dce);
+}
+*/
+
+TEST_F(TestShaderFromNir, ShaderSchedule)
+{
+   auto sh = from_string(basic_interpolation_orig);
+
+   check(schedule(sh), basic_interpolation_expect_from_nir_sched);
+
+}
+
+TEST_F(TestShaderFromNir, ShaderScheduleCayman)
+{
+   auto sh = from_string(basic_interpolation_orig_cayman);
+
+   check(schedule(sh), basic_interpolation_expect_from_nir_sched_cayman);
+}
+
+
+TEST_F(TestShaderFromNir, ShaderScheduleOptimizedCayman)
+{
+   auto sh = from_string(basic_interpolation_orig_cayman);
+
+   optimize(*sh);
+
+   check(schedule(sh), basic_interpolation_expect_opt_sched_cayman);
+}
+
+
+TEST_F(TestShaderFromNir, CopyPropLegalConst)
+{
+     auto sh = from_string(dot4_pre);
+
+     copy_propagation_fwd(*sh);
+     dead_code_elimination(*sh);
+
+     check(sh, dot4_copy_prop_dce);
+}
+
+
+TEST_F(TestShaderFromNir, FullOPtimize_glxgears_vs2)
+{
+
+   auto sh = from_string(glxgears_vs2_from_nir_expect);
+   optimize(*sh);
+   check(sh, glxgears_vs2_from_nir_expect_optimized);
+}
+
+TEST_F(TestShaderFromNir, test_schedule_group)
+{
+
+   auto sh = from_string(test_schedule_group);
+   optimize(*sh);
+   check(schedule(sh), test_schedule_group_expect);
+}
+
+TEST_F(TestShaderFromNir, test_dont_kill_dual_use)
+{
+   auto sh = from_string(shader_copy_prop_dont_kill_double_use);
+   optimize(*sh);
+   check(schedule(sh), shader_copy_prop_dont_kill_double_use_expect);
+}
+
+
+
+TEST_F(TestShaderFromNir, test_schedule_with_bany)
+{
+
+   auto sh = from_string(shader_with_bany_expect_eg);
+   optimize(*sh);
+   check(schedule(sh), shader_with_bany_expect_opt_sched_eg);
+}
+
+
+TEST_F(TestShaderFromNir, GroupAndChanCombine)
+{
+   auto sh = from_string(shader_group_chan_pin_to_combine_2);
+   optimize(*sh);
+   check(sh, shader_group_chan_pin_to_combine_2_opt);
+}
+
+TEST_F(TestShaderFromNir, RemoveUseAfterSplitgroup)
+{
+   auto sh = from_string(fs_with_loop_multislot_reuse);
+   check(schedule(sh), fs_with_loop_multislot_reuse_scheduled);
+}
+
+TEST_F(TestShaderFromNir, OptimizeVSforTCS)
+{
+   auto sh = from_string(vtx_for_tcs_inp);
+   optimize(*sh);
+   check(sh, vtx_for_tcs_opt);
+}
+
+TEST_F(TestShaderFromNir, ScheduleVSforTCS)
+{
+   auto sh = from_string(vtx_for_tcs_pre_sched);
+   check(schedule(sh), vtx_for_tcs_sched);
+}
+
+
+void TestShaderFromNir::check(Shader *s, const char *expect_orig)
+{
+   ostringstream test_str;
+   s->print(test_str);
+
+   auto expect = from_string(expect_orig);
+
+   ostringstream expect_str;
+   expect->print(expect_str);
+
+   EXPECT_EQ(test_str.str(), expect_str.str());
+}
+
+void TestShaderFromNir::ra_check(Shader *s, const char *expect_orig)
+{
+   s->value_factory().clear_pins();
+   ostringstream test_str;
+   s->print(test_str);
+
+   auto expect = from_string(expect_orig);
+   expect->value_factory().clear_pins();
+
+   ostringstream expect_str;
+   expect->print(expect_str);
+
+   EXPECT_EQ(test_str.str(), expect_str.str());
+}
+
+
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp
new file mode 100644
index 0000000..8ecbd3c
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_shaderfromstring_test.cpp
@@ -0,0 +1,123 @@
+
+#include "../sfn_instrfactory.h"
+
+#include "../sfn_instr_alu.h"
+#include "../sfn_instr_tex.h"
+#include "../sfn_instr_export.h"
+
+#include "gtest/gtest.h"
+#include <sstream>
+
+using namespace r600;
+
+using std::istringstream;
+using std::string;
+using std::vector;
+
+class TestShaderFromString : public ::testing::Test
+{
+public:
+   void SetUp() override {
+      m_instr_factory = new InstrFactory();
+      init_pool();
+   }
+
+   void TearDown() override {
+      release_pool();
+   }
+
+   TestShaderFromString();
+
+   std::vector<PInst> from_string(const std::string& s);
+
+protected:
+   void check(const vector<PInst>& eval, const std::vector<PInst, Allocator<PInst>>& expect);
+private:
+   InstrFactory *m_instr_factory;
+};
+
+
+TEST_F(TestShaderFromString, test_simple_fs)
+{
+   auto init_str =
+         R"(
+
+# load constant color
+ALU MOV R2000.x@group : L[0x38000000] {W}
+ALU MOV R2000.y@group : L[0x0] {W}
+ALU MOV R2000.z@group : L[0x0] {W}
+ALU MOV R2000.w@group : L[0x38F00000] {WL}
+
+# write output
+EXPORT_DONE PIXEL 0 R2000.xyzw
+)";
+
+
+   auto shader = from_string(init_str);
+
+   std::vector<PInst, Allocator<PInst>> expect;
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 0, pin_group),
+                                 new LiteralConstant(0x38000000),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 1, pin_group),
+                                 new LiteralConstant( 0x0),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 2, pin_group),
+                                 new LiteralConstant( 0x0),
+                                 {alu_write}));
+
+   expect.push_back(new AluInstr(op1_mov,
+                                 new Register( 2000, 3, pin_group),
+                                 new LiteralConstant( 0x38F00000),
+                                 {alu_write, alu_last_instr}));
+
+   auto exp = new ExportInstr(
+            ExportInstr::pixel, 0, RegisterVec4(2000, false));
+   exp->set_is_last_export(true);
+   expect.push_back(exp);
+
+   check(shader, expect);
+
+}
+
+
+
+TestShaderFromString::TestShaderFromString()
+{
+
+}
+
+std::vector<PInst> TestShaderFromString::from_string(const std::string& s)
+{
+   istringstream is(s);
+   string line;
+
+   std::vector<PInst> shader;
+
+   while (std::getline(is, line)) {
+      if (line.find_first_not_of(" \t") == std::string::npos)
+         continue;
+      if (line[0] == '#')
+         continue;
+
+      shader.push_back(m_instr_factory->from_string(line, 0));
+   }
+
+   return shader;
+}
+
+void TestShaderFromString::check(const vector<PInst>& eval,
+                                 const std::vector<PInst, Allocator<PInst>>& expect)
+{
+   ASSERT_EQ(eval.size(), expect.size());
+
+   for (unsigned i = 0; i < eval.size(); ++i) {
+      EXPECT_EQ(*eval[i], *expect[i]);
+   }
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp
new file mode 100644
index 0000000..1dde8ce
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp
@@ -0,0 +1,3116 @@
+#include "sfn_test_shaders.h"
+#include "../sfn_shader_fs.h"
+#include "../sfn_shader_gs.h"
+#include "../sfn_shader_tess.h"
+#include "../sfn_shader_vs.h"
+#include "../sfn_memorypool.h"
+
+namespace r600 {
+
+using std::istringstream;
+using std::string;
+
+const char *red_triangle_fs_nir  =
+R"(shader: MESA_SHADER_FRAGMENT
+name: TTN
+inputs: 0
+outputs: 1
+uniforms: 0
+shared: 0
+decl_function main (0 params)
+
+impl main {
+   decl_var  INTERP_MODE_FLAT vec4 out@out_0-temp
+   block block_0:
+   /* preds: */
+   vec4 32 ssa_0 = load_const (0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
+   vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
+   intrinsic store_output (ssa_0, ssa_1) (0, 15, 0, 160, 132) /* base=0 */ /* wrmask=xyz */ /* component=0 */ /* src_type=float32 */ /* location=4 slots=1 */
+   /* succs: block_1 */
+   block block_1:
+})";
+
+const char *red_triangle_fs_expect_from_nir = R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@group : I[1.0] {W}
+ALU MOV S0.y@group : I[0] {W}
+ALU MOV S0.z@group : I[0] {W}
+ALU MOV S0.w@group : I[1.0] {WL}
+ALU MOV S1.x@free : I[0] {WL}
+EXPORT_DONE PIXEL 0 S0.xyzw
+)";
+
+const char *red_triangle_fs_expect_from_nir_dce = R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@group : I[1.0] {W}
+ALU MOV S0.y@group : I[0] {W}
+ALU MOV S0.z@group : I[0] {W}
+ALU MOV S0.w@group : I[1.0] {WL}
+EXPORT_DONE PIXEL 0 S0.xyzw
+)";
+
+
+const char *add_add_1_nir =
+R"(shader: MESA_SHADER_FRAGMENT
+name: GLSL3
+inputs: 0
+outputs: 1
+uniforms: 1
+shared: 0
+decl_var uniform INTERP_MODE_NONE vec4 color (0, 0, 0)
+decl_function main (0 params)
+
+impl main {
+     decl_var  INTERP_MODE_NONE vec4 out@gl_FragColor-temp
+     block block_0:
+     /* preds: */
+     vec1 32 ssa_0 = load_const (0xbf000000 /* -0.500000 */)
+     vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
+     vec4 32 ssa_2 = intrinsic load_uniform (ssa_1) (0, 1, 160) /* base=0 */ /* range=1 */ /* dest_type=float32 */   /* color */
+     vec1 32 ssa_3 = fadd ssa_0, ssa_2.x
+     vec4 32 ssa_4 = vec4 ssa_3, ssa_2.y, ssa_2.z, ssa_2.w
+     intrinsic store_output (ssa_4, ssa_1) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=2 slots=1 */
+     /* succs: block_1 */
+     block block_1:
+})";
+
+const char *add_add_1_expect_from_nir =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP WRITE_ALL_COLORS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@free : L[0xbf000000] {WL}
+ALU MOV S1.x@free : I[0] {WL}
+ALU MOV S2.x : KC0[0].x {W}
+ALU MOV S2.y : KC0[0].y {W}
+ALU MOV S2.z : KC0[0].z {W}
+ALU MOV S2.w : KC0[0].w {WL}
+ALU ADD S3.x@free : S0.x@free S2.x {WL}
+ALU MOV S4.x@group : S3.x@free {W}
+ALU MOV S4.y@group : S2.y {W}
+ALU MOV S4.z@group : S2.z {W}
+ALU MOV S4.w@group : S2.w {WL}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+
+const char *add_add_1_expect_from_nir_copy_prop_fwd =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP WRITE_ALL_COLORS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@free : L[0xbf000000] {WL}
+ALU MOV S1.x@free : I[0] {WL}
+ALU MOV S2.x : KC0[0].x {W}
+ALU MOV S2.y : KC0[0].y {W}
+ALU MOV S2.z : KC0[0].z {W}
+ALU MOV S2.w : KC0[0].w {WL}
+ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL}
+ALU MOV S4.x@group : S3.x@free {W}
+ALU MOV S4.y@group : KC0[0].y {W}
+ALU MOV S4.z@group : KC0[0].z {W}
+ALU MOV S4.w@group : KC0[0].w {WL}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+const char *add_add_1_expect_from_nir_copy_prop_fwd_dce =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP WRITE_ALL_COLORS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU ADD S3.x@free : L[0xbf000000] KC0[0].x {WL}
+ALU MOV S4.x@group : S3.x@free {W}
+ALU MOV S4.y@group : KC0[0].y {W}
+ALU MOV S4.z@group : KC0[0].z {W}
+ALU MOV S4.w@group : KC0[0].w {WL}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+
+const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP WRITE_ALL_COLORS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU ADD S4.x@group : L[0xbf000000] KC0[0].x {W}
+ALU MOV S4.y@group : KC0[0].y {W}
+ALU MOV S4.z@group : KC0[0].z {W}
+ALU MOV S4.w@group : KC0[0].w {WL}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+
+const char *basic_interpolation_nir =
+R"(shader: MESA_SHADER_FRAGMENT
+name: TTN
+inputs: 1
+outputs: 1
+uniforms: 0
+shared: 0
+decl_var uniform INTERP_MODE_NONE sampler2D sampler (0, 0, 0)
+decl_function main (0 params)
+
+impl main {
+        decl_var  INTERP_MODE_NOPERSPECTIVE vec4 in@in_0-temp
+        decl_var  INTERP_MODE_FLAT vec4 out@out_0-temp
+        block block_0:
+        /* preds: */
+        vec2 32 ssa_0 = intrinsic load_barycentric_pixel () (3) /* interp_mode=3 */
+        vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
+        vec4 32 ssa_2 = intrinsic load_interpolated_input (ssa_0, ssa_1) (0, 0, 160, 160) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=32 slots=1 */
+        vec3 32 ssa_3 = f2i32 ssa_2.xyw
+        vec1 32 ssa_4 = mov ssa_3.z
+        vec2 32 ssa_5 = vec2 ssa_3.x, ssa_3.y
+        vec4 32 ssa_6 = (float32)txf ssa_5 (coord), ssa_4 (lod), 0 (texture), 0 (sampler)
+        intrinsic store_output (ssa_6, ssa_1) (0, 15, 0, 160, 132) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=4 slots=1 */
+        /* succs: block_1 */
+        block block_1:
+})";
+
+const char *basic_interpolation_expect_from_nir =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S2.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S2.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S2.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S2.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+
+ALU TRUNC S3.x@free : S2.x@chan {WL}
+ALU TRUNC S4.y@free : S2.y@chan {WL}
+ALU TRUNC S5.z@free : S2.w@chan {WL}
+
+ALU FLT_TO_INT S6.x : S3.x@free {W}
+ALU FLT_TO_INT S6.y : S4.y@free {W}
+ALU FLT_TO_INT S6.z : S5.z@free {WL}
+
+ALU MOV S7.x@free : S6.z {WL}
+ALU MOV S8.x : S6.x {W}
+ALU MOV S8.y : S6.y {WL}
+ALU MOV S9.x@group : S8.x {W}
+ALU MOV S9.y@group : S8.y {W}
+ALU MOV S9.w@group : S7.x@free {WL}
+TEX LD S10.xyzw : S9.xy_w RID:18 SID:0 NNNN
+EXPORT_DONE PIXEL 0 S10.xyzw)";
+
+
+const char *basic_interpolation_translated_1 =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S2.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S2.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S2.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S2.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+
+ALU FLT_TO_INT S3.x : S2.x@free {W}
+ALU FLT_TO_INT S3.y : S2.y@free {W}
+ALU FLT_TO_INT S3.z : S2.w@free {WL}
+ALU MOV S4.x : S3.x {W}
+ALU MOV S4.y : S3.y {WL}
+ALU MOV S5.x@group : S4.x {W}
+ALU MOV S5.y@group : S4.y {W}
+ALU MOV S5.w@group : S3.z {WL}
+TEX LD S6.xyzw : S5.xy_w RID:18 SID:0 NNNN
+EXPORT_DONE PIXEL 0 S6.xyzw)";
+
+
+
+const char *basic_interpolation_2 =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S2.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S2.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S2.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S2.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+
+const char *basic_interpolation_orig =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU MOV S1024.x : I[0] {WL}
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S1025.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S1025.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+
+ALU FLT_TO_INT S1026.x : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z : S1025.w@chan {WL}
+ALU MOV S1027.x : S1026.x {W}
+ALU MOV S1027.y : S1026.y {WL}
+ALU MOV S1028.x@group : S1027.x {W}
+ALU MOV S1028.y@group : S1027.y {W}
+ALU MOV S1028.w@group : S1026.z {WL}
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+EXPORT_DONE PIXEL 0 S1029.xyzw
+)";
+
+const char *basic_interpolation_expect_from_nir_sched =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S1025.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S1025.w@chan : R0.x@fully Param0.w VEC_210 {W}
+ALU MOV S1024.x : I[0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S1025.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU FLT_TO_INT S1026.x : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z : S1025.w@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU MOV S1027.x : S1026.x {W}
+ALU MOV S1027.y : S1026.y {W}
+ALU MOV S1028.w@group : S1026.z {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU MOV S1028.x@group : S1027.x {W}
+ALU MOV S1028.y@group : S1027.y {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S1029.xyzw
+BLOCK_END
+)";
+
+
+const char *basic_interpolation_orig_cayman =
+R"(FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU MOV S1024.x : I[0] {WL}
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S1025.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S1025.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+
+ALU FLT_TO_INT S1026.x : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z : S1025.w@chan {WL}
+ALU MOV S1027.x : S1026.x {W}
+ALU MOV S1027.y : S1026.y {WL}
+ALU MOV S1028.x@group : S1027.x {W}
+ALU MOV S1028.y@group : S1027.y {W}
+ALU MOV S1028.w@group : S1026.z {WL}
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+EXPORT_DONE PIXEL 0 S1029.xyzw
+)";
+
+const char *basic_interpolation_expect_from_nir_sched_cayman =
+R"(FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU FLT_TO_INT S1026.x : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z : S1025.w@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU MOV S1027.x : S1026.x {W}
+ALU MOV S1027.y : S1026.y {W}
+ALU MOV S1028.w@group : S1026.z {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU MOV S1028.x@group : S1027.x {W}
+ALU MOV S1028.y@group : S1027.y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU MOV S1024.x : I[0] {WL}
+ALU_GROUP_END
+BLOCK_START
+BLOCK_END
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+BLOCK_START
+BLOCK_END
+EXPORT_DONE PIXEL 0 S1029.xyzw
+BLOCK_END
+)";
+
+const char *basic_interpolation_expect_opt_sched_cayman =
+R"(FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+ALU INTERP_ZW S1025.z@chan : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S1025.w@chan : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY S1025.x@chan : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
+ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
+ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S1029.xyzw
+BLOCK_END
+)";
+
+const char *basic_interpolation_expect_from_nir_opt =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.xy__
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW  __.x@chan : R0.y@fully Param0.x VEC_210 {}
+ALU INTERP_ZW  __.y@chan : R0.x@fully Param0.y VEC_210 {}
+ALU INTERP_ZW  S1025.z@chan : R0.y@fully Param0.z VEC_210 {W}
+ALU INTERP_ZW  S1025.w@chan : R0.x@fully Param0.w VEC_210 {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY  S1025.x@chan : R0.y@fully Param0.x VEC_210 {W}
+ALU INTERP_XY  S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
+ALU INTERP_XY  __.z@chan : R0.y@fully Param0.z VEC_210 {}
+ALU INTERP_XY  __.w@chan : R0.x@fully Param0.w VEC_210 {L}
+ALU_GROUP_END
+ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
+ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
+ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
+TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+EXPORT_DONE PIXEL 0 S1029.xyzw
+)";
+
+const char *dot4_pre =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S1.x : KC0[0].x  {W}
+ALU MOV S1.y : KC0[0].y  {W}
+ALU MOV S1.z : KC0[0].z  {W}
+ALU MOV S1.w : KC0[0].w  {WL}
+ALU MOV S2.x : KC0[1].x  {W}
+ALU MOV S2.y : KC0[1].y  {W}
+ALU MOV S2.z : KC0[1].z  {W}
+ALU MOV S2.w : KC0[1].w  {WL}
+ALU DOT4_IEEE S3.x@free : S1.x S2.x + S1.y S2.y + S1.z S2.z + S1.w S2.w  {WL}
+ALU MOV S4.x : S3.x@free {W}
+ALU MOV S4.y : S3.x@free {W}
+ALU MOV S4.z : S3.x@free {W}
+ALU MOV S4.w : S3.x@free {W}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+const char *dot4_copy_prop_dce =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+#PROP RAT_BASE:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S2.x : KC0[1].x  {W}
+ALU MOV S2.y : KC0[1].y  {W}
+ALU MOV S2.z : KC0[1].z  {W}
+ALU MOV S2.w : KC0[1].w  {WL}
+ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w  {WL}
+ALU MOV S4.x : S3.x@free {W}
+ALU MOV S4.y : S3.x@free {W}
+ALU MOV S4.z : S3.x@free {W}
+ALU MOV S4.w : S3.x@free {W}
+EXPORT_DONE PIXEL 0 S4.xyzw
+)";
+
+const char *glxgears_vs2_nir =
+R"(shader: MESA_SHADER_VERTEX
+name: ARB0
+inputs: 2
+outputs: 2
+uniforms: 11
+shared: 0
+decl_var uniform INTERP_MODE_NONE vec4[11] name (0, 0, 0)
+decl_function main (0 params)
+
+impl main {
+        block block_0:
+        /* preds: */
+        vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
+        vec4 32 ssa_1 = intrinsic load_input (ssa_0) (0, 0, 160, 128) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=0 slots=1 */
+        vec1 32 ssa_2 = load_const (0x00000006 /* 0.000000 */)
+        vec4 32 ssa_3 = intrinsic load_uniform (ssa_2) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /*  */
+        vec4 32 ssa_4 = fmul ssa_1.xxxx, ssa_3
+        vec1 32 ssa_5 = load_const (0x00000007 /* 0.000000 */)
+        vec4 32 ssa_6 = intrinsic load_uniform (ssa_5) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /*  */
+        vec4 32 ssa_7 = ffma ssa_1.yyyy, ssa_6, ssa_4
+        vec1 32 ssa_8 = load_const (0x00000008 /* 0.000000 */)
+        vec4 32 ssa_9 = intrinsic load_uniform (ssa_8) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */ /*  */
+        vec4 32 ssa_10 = ffma ssa_1.zzzz, ssa_9, ssa_7
+        vec1 32 ssa_11 = load_const (0x00000009 /* 0.000000 */)
+        vec4 32 ssa_12 = intrinsic load_uniform (ssa_11) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec4 32 ssa_13 = ffma ssa_1.wwww, ssa_12, ssa_10
+        vec4 32 ssa_14 = intrinsic load_input (ssa_0) (1, 0, 160, 129) /* base=1 */ /* component=0 */ /* dest_type=float32 */ /* location=1 slots=1 */
+        vec1 32 ssa_15 = fdot3 ssa_14.xyz, ssa_14.xyz
+        vec1 32 ssa_16 = frsq abs(ssa_15)
+        vec4 32 ssa_17 = fmul ssa_14, ssa_16.xxxx
+        vec1 32 ssa_18 = load_const (0x00000002 /* 0.000000 */)
+        vec4 32 ssa_19 = intrinsic load_uniform (ssa_18) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec1 32 ssa_20 = load_const (0x0000000a /* 0.000000 */)
+        vec4 32 ssa_21 = intrinsic load_uniform (ssa_20) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec1 32 ssa_22 = fdot3 ssa_17.xyz, ssa_21.xyz
+        vec4 32 ssa_23 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
+        vec1 32 ssa_24 = fmax ssa_23.y, ssa_22
+        vec4 32 ssa_25 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */)
+        vec1 32 ssa_26 = slt ssa_25.z, ssa_22
+        vec1 32 ssa_27 = load_const (0x00000003 /* 0.000000 */)
+        vec4 32 ssa_28 = intrinsic load_uniform (ssa_27) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec3 32 ssa_29 = fadd ssa_28.xyz, ssa_19.xyz
+        vec1 32 ssa_30 = load_const (0x00000004 /* 0.000000 */)
+        vec4 32 ssa_31 = intrinsic load_uniform (ssa_30) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec3 32 ssa_32 = ffma ssa_24.xxx, ssa_31.xyz, ssa_29
+        vec1 32 ssa_33 = load_const (0x00000005 /* 0.000000 */)
+        vec4 32 ssa_34 = intrinsic load_uniform (ssa_33) (0, 11, 160) /* base=0 */ /* range=11 */ /* dest_type=float32 */       /*  */
+        vec3 32 ssa_35 = ffma.sat ssa_26.xxx, ssa_34.xyz, ssa_32
+        intrinsic store_output (ssa_13, ssa_0) (0, 15, 0, 160, 128) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=0 slots=1 */
+        vec3 32 ssa_36 = mov ssa_35
+        vec1 32 ssa_37 = fsat ssa_19.w
+        vec4 32 ssa_38 = vec4 ssa_36.x, ssa_36.y, ssa_36.z, ssa_37
+        intrinsic store_output (ssa_38, ssa_0) (1, 15, 0, 160, 129) /* base=1 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=1 slots=1 */
+        /* succs: block_1 */
+        block block_1:
+})";
+
+const char *glxgears_vs2_from_nir_expect =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+INPUT LOC:1 NAME:1
+OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0
+OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137
+SYSVALUES R1.xyzw R2.xyzw
+SHADER
+ALU MOV S3.x@free : I[0] {WL}
+ALU MOV S4.x@free : L[0x6] {WL}
+ALU MOV S5.x : KC0[6].x {W}
+ALU MOV S5.y : KC0[6].y {W}
+ALU MOV S5.z : KC0[6].z {W}
+ALU MOV S5.w : KC0[6].w {WL}
+ALU MUL_IEEE S6.x : R1.x@fully S5.x {W}
+ALU MUL_IEEE S6.y : R1.x@fully S5.y {W}
+ALU MUL_IEEE S6.z : R1.x@fully S5.z {W}
+ALU MUL_IEEE S6.w : R1.x@fully S5.w {WL}
+ALU MOV S7.x@free : L[0x7] {WL}
+ALU MOV S8.x : KC0[7].x {W}
+ALU MOV S8.y : KC0[7].y {W}
+ALU MOV S8.z : KC0[7].z {W}
+ALU MOV S8.w : KC0[7].w {WL}
+ALU MULADD_IEEE S9.x : R1.y@fully S8.x S6.x {W}
+ALU MULADD_IEEE S9.y : R1.y@fully S8.y S6.y {W}
+ALU MULADD_IEEE S9.z : R1.y@fully S8.z S6.z {W}
+ALU MULADD_IEEE S9.w : R1.y@fully S8.w S6.w {WL}
+ALU MOV S10.x@free : L[0x8] {WL}
+ALU MOV S11.x : KC0[8].x {W}
+ALU MOV S11.y : KC0[8].y {W}
+ALU MOV S11.z : KC0[8].z {W}
+ALU MOV S11.w : KC0[8].w {WL}
+ALU MULADD_IEEE S12.x : R1.z@fully S11.x S9.x {W}
+ALU MULADD_IEEE S12.y : R1.z@fully S11.y S9.y {W}
+ALU MULADD_IEEE S12.z : R1.z@fully S11.z S9.z {W}
+ALU MULADD_IEEE S12.w : R1.z@fully S11.w S9.w {WL}
+ALU MOV S13.x@free : L[0x9] {WL}
+ALU MOV S14.x : KC0[9].x {W}
+ALU MOV S14.y : KC0[9].y {W}
+ALU MOV S14.z : KC0[9].z {W}
+ALU MOV S14.w : KC0[9].w {WL}
+ALU MULADD_IEEE S15.x@group : R1.w@fully S14.x S12.x {W}
+ALU MULADD_IEEE S15.y@group : R1.w@fully S14.y S12.y {W}
+ALU MULADD_IEEE S15.z@group : R1.w@fully S14.z S12.z {W}
+ALU MULADD_IEEE S15.w@group : R1.w@fully S14.w S12.w {WL}
+ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL}
+ALU RECIPSQRT_IEEE S17.x@free : |S16.x@free| {WL}
+ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W}
+ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W}
+ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W}
+ALU MUL_IEEE S18.w : R2.w@fully S17.x@free {WL}
+ALU MOV S19.x@free : L[0x2] {WL}
+ALU MOV S20.x : KC0[2].x {W}
+ALU MOV S20.y : KC0[2].y {W}
+ALU MOV S20.z : KC0[2].z {W}
+ALU MOV S20.w : KC0[2].w {WL}
+ALU MOV S21.x@free : L[0xa] {WL}
+ALU MOV S22.x : KC0[10].x {W}
+ALU MOV S22.y : KC0[10].y {W}
+ALU MOV S22.z : KC0[10].z {W}
+ALU MOV S22.w : KC0[10].w {WL}
+ALU DOT4_IEEE S23.x@free : S18.x S22.x + S18.y S22.y + S18.z S22.z + I[0].x I[0].x {WL}
+ALU MOV S24.x : I[0] {W}
+ALU MOV S24.y : I[0] {W}
+ALU MOV S24.z : I[0] {W}
+ALU MOV S24.w : I[1.0] {WL}
+ALU MAX_DX10 S25.x@free : S24.y S23.x@free {WL}
+ALU MOV S26.x : I[0] {W}
+ALU MOV S26.y : I[0] {W}
+ALU MOV S26.z : I[0] {W}
+ALU MOV S26.w : I[0] {WL}
+ALU SETGT S27.x@free : S23.x@free S26.z {WL}
+ALU MOV S28.x@free : L[0x3] {WL}
+ALU MOV S29.x : KC0[3].x {W}
+ALU MOV S29.y : KC0[3].y {W}
+ALU MOV S29.z : KC0[3].z {W}
+ALU MOV S29.w : KC0[3].w {WL}
+ALU ADD S30.x : S29.x S20.x {W}
+ALU ADD S30.y : S29.y S20.y {W}
+ALU ADD S30.z : S29.z S20.z {WL}
+ALU MOV S31.x@free : L[0x4] {WL}
+ALU MOV S32.x : KC0[4].x {W}
+ALU MOV S32.y : KC0[4].y {W}
+ALU MOV S32.z : KC0[4].z {W}
+ALU MOV S32.w : KC0[4].w {WL}
+ALU MULADD_IEEE S33.x : S25.x@free S32.x S30.x {W}
+ALU MULADD_IEEE S33.y : S25.x@free S32.y S30.y {W}
+ALU MULADD_IEEE S33.z : S25.x@free S32.z S30.z {WL}
+ALU MOV S34.x@free : L[0x5] {WL}
+ALU MOV S35.x : KC0[5].x {W}
+ALU MOV S35.y : KC0[5].y {W}
+ALU MOV S35.z : KC0[5].z {W}
+ALU MOV S35.w : KC0[5].w {WL}
+ALU MULADD_IEEE CLAMP S36.x : S27.x@free S35.x S33.x {W}
+ALU MULADD_IEEE CLAMP S36.y : S27.x@free S35.y S33.y {W}
+ALU MULADD_IEEE CLAMP S36.z : S27.x@free S35.z S33.z {WL}
+EXPORT_DONE POS 0 S15.xyzw
+ALU MOV S38.x : S36.x {W}
+ALU MOV S38.y : S36.y {W}
+ALU MOV S38.z : S36.z {WL}
+ALU MOV CLAMP S39.x@free : S20.w {WL}
+ALU MOV S40.x@group : S38.x {W}
+ALU MOV S40.y@group : S38.y {W}
+ALU MOV S40.z@group : S38.z {W}
+ALU MOV S40.w@group : S39.x@free {WL}
+EXPORT_DONE PARAM 0 S40.xyzw)";
+
+
+const char *glxgears_vs2_from_nir_expect_cayman =
+R"(VS
+CHIPCLASS CAYMAN
+INPUT LOC:0 NAME:0
+INPUT LOC:1 NAME:1
+OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0
+OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137
+SYSVALUES R1.xyzw R2.xyzw
+SHADER
+ALU MOV S3.x@free : I[0] {WL}
+ALU MOV S4.x@free : L[0x6] {WL}
+ALU MOV S5.x : KC0[6].x {W}
+ALU MOV S5.y : KC0[6].y {W}
+ALU MOV S5.z : KC0[6].z {W}
+ALU MOV S5.w : KC0[6].w {WL}
+ALU MUL_IEEE S6.x : R1.x@fully S5.x {W}
+ALU MUL_IEEE S6.y : R1.x@fully S5.y {W}
+ALU MUL_IEEE S6.z : R1.x@fully S5.z {W}
+ALU MUL_IEEE S6.w : R1.x@fully S5.w {WL}
+ALU MOV S7.x@free : L[0x7] {WL}
+ALU MOV S8.x : KC0[7].x {W}
+ALU MOV S8.y : KC0[7].y {W}
+ALU MOV S8.z : KC0[7].z {W}
+ALU MOV S8.w : KC0[7].w {WL}
+ALU MULADD_IEEE S9.x : R1.y@fully S8.x S6.x {W}
+ALU MULADD_IEEE S9.y : R1.y@fully S8.y S6.y {W}
+ALU MULADD_IEEE S9.z : R1.y@fully S8.z S6.z {W}
+ALU MULADD_IEEE S9.w : R1.y@fully S8.w S6.w {WL}
+ALU MOV S10.x@free : L[0x8] {WL}
+ALU MOV S11.x : KC0[8].x {W}
+ALU MOV S11.y : KC0[8].y {W}
+ALU MOV S11.z : KC0[8].z {W}
+ALU MOV S11.w : KC0[8].w {WL}
+ALU MULADD_IEEE S12.x : R1.z@fully S11.x S9.x {W}
+ALU MULADD_IEEE S12.y : R1.z@fully S11.y S9.y {W}
+ALU MULADD_IEEE S12.z : R1.z@fully S11.z S9.z {W}
+ALU MULADD_IEEE S12.w : R1.z@fully S11.w S9.w {WL}
+ALU MOV S13.x@free : L[0x9] {WL}
+ALU MOV S14.x : KC0[9].x {W}
+ALU MOV S14.y : KC0[9].y {W}
+ALU MOV S14.z : KC0[9].z {W}
+ALU MOV S14.w : KC0[9].w {WL}
+ALU MULADD_IEEE S15.x@group : R1.w@fully S14.x S12.x {W}
+ALU MULADD_IEEE S15.y@group : R1.w@fully S14.y S12.y {W}
+ALU MULADD_IEEE S15.z@group : R1.w@fully S14.z S12.z {W}
+ALU MULADD_IEEE S15.w@group : R1.w@fully S14.w S12.w {WL}
+ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL}
+ALU RECIPSQRT_IEEE S17.x@chan : |S16.x@free| + |S16.x@free| + S16.x@free {WL}
+ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W}
+ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W}
+ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W}
+ALU MUL_IEEE S18.w : R2.w@fully S17.x@free {WL}
+ALU MOV S19.x@free : L[0x2] {WL}
+ALU MOV S20.x : KC0[2].x {W}
+ALU MOV S20.y : KC0[2].y {W}
+ALU MOV S20.z : KC0[2].z {W}
+ALU MOV S20.w : KC0[2].w {WL}
+ALU MOV S21.x@free : L[0xa] {WL}
+ALU MOV S22.x : KC0[10].x {W}
+ALU MOV S22.y : KC0[10].y {W}
+ALU MOV S22.z : KC0[10].z {W}
+ALU MOV S22.w : KC0[10].w {WL}
+ALU DOT4_IEEE S23.x@free : S18.x S22.x + S18.y S22.y + S18.z S22.z + I[0].x I[0].x {WL}
+ALU MOV S24.x : I[0] {W}
+ALU MOV S24.y : I[0] {W}
+ALU MOV S24.z : I[0] {W}
+ALU MOV S24.w : I[1.0] {WL}
+ALU MAX_DX10 S25.x@free : S24.y S23.x@free {WL}
+ALU MOV S26.x : I[0] {W}
+ALU MOV S26.y : I[0] {W}
+ALU MOV S26.z : I[0] {W}
+ALU MOV S26.w : I[0] {WL}
+ALU SETGT S27.x@free : S23.x@free S26.z {WL}
+ALU MOV S28.x@free : L[0x3] {WL}
+ALU MOV S29.x : KC0[3].x {W}
+ALU MOV S29.y : KC0[3].y {W}
+ALU MOV S29.z : KC0[3].z {W}
+ALU MOV S29.w : KC0[3].w {WL}
+ALU ADD S30.x : S29.x S20.x {W}
+ALU ADD S30.y : S29.y S20.y {W}
+ALU ADD S30.z : S29.z S20.z {WL}
+ALU MOV S31.x@free : L[0x4] {WL}
+ALU MOV S32.x : KC0[4].x {W}
+ALU MOV S32.y : KC0[4].y {W}
+ALU MOV S32.z : KC0[4].z {W}
+ALU MOV S32.w : KC0[4].w {WL}
+ALU MULADD_IEEE S33.x : S25.x@free S32.x S30.x {W}
+ALU MULADD_IEEE S33.y : S25.x@free S32.y S30.y {W}
+ALU MULADD_IEEE S33.z : S25.x@free S32.z S30.z {WL}
+ALU MOV S34.x@free : L[0x5] {WL}
+ALU MOV S35.x : KC0[5].x {W}
+ALU MOV S35.y : KC0[5].y {W}
+ALU MOV S35.z : KC0[5].z {W}
+ALU MOV S35.w : KC0[5].w {WL}
+ALU MULADD_IEEE CLAMP S36.x : S27.x@free S35.x S33.x {W}
+ALU MULADD_IEEE CLAMP S36.y : S27.x@free S35.y S33.y {W}
+ALU MULADD_IEEE CLAMP S36.z : S27.x@free S35.z S33.z {WL}
+EXPORT_DONE POS 0 S15.xyzw
+ALU MOV S38.x : S36.x {W}
+ALU MOV S38.y : S36.y {W}
+ALU MOV S38.z : S36.z {WL}
+ALU MOV CLAMP S39.x@free : S20.w {WL}
+ALU MOV S40.x@group : S38.x {W}
+ALU MOV S40.y@group : S38.y {W}
+ALU MOV S40.z@group : S38.z {W}
+ALU MOV S40.w@group : S39.x@free {WL}
+EXPORT_DONE PARAM 0 S40.xyzw)";
+
+
+const char *glxgears_vs2_from_nir_expect_optimized =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+INPUT LOC:1 NAME:1
+OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0
+OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137
+SYSVALUES R1.xyzw R2.xyzw
+SHADER
+ALU MUL_IEEE S6.x : R1.x@fully KC0[6].x {W}
+ALU MUL_IEEE S6.y : R1.x@fully KC0[6].y {W}
+ALU MUL_IEEE S6.z : R1.x@fully KC0[6].z {W}
+ALU MUL_IEEE S6.w : R1.x@fully KC0[6].w {WL}
+ALU MULADD_IEEE S9.x : R1.y@fully KC0[7].x S6.x {W}
+ALU MULADD_IEEE S9.y : R1.y@fully KC0[7].y S6.y {W}
+ALU MULADD_IEEE S9.z : R1.y@fully KC0[7].z S6.z {W}
+ALU MULADD_IEEE S9.w : R1.y@fully KC0[7].w S6.w {WL}
+ALU MULADD_IEEE S12.x : R1.z@fully KC0[8].x S9.x {W}
+ALU MULADD_IEEE S12.y : R1.z@fully KC0[8].y S9.y {W}
+ALU MULADD_IEEE S12.z : R1.z@fully KC0[8].z S9.z {W}
+ALU MULADD_IEEE S12.w : R1.z@fully KC0[8].w S9.w {WL}
+ALU MULADD_IEEE S15.x@group : R1.w@fully KC0[9].x S12.x {W}
+ALU MULADD_IEEE S15.y@group : R1.w@fully KC0[9].y S12.y {W}
+ALU MULADD_IEEE S15.z@group : R1.w@fully KC0[9].z S12.z {W}
+ALU MULADD_IEEE S15.w@group : R1.w@fully KC0[9].w S12.w {WL}
+ALU DOT4_IEEE S16.x@free : R2.x@fully R2.x@fully + R2.y@fully R2.y@fully + R2.z@fully R2.z@fully + I[0].x I[0].x {WL}
+ALU RECIPSQRT_IEEE S17.x@free : |S16.x@free| {WL}
+ALU MUL_IEEE S18.x : R2.x@fully S17.x@free {W}
+ALU MUL_IEEE S18.y : R2.y@fully S17.x@free {W}
+ALU MUL_IEEE S18.z : R2.z@fully S17.x@free {W}
+ALU DOT4_IEEE S23.x@free : S18.x KC0[10].x + S18.y KC0[10].y + S18.z KC0[10].z + I[0].x I[0].x {WL}
+ALU MAX_DX10 S25.x@free : I[0] S23.x@free {WL}
+ALU SETGT S27.x@free : S23.x@free I[0] {WL}
+ALU ADD S30.x : KC0[3].x KC0[2].x {W}
+ALU ADD S30.y : KC0[3].y KC0[2].y {W}
+ALU ADD S30.z : KC0[3].z KC0[2].z {WL}
+ALU MULADD_IEEE S33.x : S25.x@free KC0[4].x S30.x {W}
+ALU MULADD_IEEE S33.y : S25.x@free KC0[4].y S30.y {W}
+ALU MULADD_IEEE S33.z : S25.x@free KC0[4].z S30.z {WL}
+ALU MULADD_IEEE CLAMP S40.x@group : S27.x@free KC0[5].x S33.x {W}
+ALU MULADD_IEEE CLAMP S40.y@group : S27.x@free KC0[5].y S33.y {W}
+ALU MULADD_IEEE CLAMP S40.z@group : S27.x@free KC0[5].z S33.z {W}
+EXPORT_DONE POS 0 S15.xyzw
+ALU MOV CLAMP S40.w@group : KC0[2].w {WL}
+EXPORT_DONE PARAM 0 S40.xyzw)";
+
+
+
+const char *vs_nexted_loop_nir =
+R"(shader: MESA_SHADER_VERTEX
+name: GLSL3
+inputs: 1
+outputs: 2
+uniforms: 3
+shared: 0
+decl_var uniform INTERP_MODE_NONE int a (0, 0, 0)
+decl_var uniform INTERP_MODE_NONE int b (1, 1, 0)
+decl_var uniform INTERP_MODE_NONE int c (2, 2, 0)
+decl_function main (0 params)
+
+impl main {
+   decl_var  INTERP_MODE_NONE vec4 out@gl_Position-temp
+   decl_var  INTERP_MODE_NONE vec4 out@gl_FrontColor-temp
+   decl_reg vec1 32 r2
+   decl_reg vec1 32 r3
+   decl_reg vec1 32 r4
+   decl_reg vec1 32 r5
+   decl_reg vec1 32 r6
+   decl_reg vec1 32 r7
+   decl_reg vec1 32 r8
+   block block_0:
+   /* preds: */
+   vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
+   vec4 32 ssa_1 = intrinsic load_input (ssa_0) (0, 0, 160, 128) /* base=0 */ /* component=0 */ /* dest_type=float32 */ /* location=0 slots=1 */
+   vec1 32 ssa_2 = load_const (0xffffffff /* -nan */)
+   vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */)
+   vec1 32 ssa_4 = load_const (0x00000001 /* 0.000000 */)
+   vec4 32 ssa_5 = load_const (0x3f800000 /* 1.000000 */, 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
+   vec1 32 ssa_6 = load_const (0x00000002 /* 0.000000 */)
+   vec1 32 ssa_7 = intrinsic load_uniform (ssa_0) (0, 1, 34) /* base=0 */ /* range=1 */ /* dest_type=int32 */	/* a */
+   vec1 32 ssa_8 = ieq32 ssa_7, ssa_4
+   /* succs: block_1 block_10 */
+   if ssa_8 {
+      block block_1:
+      /* preds: block_0 */
+      vec1 32 ssa_9 = intrinsic load_uniform (ssa_0) (2, 1, 34) /* base=2 */ /* range=1 */ /* dest_type=int32 */	/* c */
+      vec1 32 ssa_10 = ine32 ssa_9, ssa_4
+      /* succs: block_2 block_8 */
+      if ssa_10 {
+         block block_2:
+         /* preds: block_1 */
+         r3 = mov ssa_4
+         r2 = mov ssa_0
+         /* succs: block_3 */
+         loop {
+            block block_3:
+            /* preds: block_2 block_6 */
+            r4 = i2f32 r2
+            vec1 32 ssa_11 = intrinsic load_uniform (ssa_0) (1, 1, 34) /* base=1 */ /* range=1 */ /* dest_type=int32 */	/* b */
+            vec1 32 ssa_12 = ine32 ssa_11, ssa_6
+            /* succs: block_4 block_5 */
+            if ssa_12 {
+               block block_4:
+               /* preds: block_3 */
+               break
+               /* succs: block_7 */
+            } else {
+               block block_5:
+               /* preds: block_3 */
+               /* succs: block_6 */
+            }
+            block block_6:
+            /* preds: block_5 */
+            r5 = iadd r3, ssa_4
+            r2 = mov r3
+            r3 = mov r5
+            /* succs: block_3 */
+         }
+         block block_7:
+         /* preds: block_4 */
+         vec1 32 ssa_13 = load_const (0x3f800000 /* 1.000000 */)
+         r8 = mov ssa_13
+         r7 = mov r8
+         r6 = mov ssa_2
+         /* succs: block_9 */
+      } else {
+         block block_8:
+         /* preds: block_1 */
+         vec1 32 ssa_14 = load_const (0x3f800000 /* 1.000000 */)
+         r8 = mov ssa_14
+         r7 = mov ssa_0
+         r4 = mov r8
+         r6 = mov ssa_3
+         /* succs: block_9 */
+      }
+      block block_9:
+      /* preds: block_7 block_8 */
+      /* succs: block_11 */
+   } else {
+      block block_10:
+      /* preds: block_0 */
+      vec1 32 ssa_15 = load_const (0x3f800000 /* 1.000000 */)
+      r8 = mov ssa_15
+      r7 = mov ssa_0
+      r4 = mov r8
+      r6 = mov ssa_2
+      /* succs: block_11 */
+   }
+   block block_11:
+   /* preds: block_9 block_10 */
+   vec1 32 ssa_16 = b32csel r6, r4, ssa_5.x
+   vec1 32 ssa_17 = b32csel r6, r7, ssa_5.y
+   vec1 32 ssa_18 = b32csel r6, r8, ssa_5.w
+   intrinsic store_output (ssa_1, ssa_0) (0, 15, 0, 160, 128) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=0 slots=1 */
+   vec1 32 ssa_19 = fsat ssa_16
+   vec1 32 ssa_20 = fsat ssa_17
+   vec1 32 ssa_21 = fsat ssa_18
+   vec4 32 ssa_22 = vec4 ssa_19, ssa_20, ssa_0, ssa_21
+   intrinsic store_output (ssa_22, ssa_0) (1, 15, 0, 160, 129) /* base=1 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=1 slots=1 */
+   /* succs: block_12 */
+   block block_12:
+})";
+
+const char *vs_nexted_loop_from_nir_expect =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0
+OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137
+SYSVALUES R1.xyzw
+REGISTERS R2.x R3.x R4.x R5.x R6.x R7.x R8.x
+SHADER
+ALU MOV S9.x@free : I[0] {WL}
+ALU MOV S10.x@free : I[-1] {WL}
+ALU MOV S11.x@free : I[0] {WL}
+ALU MOV S12.x@free : I[1] {WL}
+ALU MOV S13.x : I[1.0] {W}
+ALU MOV S13.y : I[1.0] {W}
+ALU MOV S13.z : I[0] {W}
+ALU MOV S13.w : I[1.0] {WL}
+ALU MOV S14.x@free : L[0x2] {WL}
+ALU MOV S15.x@free : KC0[0].x {WL}
+ALU SETE_INT S16.x@free : S15.x@free S12.x@free {WL}
+IF (( ALU PRED_SETNE_INT __.x@free : S16.x@free I[0] {LEP} PUSH_BEFORE ))
+  ALU MOV S18.x@free : KC0[2].x {WL}
+  ALU SETNE_INT S19.x@free : S18.x@free S12.x {WL}
+  IF (( ALU PRED_SETNE_INT __.y@free : S19.x@free I[0] {LEP} PUSH_BEFORE ))
+    ALU MOV R3.x : S12.x@free {WL}
+    ALU MOV R2.x : S9.x@free {WL}
+    LOOP_BEGIN
+      ALU INT_TO_FLT R4.x : R2.x {WL}
+      ALU MOV S21.x@free : KC0[1].x {WL}
+      ALU SETNE_INT S22.x@free : S21.x@free S14.x@free {WL}
+      IF (( ALU PRED_SETNE_INT __.z@free : S22.x@free I[0] {LEP} PUSH_BEFORE ))
+        BREAK
+      ENDIF
+      ALU ADD_INT R5.x@free : R3.x S12.x@free {WL}
+      ALU MOV R2.x : R3.x {WL}
+      ALU MOV R3.x : R5.x {WL}
+    LOOP_END
+    ALU MOV S24.x@free : I[1.0] {WL}
+    ALU MOV R8.x : S24.x@free {WL}
+    ALU MOV R7.x : R8.x {WL}
+    ALU MOV R6.x : S10.x@free {WL}
+  ELSE
+    ALU MOV S25.x@free : I[1.0] {WL}
+    ALU MOV R8.x : S25.x@free {WL}
+    ALU MOV R7.x : S9.x {WL}
+    ALU MOV R4.x : R8.x {WL}
+    ALU MOV R6.x : S11.x@free {WL}
+  ENDIF
+ELSE
+  ALU MOV S26.x@free : I[1.0] {WL}
+  ALU MOV R8.x : S26.x@free {WL}
+  ALU MOV R7.x : S9.x {WL}
+  ALU MOV R4.x : R8.x {WL}
+  ALU MOV R6.x : S10.x@free {WL}
+ENDIF
+ALU CNDE_INT S27.x@free : R6.x S13.x R4.x {WL}
+ALU CNDE_INT S28.x@free : R6.x S13.y R7.x {WL}
+ALU CNDE_INT S29.x@free : R6.x S13.w R8.x {WL}
+EXPORT_DONE POS 0 R1.xyzw
+ALU MOV CLAMP S31.x@free : S27.x@free {WL}
+ALU MOV CLAMP S32.x@free : S28.x@free {WL}
+ALU MOV CLAMP S33.x@free : S29.x@free {WL}
+ALU MOV S34.x@group : S31.x@free {W}
+ALU MOV S34.y@group : S32.x@free {W}
+ALU MOV S34.z@group : S9.x@free {W}
+ALU MOV S34.w@group : S33.x@free {WL}
+EXPORT_DONE PARAM 0 S34.xyzw
+)";
+
+const char *vs_nexted_loop_from_nir_expect_opt =
+R"(
+VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15 SID:0 SPI_SID:0
+OUTPUT LOC:1 NAME:1 MASK:15 SID:0 SPI_SID:137
+SYSVALUES R1.xyzw
+REGISTERS R2.x@free R3.x@free R4.x@free R5.x@free R6.x@free R7.x@free R8.x@free
+SHADER
+IF (( ALU PREDE_INT __.x@free : KC0[0].x I[1] {LEP} PUSH_BEFORE ))
+  IF (( ALU PRED_SETNE_INT __.y@free : KC0[2].x I[1]  {LEP} PUSH_BEFORE ))
+    ALU MOV R3.x : I[1] {WL}
+    ALU MOV R2.x : I[0] {WL}
+    LOOP_BEGIN
+      ALU INT_TO_FLT R4.x : R2.x {WL}
+      IF (( ALU PRED_SETNE_INT __.z@free : KC0[1].x L[0x2] {LEP} PUSH_BEFORE ))
+        BREAK
+      ENDIF
+      ALU ADD_INT R5.x : R3.x I[1] {WL}
+      ALU MOV R2.x : R3.x {WL}
+      ALU MOV R3.x : R5.x {WL}
+    LOOP_END
+    ALU MOV R8.x : I[1.0] {WL}
+    ALU MOV R7.x : R8.x {WL}
+    ALU MOV R6.x : I[-1] {WL}
+  ELSE
+    ALU MOV R8.x : I[1.0] {WL}
+    ALU MOV R7.x : I[0] {WL}
+    ALU MOV R4.x : R8.x {WL}
+    ALU MOV R6.x : I[0] {WL}
+  ENDIF
+ELSE
+  ALU MOV R8.x : I[1.0] {WL}
+  ALU MOV R7.x : I[0] {WL}
+  ALU MOV R4.x : R8.x {WL}
+  ALU MOV R6.x : I[-1] {WL}
+ENDIF
+ALU CNDE_INT S27.x@free : R6.x I[1.0] R4.x {WL}
+ALU CNDE_INT S28.x@free : R6.x I[1.0] R7.x {WL}
+ALU CNDE_INT S29.x@free : R6.x I[1.0] R8.x {WL}
+EXPORT_DONE POS 0 R1.xyzw
+ALU MOV CLAMP S34.x@group : S27.x@free {W}
+ALU MOV CLAMP S34.y@group : S28.x@free {W}
+ALU MOV CLAMP S34.w@group : S29.x@free {WL}
+EXPORT_DONE PARAM 0 S34.xy0w
+)";
+
+const char *shader_with_local_array_nir =
+R"(
+shader: MESA_SHADER_FRAGMENT
+name: GLSL3
+inputs: 2
+outputs: 1
+uniforms: 2
+shared: 0
+decl_var uniform INTERP_MODE_NONE int index (1, 0, 0)
+decl_var uniform INTERP_MODE_NONE float expect (2, 1, 0)
+decl_function main (0 params)
+
+impl main {
+	decl_var  INTERP_MODE_NONE float[4] m1
+	decl_var  INTERP_MODE_NONE float[4] m2
+	decl_var  INTERP_MODE_NONE vec4 in@packed:m1[0],m1[1],m1[2],m1[3]-temp
+	decl_var  INTERP_MODE_NONE vec4 in@packed:m2[0],m2[1],m2[2],m2[3]-temp
+	decl_var  INTERP_MODE_NONE vec4 out@gl_FragColor-temp
+	decl_reg vec1 32 r0[4]
+	decl_reg vec1 32 r1[4]
+	decl_reg vec1 32 r2
+	block block_0:
+	/* preds: */
+	vec2 32 ssa_0 = intrinsic load_barycentric_pixel () (0) /* interp_mode=0 */
+	vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
+	vec4 32 ssa_2 = intrinsic load_interpolated_input (ssa_0, ssa_1) (0, 0, 160, 160) /* base=0 */ /* component=0 */ /* location=32 slots=1 */
+	vec4 32 ssa_3 = intrinsic load_interpolated_input (ssa_0, ssa_1) (1, 0, 160, 161) /* base=1 */ /* component=0 */ /* location=33 slots=1 */
+	vec1 32 ssa_4 = load_const (0x00000004 /* 0.000000 */)
+	vec1 32 ssa_5 = load_const (0xfffffffc /* -nan */)
+	vec4 32 ssa_6 = load_const (0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
+	vec4 32 ssa_7 = load_const (0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x3f800000 /* 1.000000 */)
+	vec1 32 ssa_8 = mov ssa_2.x
+	r0[0] = mov ssa_8
+	vec1 32 ssa_9 = mov ssa_2.y
+	r0[1] = mov ssa_9
+	vec1 32 ssa_10 = mov ssa_2.z
+	r0[2] = mov ssa_10
+	vec1 32 ssa_11 = mov ssa_2.w
+	r0[3] = mov ssa_11
+	vec1 32 ssa_12 = mov ssa_3.x
+	r1[0] = mov ssa_12
+	vec1 32 ssa_13 = mov ssa_3.y
+	r1[1] = mov ssa_13
+	vec1 32 ssa_14 = mov ssa_3.z
+	r1[2] = mov ssa_14
+	vec1 32 ssa_15 = mov ssa_3.w
+	r1[3] = mov ssa_15
+	vec1 32 ssa_16 = intrinsic load_uniform (ssa_1) (0, 1, 34) /* base=0 */ /* range=1 */ /* dest_type=int32 */	/* index */
+	vec1 32 ssa_17 = ige32 ssa_16, ssa_4
+	/* succs: block_1 block_2 */
+	if ssa_17 {
+		block block_1:
+		/* preds: block_0 */
+		vec1 32 ssa_18 = iadd ssa_16, ssa_5
+		vec1 32 ssa_19 = load_const (0x00000000 /* 0.000000 */)
+		vec1 32 ssa_20 = iadd ssa_19, ssa_18
+		r2 = mov r1[0 + ssa_20]
+		/* succs: block_3 */
+	} else {
+		block block_2:
+		/* preds: block_0 */
+		vec1 32 ssa_21 = load_const (0x00000000 /* 0.000000 */)
+		vec1 32 ssa_22 = iadd ssa_21, ssa_16
+		r2 = mov r0[0 + ssa_22]
+		/* succs: block_3 */
+	}
+	block block_3:
+	/* preds: block_1 block_2 */
+	vec1 32 ssa_23 = intrinsic load_uniform (ssa_1) (1, 1, 160) /* base=1 */ /* range=1 */ /* dest_type=float32 */	/* expect */
+	vec1 32 ssa_24 = feq32 r2, ssa_23
+	vec1 32 ssa_25 = fneu32 r2, ssa_23
+	vec1 32 ssa_26 = b2f32 ssa_25
+	vec1 32 ssa_27 = b2f32 ssa_24
+	vec2 32 ssa_28 = b32csel ssa_24.xx, ssa_6.zw, ssa_7.zw
+	vec4 32 ssa_29 = vec4 ssa_26, ssa_27, ssa_28.x, ssa_28.y
+	intrinsic store_output (ssa_29, ssa_1) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=2 slots=1 */
+	/* succs: block_4 */
+	block block_4:
+}
+)";
+
+const char *shader_with_local_array_expect =
+R"(FS
+CHIPCLASS EVERGREEN
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+#PROP RAT_BASE:1
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10 INTERP:2
+INPUT LOC:1 NAME:5 SID:10 SPI_SID:11 INTERP:2
+OUTPUT LOC:0 NAME:1 MASK:15
+REGISTERS R0.x@fully R0.y@fully R1.x
+ARRAYS A1[4].x A1[4].y
+SHADER
+ALU MOV S6.x@free : I[0] {WL}
+ALU_GROUP_BEGIN
+  ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+  ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+  ALU INTERP_ZW S7.z@chan : R0.y@fully Param0.z {W} VEC_210
+  ALU INTERP_ZW S7.w@chan : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU INTERP_XY S7.x@chan : R0.y@fully Param0.x {W} VEC_210
+  ALU INTERP_XY S7.y@chan : R0.x@fully Param0.y {W} VEC_210
+  ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+  ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU INTERP_ZW __.x@chan : R0.y@fully Param1.x {} VEC_210
+  ALU INTERP_ZW __.y@chan : R0.x@fully Param1.y {} VEC_210
+  ALU INTERP_ZW S8.z@chan : R0.y@fully Param1.z {W} VEC_210
+  ALU INTERP_ZW S8.w@chan : R0.x@fully Param1.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU INTERP_XY S8.x@chan : R0.y@fully Param1.x {W} VEC_210
+  ALU INTERP_XY S8.y@chan : R0.x@fully Param1.y {W} VEC_210
+  ALU INTERP_XY __.z@chan : R0.y@fully Param1.z {} VEC_210
+  ALU INTERP_XY __.w@chan : R0.x@fully Param1.w {L} VEC_210
+ALU_GROUP_END
+ALU MOV S9.x@free : L[0x4] {WL}
+ALU MOV S10.x@free : L[0xfffffffc] {WL}
+ALU MOV S11.x : I[0] {W}
+ALU MOV S11.y : I[1.0] {W}
+ALU MOV S11.z : I[0] {W}
+ALU MOV S11.w : I[1.0] {WL}
+ALU MOV S12.x : I[1.0] {W}
+ALU MOV S12.y : I[0] {W}
+ALU MOV S12.z : I[0] {W}
+ALU MOV S12.w : I[1.0] {WL}
+ALU MOV S13.x@free : S7.x@chan {WL}
+ALU MOV A1[0].x : S13.x@free {WL}
+ALU MOV S14.x@free : S7.y@chan {WL}
+ALU MOV A1[1].x : S14.x@free {WL}
+ALU MOV S15.x@free : S7.z@chan {WL}
+ALU MOV A1[2].x : S15.x@free {WL}
+ALU MOV S16.x@free : S7.w@chan {WL}
+ALU MOV A1[3].x : S16.x@free {WL}
+ALU MOV S17.x@free : S8.x@chan {WL}
+ALU MOV A1[0].y : S17.x@free {WL}
+ALU MOV S18.x@free : S8.y@chan {WL}
+ALU MOV A1[1].y : S18.x@free {WL}
+ALU MOV S19.x@free : S8.z@chan {WL}
+ALU MOV A1[2].y : S19.x@free {WL}
+ALU MOV S20.x@free : S8.w@chan {WL}
+ALU MOV A1[3].y : S20.x@free {WL}
+ALU MOV S21.x@free : KC0[0].x {WL}
+ALU SETGE_INT S22.x@free : S21.x@free S9.x@free {WL}
+IF (( ALU PRED_SETNE_INT __.x@free : S22.x@free I[0] {LEP} PUSH_BEFORE ))
+  ALU ADD_INT S24.x@free : S21.x@free S10.x@free {WL}
+  ALU MOV S25.x@free : I[0] {WL}
+  ALU ADD_INT S26.x@free : S25.x@free S24.x@free {WL}
+  ALU MOV R5.x@free : A1[S26.x@free].y {WL}
+ELSE
+  ALU MOV S27.x@free : I[0] {WL}
+  ALU ADD_INT S28.x@free : S27.x@free S21.x@free {WL}
+  ALU MOV R5.x@free : A1[S28.x@free].x {WL}
+ENDIF
+ALU MOV S29.x@free : KC0[1].x {WL}
+ALU SETE_DX10 S30.x@free : R5.x@free S29.x@free {WL}
+ALU SETNE_DX10 S31.x@free : R5.x@free S29.x@free {WL}
+ALU AND_INT S32.x@free : S31.x@free I[1.0] {WL}
+ALU AND_INT S33.x@free : S30.x@free I[1.0] {WL}
+ALU CNDE_INT S34.x : S30.x@free S12.z S11.z {W}
+ALU CNDE_INT S34.y : S30.x@free S12.w S11.w {WL}
+ALU MOV S35.x@group : S32.x@free {W}
+ALU MOV S35.y@group : S33.x@free {W}
+ALU MOV S35.z@group : S34.x {W}
+ALU MOV S35.w@group : S34.y {WL}
+EXPORT_DONE PIXEL 0 S35.xyzw)";
+
+const char *test_schedule_group =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x : I[0] {WL}
+ALU MOV S1.x : I[1.0] {WL}
+ALU MOV S2.x : KC0[0].x {W}
+ALU MOV S2.y : KC0[0].y {WL}
+ALU MOV S3.x : KC0[2].x {W}
+ALU MOV S3.y : KC0[2].y {WL}
+ALU ADD S4.x : |S2.x| -S3.x {W}
+ALU ADD S4.y : |S2.y| -S3.y {WL}
+ALU DOT4_IEEE S5.x : S4.x S4.x + S4.y S4.y + I[0] I[0] + I[0] I[0] {WL}
+ALU SQRT_IEEE S6.x : S5.x {WL}
+ALU MOV S7.x : KC0[1].x {WL}
+ALU SETGE_DX10 S8.x : S7.x S6.x {WL}
+ALU NOT_INT S9.x : S8.x {WL}
+ALU AND_INT S10.x : S9.x I[1.0] {WL}
+ALU AND_INT S11.x : S8.x I[1.0] {WL}
+ALU MOV S12.x@group : S10.x {W}
+ALU MOV S12.y@group : S11.x {W}
+ALU MOV S12.z@group : S0.x {W}
+ALU MOV S12.w@group : S1.x {WL}
+EXPORT_DONE PIXEL 0 S12.xyzw
+)";
+
+const char *test_schedule_group_expect =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU ADD S4.x@chan : |KC0[0].x| -KC0[2].x {W}
+  ALU ADD S4.y@chan : |KC0[0].y| -KC0[2].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU DOT4_IEEE S5.x@chan : S4.x@chan S4.x@chan {W}
+  ALU DOT4_IEEE __.y@chan : S4.y@chan S4.y@chan {}
+  ALU DOT4_IEEE __.z@chan : I[0] I[0] {}
+  ALU DOT4_IEEE __.w@chan : I[0] I[0] {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SQRT_IEEE S6.x : S5.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETGE_DX10 S8.x : KC0[1].x S6.x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU NOT_INT S9.x : S8.x {W}
+  ALU AND_INT S12.y@group : S8.x I[1.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU AND_INT S12.x@group : S9.x I[1.0] {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S12.xy01
+BLOCK_END
+)";
+
+
+const char *shader_with_bany_nir =
+R"(shader: MESA_SHADER_FRAGMENT
+source_sha1: {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+name: GLSL3
+inputs: 0
+outputs: 1
+uniforms: 8
+shared: 0
+ray queries: 0
+decl_var uniform INTERP_MODE_NONE mat4 arg0 (0, 0, 0)
+decl_var uniform INTERP_MODE_NONE mat4 arg1 (1, 4, 0)
+decl_function main (0 params)
+
+impl main {
+	decl_var  INTERP_MODE_NONE vec4 out@gl_FragColor-temp
+	block block_0:
+	/* preds: */
+	vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
+	vec1 32 ssa_1 = load_const (0x00000001 /* 0.000000 */)
+	vec1 32 ssa_2 = load_const (0x00000002 /* 0.000000 */)
+	vec1 32 ssa_3 = load_const (0x00000003 /* 0.000000 */)
+	vec4 32 ssa_4 = intrinsic load_uniform (ssa_0) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */	/* arg1 */
+	vec4 32 ssa_5 = intrinsic load_uniform (ssa_0) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */	/* arg0 */
+	vec1 32 ssa_6 = b32any_fnequal4 ssa_4, ssa_5
+	vec4 32 ssa_7 = intrinsic load_uniform (ssa_1) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */	/* arg1 */
+	vec4 32 ssa_8 = intrinsic load_uniform (ssa_1) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */	/* arg0 */
+	vec1 32 ssa_9 = b32any_fnequal4 ssa_7, ssa_8
+	vec4 32 ssa_10 = intrinsic load_uniform (ssa_2) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */	/* arg1 */
+	vec4 32 ssa_11 = intrinsic load_uniform (ssa_2) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */	/* arg0 */
+	vec1 32 ssa_12 = b32any_fnequal4 ssa_10, ssa_11
+	vec4 32 ssa_13 = intrinsic load_uniform (ssa_3) (4, 4, 160) /* base=4 */ /* range=4 */ /* dest_type=float32 */	/* arg1 */
+	vec4 32 ssa_14 = intrinsic load_uniform (ssa_3) (0, 4, 160) /* base=0 */ /* range=4 */ /* dest_type=float32 */	/* arg0 */
+	vec1 32 ssa_15 = b32any_fnequal4 ssa_13, ssa_14
+	vec4 32 ssa_16 = vec4 ssa_6, ssa_9, ssa_12, ssa_15
+	vec4 32 ssa_17 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */)
+	vec1 32 ssa_18 = b32any_inequal4 ssa_16, ssa_17
+	vec1 32 ssa_19 = inot ssa_18
+	vec1 32 ssa_20 = b2f32 ssa_19
+	vec4 32 ssa_21 = vec4 ssa_20, ssa_0, ssa_0, ssa_0
+	intrinsic store_output (ssa_21, ssa_0) (0, 15, 0, 160, 130) /* base=0 */ /* wrmask=xyzw */ /* component=0 */ /* src_type=float32 */ /* location=2 slots=1 */
+	/* succs: block_1 */
+	block block_1:
+})";
+
+
+const char *shader_with_bany_expect_eg =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@free : I[0] {WL}
+ALU MOV S1.x@free : I[1] {WL}
+ALU MOV S2.x@free : L[0x2] {WL}
+ALU MOV S3.x@free : L[0x3] {WL}
+ALU MOV S4.x : KC0[4].x {W}
+ALU MOV S4.y : KC0[4].y {W}
+ALU MOV S4.z : KC0[4].z {W}
+ALU MOV S4.w : KC0[4].w {WL}
+ALU MOV S5.x : KC0[0].x {W}
+ALU MOV S5.y : KC0[0].y {W}
+ALU MOV S5.z : KC0[0].z {W}
+ALU MOV S5.w : KC0[0].w {WL}
+ALU SETNE S6.x@group : S4.x S5.x {W}
+ALU SETNE S6.y@group : S4.y S5.y {W}
+ALU SETNE S6.z@group : S4.z S5.z {W}
+ALU SETNE S6.w@group : S4.w S5.w {WL}
+ALU MAX4 S7.x@free : S6.x@group + S6.y@group + S6.z@group + S6.w@group {WL}
+ALU SETE_DX10 S8.x@free : S7.x@free I[1.0] {WL}
+ALU MOV S9.x : KC0[5].x {W}
+ALU MOV S9.y : KC0[5].y {W}
+ALU MOV S9.z : KC0[5].z {W}
+ALU MOV S9.w : KC0[5].w {WL}
+ALU MOV S10.x : KC0[1].x {W}
+ALU MOV S10.y : KC0[1].y {W}
+ALU MOV S10.z : KC0[1].z {W}
+ALU MOV S10.w : KC0[1].w {WL}
+ALU SETNE S11.x@group : S9.x S10.x {W}
+ALU SETNE S11.y@group : S9.y S10.y {W}
+ALU SETNE S11.z@group : S9.z S10.z {W}
+ALU SETNE S11.w@group : S9.w S10.w {WL}
+ALU MAX4 S12.y@free : S11.x@group + S11.y@group + S11.z@group + S11.w@group {WL}
+ALU SETE_DX10 S13.x@free : S12.y@free I[1.0] {WL}
+ALU MOV S14.x : KC0[6].x {W}
+ALU MOV S14.y : KC0[6].y {W}
+ALU MOV S14.z : KC0[6].z {W}
+ALU MOV S14.w : KC0[6].w {WL}
+ALU MOV S15.x : KC0[2].x {W}
+ALU MOV S15.y : KC0[2].y {W}
+ALU MOV S15.z : KC0[2].z {W}
+ALU MOV S15.w : KC0[2].w {WL}
+ALU SETNE S16.x@group : S14.x S15.x {W}
+ALU SETNE S16.y@group : S14.y S15.y {W}
+ALU SETNE S16.z@group : S14.z S15.z {W}
+ALU SETNE S16.w@group : S14.w S15.w {WL}
+ALU MAX4 S17.z@free : S16.x@group + S16.y@group + S16.z@group + S16.w@group {WL}
+ALU SETE_DX10 S18.x@free : S17.z@free I[1.0] {WL}
+ALU MOV S19.x : KC0[7].x {W}
+ALU MOV S19.y : KC0[7].y {W}
+ALU MOV S19.z : KC0[7].z {W}
+ALU MOV S19.w : KC0[7].w {WL}
+ALU MOV S20.x : KC0[3].x {W}
+ALU MOV S20.y : KC0[3].y {W}
+ALU MOV S20.z : KC0[3].z {W}
+ALU MOV S20.w : KC0[3].w {WL}
+ALU SETNE S21.x@group : S19.x S20.x {W}
+ALU SETNE S21.y@group : S19.y S20.y {W}
+ALU SETNE S21.z@group : S19.z S20.z {W}
+ALU SETNE S21.w@group : S19.w S20.w {WL}
+ALU MAX4 S22.w@free : S21.x@group + S21.y@group + S21.z@group + S21.w@group {WL}
+ALU SETE_DX10 S23.x@free : S22.w@free I[1.0] {WL}
+ALU MOV S24.x : S8.x@free {W}
+ALU MOV S24.y : S13.x@free {W}
+ALU MOV S24.z : S18.x@free {W}
+ALU MOV S24.w : S23.x@free {WL}
+ALU MOV S25.x : I[0] {W}
+ALU MOV S25.y : I[0] {W}
+ALU MOV S25.z : I[0] {W}
+ALU MOV S25.w : I[0] {WL}
+ALU SETNE_INT S27.x@free : S24.x S25.x {W}
+ALU SETNE_INT S28.y@free : S24.y S25.y {W}
+ALU SETNE_INT S29.z@free : S24.z S25.z {W}
+ALU SETNE_INT S30.w@free : S24.w S25.w {WL}
+ALU OR_INT S31.x@free : S27.x@free S28.y@free {W}
+ALU OR_INT S32.y@free : S29.z@free S30.w@free {WL}
+ALU OR_INT S26.x@free : S31.x@free S32.y@free {WL}
+ALU NOT_INT S33.x@free : S26.x@free {WL}
+ALU AND_INT S34.x@free : S33.x@free I[1.0] {WL}
+ALU MOV S35.x@group : S34.x@free {W}
+ALU MOV S35.y@group : S0.x@free {W}
+ALU MOV S35.z@group : S0.x@free {W}
+ALU MOV S35.w@group : S0.x@free {WL}
+EXPORT_DONE PIXEL 0 S35.xyzw
+)";
+
+const char *shader_with_bany_expect_opt_sched_eg =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU SETNE S6.x@chgr : KC0[4].x KC0[0].x {W}
+  ALU SETNE S6.y@chgr : KC0[4].y KC0[0].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE S6.z@chgr : KC0[4].z KC0[0].z {W}
+  ALU SETNE S6.w@chgr : KC0[4].w KC0[0].w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MAX4 S7.x@chan : S6.x@chgr {W}
+  ALU MAX4 __.y@chan : S6.y@chgr {}
+  ALU MAX4 __.z@chan : S6.z@chgr {}
+  ALU MAX4 __.w@chan : S6.w@chgr {}
+  ALU SETNE S11.x@chgr : KC0[5].x KC0[1].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETE_DX10 S8.x@free : S7.x@chan I[1.0] {W}
+  ALU SETNE S11.y@chgr : KC0[5].y KC0[1].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE_INT S27.x@chan : S8.x@free I[0] {W}
+  ALU SETNE S11.z@chgr : KC0[5].z KC0[1].z {W}
+  ALU SETNE S11.w@chgr : KC0[5].w KC0[1].w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MAX4 __.x@chan : S11.x@chgr {}
+  ALU MAX4 S12.y@chan : S11.y@chgr {W}
+  ALU MAX4 __.z@chan : S11.z@chgr {}
+  ALU MAX4 __.w@chan : S11.w@chgr {}
+  ALU SETNE S16.x@chgr : KC0[6].x KC0[2].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETE_DX10 S13.x@free : S12.y@chan I[1.0] {W}
+  ALU SETNE S16.y@chgr : KC0[6].y KC0[2].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE_INT S28.y@chan : S13.x@free I[0] {W}
+  ALU SETNE S16.z@chgr : KC0[6].z KC0[2].z {W}
+  ALU SETNE S16.w@chgr : KC0[6].w KC0[2].w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MAX4 __.x@chan : S16.x@chgr {}
+  ALU MAX4 __.y@chan : S16.y@chgr {}
+  ALU MAX4 S17.z@chan : S16.z@chgr {W}
+  ALU MAX4 __.w@chan : S16.w@chgr {}
+  ALU OR_INT S31.x@chan : S27.x@chan S28.y@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE S21.x@chgr : KC0[7].x KC0[3].x {W}
+  ALU SETNE S21.y@chgr : KC0[7].y KC0[3].y {W}
+  ALU SETE_DX10 S18.z@chan : S17.z@chan I[1.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE_INT S29.x@chan : S18.z@chan I[0] {W}
+  ALU SETNE S21.z@chgr : KC0[7].z KC0[3].z {W}
+  ALU SETNE S21.w@chgr : KC0[7].w KC0[3].w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MAX4 __.x@chan : S21.x@chgr {}
+  ALU MAX4 __.y@chan : S21.y@chgr {}
+  ALU MAX4 __.z@chan : S21.z@chgr {}
+  ALU MAX4 S22.w@chan : S21.w@chgr {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETE_DX10 S23.x@free : S22.w@chan I[1.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU SETNE_INT S30.w@chan : S23.x@free I[0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU OR_INT S32.y@chan : S29.x@chan S30.w@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU OR_INT S26.x@chan : S31.x@chan S32.y@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU NOT_INT S33.x@free : S26.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU AND_INT S35.x@group : S33.x@free I[1.0] {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S35.x000
+BLOCK_END
+)";
+
+const char *shader_copy_prop_dont_kill_double_use =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x : I[0] {WL}
+ALU MOV S1.x : I[1] {WL}
+ALU MOV S2.x : I[1.0] {WL}
+ALU MOV S3.x : KC0[2].x {W}
+ALU MOV S3.y : KC0[2].y {WL}
+ALU MOV S4.x : KC0[0].x {W}
+ALU MOV S4.y : KC0[0].y {WL}
+ALU SETNE_DX10 S5.x : S3.y S4.y {W}
+ALU SETNE_DX10 S5.y : S3.x S4.x {WL}
+ALU OR_INT S6.x : S5.x S5.y {WL}
+ALU MOV S7.x : KC0[3].x {W}
+ALU MOV S7.y : KC0[3].y {WL}
+ALU MOV S8.x : KC0[1].x {W}
+ALU MOV S8.y : KC0[1].y {WL}
+ALU SETNE_DX10 S9.x : S7.y S8.y {W}
+ALU SETNE_DX10 S9.y : S7.x S8.x {WL}
+ALU OR_INT S10.x : S9.x S9.y {WL}
+ALU OR_INT S11.x : S10.x S6.x {WL}
+ALU NOT_INT S12.x : S11.x {WL}
+ALU AND_INT S13.x : S12.x I[1.0] {WL}
+ALU AND_INT S14.x : S11.x I[1.0] {WL}
+ALU MOV S15.x@group : S13.x {W}
+ALU MOV S15.y@group : S13.x {W}
+ALU MOV S15.z@group : S14.x {W}
+ALU MOV S15.w@group : S2.x {WL}
+EXPORT_DONE PIXEL 0 S15.xyzw
+)";
+
+
+const char *shader_copy_prop_dont_kill_double_use_expect =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU SETNE_DX10 S5.x : KC0[2].y KC0[0].y {W}
+  ALU SETNE_DX10 S5.y : KC0[2].x KC0[0].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU OR_INT S6.x : S5.x S5.y {W}
+  ALU SETNE_DX10 S9.y : KC0[3].x KC0[1].x {W}
+  ALU SETNE_DX10 S9.x : KC0[3].y KC0[1].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU OR_INT S10.x : S9.x S9.y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU OR_INT S11.x : S10.x S6.x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU NOT_INT S12.x : S11.x {W}
+  ALU AND_INT S15.z@group : S11.x I[1.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU AND_INT S13.x : S12.x I[1.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S15.x@group : S13.x {W}
+  ALU MOV S15.y@group : S13.x {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S15.xyz1
+BLOCK_END
+)";
+
+
+const char *shader_with_dest_array =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11
+OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12
+OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13
+REGISTERS R1.xyzw
+ARRAYS A2[4].xy A2[4].zw
+SHADER
+ALU MOV S6.x : I[0] {WL}
+ALU MOV S7.x : I[1] {WL}
+ALU MOV S8.x : L[0x2] {WL}
+ALU MOV S9.x : L[0x3] {WL}
+ALU MOV S10.x : L[0x4] {WL}
+ALU MOV S11.x : L[0xfffffffc] {WL}
+ALU MOV S12.x : KC0[1].x {W}
+ALU MOV S12.y : KC0[1].y {W}
+ALU MOV S12.z : KC0[1].z {W}
+ALU MOV S12.w : KC0[1].w {WL}
+ALU MOV S13.x : KC0[2].x {W}
+ALU MOV S13.y : KC0[2].y {W}
+ALU MOV S13.z : KC0[2].z {W}
+ALU MOV S13.w : KC0[2].w {WL}
+ALU MUL_IEEE S14.x : S13.x R1.y@fully {W}
+ALU MUL_IEEE S14.y : S13.y R1.y@fully {W}
+ALU MUL_IEEE S14.z : S13.z R1.y@fully {W}
+ALU MUL_IEEE S14.w : S13.w R1.y@fully {WL}
+ALU MULADD_IEEE S15.x : S12.x R1.x@fully S14.x {W}
+ALU MULADD_IEEE S15.y : S12.y R1.x@fully S14.y {W}
+ALU MULADD_IEEE S15.z : S12.z R1.x@fully S14.z {W}
+ALU MULADD_IEEE S15.w : S12.w R1.x@fully S14.w {WL}
+ALU MOV S16.x : KC0[3].x {W}
+ALU MOV S16.y : KC0[3].y {W}
+ALU MOV S16.z : KC0[3].z {W}
+ALU MOV S16.w : KC0[3].w {WL}
+ALU MULADD_IEEE S17.x : S16.x R1.z@fully S15.x {W}
+ALU MULADD_IEEE S17.y : S16.y R1.z@fully S15.y {W}
+ALU MULADD_IEEE S17.z : S16.z R1.z@fully S15.z {W}
+ALU MULADD_IEEE S17.w : S16.w R1.z@fully S15.w {WL}
+ALU MOV S18.x : KC0[4].x {W}
+ALU MOV S18.y : KC0[4].y {W}
+ALU MOV S18.z : KC0[4].z {W}
+ALU MOV S18.w : KC0[4].w {WL}
+ALU MULADD_IEEE S19.x@group : S18.x R1.w@fully S17.x {W}
+ALU MULADD_IEEE S19.y@group : S18.y R1.w@fully S17.y {W}
+ALU MULADD_IEEE S19.z@group : S18.z R1.w@fully S17.z {W}
+ALU MULADD_IEEE S19.w@group : S18.w R1.w@fully S17.w {WL}
+ALU MOV S20.x : I[1.0] {W}
+ALU MOV S20.y : L[0x3f8ccccd] {WL}
+ALU MOV A2[0].x : S20.x {W}
+ALU MOV A2[0].y : S20.y {WL}
+ALU MOV S21.x : L[0x40000000] {W}
+ALU MOV S21.y : L[0x40066666] {WL}
+ALU MOV A2[1].x : S21.x {W}
+ALU MOV A2[1].y : S21.y {WL}
+ALU MOV S22.x : L[0x40400000] {W}
+ALU MOV S22.y : L[0x40466666] {WL}
+ALU MOV A2[2].x : S22.x {W}
+ALU MOV A2[2].y : S22.y {WL}
+ALU MOV S23.x : L[0x40800000] {W}
+ALU MOV S23.y : L[0x40833333] {WL}
+ALU MOV A2[3].x : S23.x {W}
+ALU MOV A2[3].y : S23.y {WL}
+ALU MOV S24.x : L[0x40a00000] {W}
+ALU MOV S24.y : L[0x40a33333] {WL}
+ALU MOV A2[0].z : S24.x {W}
+ALU MOV A2[0].w : S24.y {WL}
+ALU MOV S25.x : L[0x40c00000] {W}
+ALU MOV S25.y : L[0x40c33333] {WL}
+ALU MOV A2[1].z : S25.x {W}
+ALU MOV A2[1].w : S25.y {WL}
+ALU MOV S26.x : L[0x40e00000] {W}
+ALU MOV S26.y : L[0x40e33333] {WL}
+ALU MOV A2[2].z : S26.x {W}
+ALU MOV A2[2].w : S26.y {WL}
+ALU MOV S27.x : L[0x41000000] {W}
+ALU MOV S27.y : L[0x4101999a] {WL}
+ALU MOV A2[3].z : S27.x {W}
+ALU MOV A2[3].w : S27.y {WL}
+ALU MOV S28.x : KC0[0].x {WL}
+ALU SETGE_INT S29.x : S28.x S10.x {WL}
+IF (( ALU PRED_SETNE_INT __.x@free : S29.x I[0] {LEP} PUSH_BEFORE ))
+  ALU ADD_INT S31.x : S28.x S11.x {WL}
+  ALU MOV S32.x : I[0] {W}
+  ALU MOV S32.y : L[0x3dcccccd] {WL}
+  ALU MOV S33.x : I[0] {WL}
+  ALU ADD_INT S34.x : S33.x S31.x {WL}
+  ALU MOV A2[S34.x].z : S32.x {W}
+  ALU MOV A2[S34.x].w : S32.y {WL}
+ELSE
+  ALU MOV S35.x : I[0] {W}
+  ALU MOV S35.y : L[0x3dcccccd] {WL}
+  ALU MOV S36.x : I[0] {WL}
+  ALU ADD_INT S37.x : S36.x S28.x {WL}
+  ALU MOV A2[S37.x].x : S35.x {W}
+  ALU MOV A2[S37.x].y : S35.y {WL}
+ENDIF
+ALU MOV S38.x : A2[0].x {W}
+ALU MOV S38.y : A2[0].y {WL}
+ALU MOV S39.x : A2[1].x {W}
+ALU MOV S39.y : A2[1].y {WL}
+ALU MOV S40.x : A2[2].x {W}
+ALU MOV S40.y : A2[2].y {WL}
+ALU MOV S41.x : A2[3].x {W}
+ALU MOV S41.y : A2[3].y {WL}
+ALU MOV S42.x : A2[0].z {W}
+ALU MOV S42.y : A2[0].w {WL}
+ALU MOV S43.x : A2[1].z {W}
+ALU MOV S43.y : A2[1].w {WL}
+ALU MOV S44.x : A2[2].z {W}
+ALU MOV S44.y : A2[2].w {WL}
+ALU MOV S45.x : A2[3].z {W}
+ALU MOV S45.y : A2[3].w {WL}
+EXPORT_DONE POS 0 S19.xyzw
+ALU MOV S46.x@group : S38.x {W}
+ALU MOV S46.y@group : S38.y {W}
+ALU MOV S46.z@group : S39.x {W}
+ALU MOV S46.w@group : S39.y {WL}
+EXPORT PARAM 0 S46.xyzw
+ALU MOV S47.x@group : S40.x {W}
+ALU MOV S47.y@group : S40.y {W}
+ALU MOV S47.z@group : S41.x {W}
+ALU MOV S47.w@group : S41.y {WL}
+EXPORT PARAM 1 S47.xyzw
+ALU MOV S48.x@group : S42.x {W}
+ALU MOV S48.y@group : S42.y {W}
+ALU MOV S48.z@group : S43.x {W}
+ALU MOV S48.w@group : S43.y {WL}
+EXPORT PARAM 2 S48.xyzw
+ALU MOV S49.x@group : S44.x {W}
+ALU MOV S49.y@group : S44.y {W}
+ALU MOV S49.z@group : S45.x {W}
+ALU MOV S49.w@group : S45.y {WL}
+EXPORT_DONE PARAM 3 S49.xyzw
+)";
+
+const char *shader_with_dest_array_opt_expect =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11
+OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12
+OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13
+REGISTERS R1.xyzw
+ARRAYS A2[4].xy A2[4].zw
+SHADER
+ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {W}
+ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {W}
+ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W}
+ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL}
+ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {W}
+ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {W}
+ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W}
+ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL}
+ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W}
+ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W}
+ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W}
+ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
+ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W}
+ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W}
+ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
+ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
+ALU MOV A2[0].x : I[1.0] {W}
+ALU MOV A2[0].y : L[0x3f8ccccd] {WL}
+ALU MOV A2[1].x : L[0x40000000] {W}
+ALU MOV A2[1].y : L[0x40066666] {WL}
+ALU MOV A2[2].x : L[0x40400000] {W}
+ALU MOV A2[2].y : L[0x40466666] {WL}
+ALU MOV A2[3].x : L[0x40800000] {W}
+ALU MOV A2[3].y : L[0x40833333] {WL}
+ALU MOV A2[0].z : L[0x40a00000] {W}
+ALU MOV A2[0].w : L[0x40a33333] {WL}
+ALU MOV A2[1].z : L[0x40c00000] {W}
+ALU MOV A2[1].w : L[0x40c33333] {WL}
+ALU MOV A2[2].z : L[0x40e00000] {W}
+ALU MOV A2[2].w : L[0x40e33333] {WL}
+ALU MOV A2[3].z : L[0x41000000] {W}
+ALU MOV A2[3].w : L[0x4101999a] {WL}
+IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
+  ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc]  {WL}
+  ALU MOV A2[S34.x].z : I[0] {W}
+  ALU MOV A2[S34.x].w : L[0x3dcccccd] {WL}
+ELSE
+  ALU MOV S37.x : KC0[0].x {WL}
+  ALU MOV A2[S37.x].x : I[0] {W}
+  ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL}
+ENDIF
+EXPORT_DONE POS 0 S19.xyzw
+ALU MOV S46.x@group : A2[0].x {W}
+ALU MOV S46.y@group : A2[0].y {W}
+ALU MOV S46.z@group : A2[1].x {W}
+ALU MOV S46.w@group : A2[1].y {WL}
+EXPORT PARAM 0 S46.xyzw
+ALU MOV S47.x@group : A2[2].x {W}
+ALU MOV S47.y@group : A2[2].y {W}
+ALU MOV S47.z@group : A2[3].x {W}
+ALU MOV S47.w@group : A2[3].y {WL}
+EXPORT PARAM 1 S47.xyzw
+ALU MOV S48.x@group : A2[0].z {W}
+ALU MOV S48.y@group : A2[0].w {W}
+ALU MOV S48.z@group : A2[1].z {W}
+ALU MOV S48.w@group : A2[1].w {WL}
+EXPORT PARAM 2 S48.xyzw
+ALU MOV S49.x@group : A2[2].z {W}
+ALU MOV S49.y@group : A2[2].w {W}
+ALU MOV S49.z@group : A2[3].z {W}
+ALU MOV S49.w@group : A2[3].w {WL}
+EXPORT_DONE PARAM 3 S49.xyzw
+)";
+
+const char *shader_with_dest_array_opt_scheduled =
+R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11
+OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12
+OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13
+REGISTERS R1.xyzw
+ARRAYS A2[4].xy A2[4].zw
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU MOV A2[0].x : I[1.0] {W}
+  ALU MOV A2[0].y : L[0x3f8ccccd] {W}
+  ALU MOV A2[0].z : L[0x40a00000] {W}
+  ALU MOV A2[0].w : L[0x40a33333] {W}
+  ALU MOV A2[1].x : L[0x40000000] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV A2[2].x : L[0x40400000] {W}
+  ALU MOV A2[1].y : L[0x40066666] {W}
+  ALU MOV A2[1].z : L[0x40c00000] {W}
+  ALU MOV A2[1].w : L[0x40c33333] {W}
+  ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV A2[3].x : L[0x40800000] {W}
+  ALU MOV A2[2].y : L[0x40466666] {W}
+  ALU MOV A2[2].z : L[0x40e00000] {W}
+  ALU MOV A2[2].w : L[0x40e33333] {W}
+  ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W}
+  ALU MOV A2[3].y : L[0x40833333] {W}
+  ALU MOV A2[3].z : L[0x41000000] {W}
+  ALU MOV A2[3].w : L[0x4101999a] {W}
+  ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W}
+  ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W}
+  ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W}
+  ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W}
+  ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W}
+  ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W}
+  ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
+  ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
+ALU_GROUP_END
+IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
+  ALU_GROUP_BEGIN
+    ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
+  ALU_GROUP_END
+  ALU_GROUP_BEGIN
+    ALU MOV A2[S34.x].z : I[0] {W}
+    ALU MOV A2[S34.x].w : L[0x3dcccccd] {WL}
+  ALU_GROUP_END
+ELSE
+  ALU_GROUP_BEGIN
+    ALU MOV S37.x : KC0[0].x {WL}
+  ALU_GROUP_END
+  ALU_GROUP_BEGIN
+    ALU MOV A2[S37.x].x : I[0] {W}
+    ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL}
+  ALU_GROUP_END
+ENDIF
+ALU_GROUP_BEGIN
+  ALU MOV S46.x@group : A2[0].x {W}
+  ALU MOV S46.y@group : A2[0].y {W}
+  ALU MOV S46.z@group : A2[1].x {W}
+  ALU MOV S46.w@group : A2[1].y {W}
+  ALU MOV S47.x@group : A2[2].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S48.x@group : A2[0].z {W}
+  ALU MOV S47.y@group : A2[2].y {W}
+  ALU MOV S47.z@group : A2[3].x {W}
+  ALU MOV S47.w@group : A2[3].y {W}
+  ALU MOV S48.y@group : A2[0].w {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S49.x@group : A2[2].z {W}
+  ALU MOV S49.y@group : A2[2].w {W}
+  ALU MOV S48.z@group : A2[1].z {W}
+  ALU MOV S48.w@group : A2[1].w {W}
+  ALU MOV S49.z@group : A2[3].z {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S49.w@group : A2[3].w {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE POS 0 S19.xyzw
+EXPORT PARAM 0 S46.xyzw
+EXPORT PARAM 1 S47.xyzw
+EXPORT PARAM 2 S48.xyzw
+EXPORT_DONE PARAM 3 S49.xyzw
+BLOCK_END
+)";
+
+
+const char *shader_with_dest_array2 =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+ARRAYS A0[2].xy
+SHADER
+BLOCK_START
+ALU MOV A0[0].x : KC0[0].x {W}
+ALU MOV A0[0].y : KC0[0].y {WL}
+ALU MOV A0[1].x : KC0[1].x {W}
+ALU MOV A0[1].y : KC0[1].y {WL}
+ALU MOV S1.x : KC0[2].x {WL}
+ALU MOV A0[S1.x].x : I[1.0] {W}
+ALU MOV A0[S1.x].y : L[2.0] {WL}
+ALU MOV S2.x : A0[0].x {W}
+ALU MOV S2.y : A0[0].y {WL}
+ALU MUL_IEEE S3.x@group : S2.x KC0[2].y {W}
+ALU MUL_IEEE S3.y@group : S2.y KC0[2].y {WL}
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S3.xy01
+BLOCK_END
+)";
+
+const char *shader_with_dest_array2_scheduled =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+ARRAYS A0[2].xy
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU MOV A0[0].x : KC0[0].x {W}
+  ALU MOV A0[0].y : KC0[0].y {W}
+  ALU MOV A0[1].x : KC0[1].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S1.x : KC0[2].x {W}
+  ALU MOV A0[1].y : KC0[1].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV A0[S1.x].x : I[1.0] {W}
+  ALU MOV A0[S1.x].y : L[2.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV S2.x : A0[0].x {W}
+  ALU MOV S2.y : A0[0].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MUL_IEEE S3.x@group : S2.x KC0[2].y {W}
+  ALU MUL_IEEE S3.y@group : S2.y KC0[2].y {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 S3.xy01
+BLOCK_END
+)";
+
+const char *shader_with_dest_array2_scheduled_ra =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+ARRAYS A0[2].xy
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU MOV A0[0].x : KC0[0].x {W}
+  ALU MOV A0[0].y : KC0[0].y {W}
+  ALU MOV A0[1].x : KC0[1].x {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV R2.x : KC0[2].x {W}
+  ALU MOV A0[1].y : KC0[1].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV A0[R2.x].x : I[1.0] {W}
+  ALU MOV A0[R2.x].y : L[2.0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MOV R2.x : A0[0].x {W}
+  ALU MOV R2.y : A0[0].y {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU MUL_IEEE R0.x : R2.x KC0[2].y {W}
+  ALU MUL_IEEE R0.y : R2.y KC0[2].y {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+EXPORT_DONE PIXEL 0 R0.xy01
+BLOCK_END
+)";
+
+const char *shader_group_chan_pin_to_combine =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:1 MASK:15
+REGISTERS R0.xy__
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+ALU MOV S2.x@group : S1.x@chan {W} VEC_210
+ALU MOV S2.y@group : S1.y@chan {W} VEC_210
+ALU MOV S2.z@group : S1.z@chan {W} VEC_210
+ALU MOV S2.w@group : S1.w@chan {WL} VEC_210
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+
+const char *shader_group_chan_pin_combined =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:1 MASK:15
+REGISTERS R0.x R0.y
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+const char *shader_group_chan_pin_combined_sheduled =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:1 MASK:15
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
+ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
+ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+const char *shader_group_chan_pin_combined_sheduled_ra =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:1 MASK:15
+REGISTERS R0.x@fully R0.y@fully R1.xyzw
+SHADER
+ALU_GROUP_BEGIN
+ALU INTERP_ZW __.x : R0.y Param0.x {} VEC_210
+ALU INTERP_ZW __.y : R0.x Param0.y {} VEC_210
+ALU INTERP_ZW R1.z : R0.y Param0.z {W} VEC_210
+ALU INTERP_ZW R1.w : R0.x Param0.w {WL} VEC_210
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+ALU INTERP_XY R1.x : R0.y Param0.x {W} VEC_210
+ALU INTERP_XY R1.y : R0.x Param0.y {W} VEC_210
+ALU INTERP_XY __.z : R0.y Param0.z {} VEC_210
+ALU INTERP_XY __.w : R0.x Param0.w {L} VEC_210
+ALU_GROUP_END
+EXPORT_DONE PIXEL 0 R1.xyzw
+)";
+
+
+const char *shader_group_chan_pin_to_combine_2 =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S0.x@free : I[0] {WL}
+ALU MOV S1.x : KC0[0].x {W}
+ALU MOV S1.y : KC0[0].y {W}
+ALU MOV S1.z : KC0[0].z {W}
+ALU MOV S1.w : KC0[0].w {WL}
+ALU DOT4_IEEE S2.x@free : S1.y S1.y + S1.y S1.y + I[0] I[0] + I[0] I[0] {WL}
+ALU DOT4_IEEE S3.x@free : S1.x S1.z + S1.x S1.w + I[0] I[0] + I[0] I[0] {WL}
+ALU DOT4_IEEE S4.x@free : S1.y S1.w + S1.w S1.y + I[0] I[0] + I[0] I[0] {WL}
+ALU MOV S5.x@group : S2.x@free {W}
+ALU MOV S5.y@group : S3.x@free {W}
+ALU MOV S5.z@group : S3.x@free {W}
+ALU MOV S5.w@group : S4.x@free {WL}
+EXPORT_DONE PIXEL 0 S5.xyzw
+)";
+
+const char *shader_group_chan_pin_to_combine_2_opt =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU DOT4_IEEE S5.x@group : KC0[0].y KC0[0].y + KC0[0].y KC0[0].y + I[0] I[0] + I[0] I[0] {W}
+ALU DOT4_IEEE S3.x@free : KC0[0].x KC0[0].z + KC0[0].x KC0[0].w + I[0] I[0] + I[0] I[0] {WL}
+ALU DOT4_IEEE S5.w@group : KC0[0].y KC0[0].w + KC0[0].w KC0[0].y + I[0] I[0] + I[0] I[0] {WL}
+ALU MOV S5.y@group : S3.x@free {W}
+ALU MOV S5.z@group : S3.x@free {W}
+EXPORT_DONE PIXEL 0 S5.xyzw
+)";
+
+
+const char *fs_with_grand_and_abs =
+R"(FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU_GROUP_BEGIN
+  x: ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
+  y: ALU INTERP_XY S2.y@chan : R0.x@fully Param0.y {W} VEC_210
+  z: ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
+  w: ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
+ALU_GROUP_END
+ALU MOV S3.x@free : L[0xbf800000] {WL}
+ALU MOV S4.x@free : I[1.0] {WL}
+ALU MOV S5.x@free : L[0x41a00000] {WL}
+ALU MOV S6.x@free : L[0x41200000] {WL}
+ALU SETGT_DX10 S7.x : S2.x@chgr S1.x@free {W}
+ALU SETGT_DX10 S7.y : S2.y@chan S1.x@free {WL}
+ALU AND_INT S8.x : S7.x I[1.0] {W}
+ALU AND_INT S8.y : S7.y I[1.0] {WL}
+ALU SETGT_DX10 S9.x : S1.x@free S2.x@chgr {W}
+ALU SETGT_DX10 S9.y : S1.x@free S2.y@chan {WL}
+ALU AND_INT S10.x : S9.x I[1.0] {W}
+ALU AND_INT S10.y : S9.y I[1.0] {WL}
+ALU ADD S11.x : S8.x -S10.x {W}
+ALU ADD S11.y : S8.y -S10.y {WL}
+ALU SETE_DX10 S12.x : S11.x S3.x@free {W}
+ALU SETE_DX10 S12.y : S11.y S3.x@free {WL}
+ALU MOV S13.x@group : |S2.x@chgr| {WL}
+TEX GET_GRADIENTS_H S14.x___ : S2.x___ RID:18 SID:0 NNNN
+ALU MUL_IEEE S15.x@free : S14.x@group S5.x@free {WL}
+ALU MOV S16.x@free : -S15.x@free {WL}
+ALU CNDE_INT S17.x@free : S12.x S15.x@free S16.x@free {WL}
+ALU MOV S18.x : KC0[0].x {W}
+ALU MOV S18.y : KC0[0].y {W}
+ALU MOV S18.z : KC0[0].z {W}
+ALU MOV S18.w : KC0[0].w {WL}
+ALU MUL_IEEE S19.x@group : |S2.y@chan| S18.x {WL}
+ALU MOV S20.x@group : S19.x@group {WL}
+TEX GET_GRADIENTS_V S21.x___ : S19.x___ RID:18 SID:0 NNNN
+ALU MUL_IEEE S22.x@free : S21.x@group S6.x@free {WL}
+ALU MOV S23.x@free : -S22.x@free {WL}
+ALU CNDE_INT S24.x@free : S12.y S22.x@free S23.x@free {WL}
+ALU MOV S25.x@group : S17.x@free {W}
+ALU MOV S25.y@group : S24.x@free {W}
+ALU MOV S25.z@group : S1.x@free {W}
+ALU MOV S25.w@group : S4.x@free {WL}
+EXPORT_DONE PIXEL 0 S25.xyzw
+)";
+
+
+const char *fs_with_loop_multislot_reuse =
+R"(FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU MOV R1.x@free : I[0] {WL}
+ALU MOV S2.x@free : L[0x38f00000] {WL}
+LOOP_BEGIN
+ALU RECIPSQRT_IEEE S3.x@free : |R1.x@free| + |R1.x@free| + |R1.x@free| {WL}
+ALU SETGT_DX10 S4.x@free : S3.x@free S2.x@free {W}
+  IF (( ALU PRED_SETNE_INT __.x@free : S4.x@free I[0] {LEP} PUSH_BEFORE ))
+     BREAK
+  ENDIF
+  ALU ADD S5.x@free : S3.x@chan  L[0x38f00000] {WL}
+  ALU MUL R1.x@free : S5.x@free  L[0x38f00000] {WL}
+LOOP_END
+EXPORT_DONE PIXEL 0 R1.xxxx
+)";
+
+const char *fs_with_loop_multislot_reuse_scheduled =
+R"(FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:1
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU_GROUP_BEGIN
+  ALU MOV R1.x@free : I[0] {W}
+  ALU MOV S2.y@chan : L[0x38f00000] {WL}
+ALU_GROUP_END
+LOOP_BEGIN
+  ALU_GROUP_BEGIN
+    ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W}
+    ALU RECIPSQRT_IEEE __.y@chan : |R1.x@free| {}
+    ALU RECIPSQRT_IEEE __.z@chan : |R1.x@free| {L}
+  ALU_GROUP_END
+  ALU_GROUP_BEGIN
+    ALU SETGT_DX10 S4.x@chan : S3.x@chgr S2.y@free {WL}
+  ALU_GROUP_END
+  IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
+     BREAK
+  ENDIF
+  ALU_GROUP_BEGIN
+    ALU ADD S5.x@free : S3.x@chan  L[0x38f00000] {WL}
+  ALU_GROUP_END
+  ALU_GROUP_BEGIN
+    ALU MUL R1.x@free : S5.x@free  L[0x38f00000] {WL}
+  ALU_GROUP_END
+LOOP_END
+EXPORT_DONE PIXEL 0 R1.xxxx
+)";
+
+
+const char *gs_abs_float_nir =
+R"(shader: MESA_SHADER_GEOMETRY
+source_sha1: {0xdfd2ba73, 0x5eff5b0c, 0x577ee695, 0xb65ae49e, 0xecc34679}
+name: GLSL4
+inputs: 1
+outputs: 2
+uniforms: 3
+shared: 0
+ray queries: 0
+invocations: 1
+vertices in: 3
+vertices out: 3
+input primitive: TRIANGLES
+output primitive: TRIANGLE_STRIP
+active_stream_mask: 0x1
+uses_end_primitive: 0
+decl_var uniform INTERP_MODE_NONE float arg0 (0, 0, 0)
+decl_var uniform INTERP_MODE_NONE float tolerance (1, 1, 0)
+decl_var uniform INTERP_MODE_NONE float expected (2, 2, 0)
+decl_function main (0 params)
+
+impl main {
+	block block_0:
+	/* preds: */
+	vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
+	vec4 32 ssa_1 = intrinsic load_per_vertex_input (ssa_0, ssa_0) (0, 0, 160, 160)
+	vec1 32 ssa_2 = load_const (0x00000001 = 0.000000)
+	vec4 32 ssa_3 = intrinsic load_per_vertex_input (ssa_2, ssa_0) (0, 0, 160, 160)
+	vec1 32 ssa_4 = load_const (0x00000002 = 0.000000)
+	vec4 32 ssa_5 = intrinsic load_per_vertex_input (ssa_4, ssa_0) (0, 0, 160, 160)
+	vec1 32 ssa_6 = load_const (0x3f800000 = 1.000000)
+	vec1 32 ssa_7 = intrinsic load_uniform (ssa_0) (0, 1, 160)	/* arg0 */
+	vec1 32 ssa_8 = intrinsic load_uniform (ssa_0) (2, 1, 160)	/* expected */
+	vec1 32 ssa_9 = fsub abs(ssa_7), ssa_8
+	vec1 32 ssa_10 = intrinsic load_uniform (ssa_0) (1, 1, 160)	/* tolerance */
+	vec1 32 ssa_11 = fge32 ssa_10, abs(ssa_9)
+	vec1 32 ssa_12 = inot ssa_11
+	vec1 32 ssa_13 = b2f32 ssa_12
+	vec1 32 ssa_14 = b2f32 ssa_11
+	intrinsic store_output (ssa_1, ssa_0) (0, 15, 0,  160, 128)
+	vec4 32 ssa_15 = vec4 ssa_13, ssa_14, ssa_0, ssa_6
+	intrinsic store_output (ssa_15, ssa_0) (1, 15, 0, 160, 160)
+	intrinsic emit_vertex () (0)
+	intrinsic store_output (ssa_3, ssa_0) (0, 15, 0, 160, 128)
+	intrinsic store_output (ssa_15, ssa_0) (1,15, 0, 160, 160)
+	intrinsic emit_vertex () (0)
+	intrinsic store_output (ssa_5, ssa_0) (0, 15, 0, 160, 128)
+	intrinsic store_output (ssa_15, ssa_0) (1,15, 0, 160, 160)
+	intrinsic emit_vertex () (0)
+	/* succs: block_1 */
+	block block_1:
+})";
+
+const char *gs_abs_float_expect =
+R"(GS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:5 SID:9 SPI_SID:10
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+REGISTERS R0.x@fully R0.y@fully R0.w@fully
+SHADER
+ALU MOV S2.x@chan : I[0] {WL}
+ALU MOV S3.x@chan : I[0] {WL}
+ALU MOV S4.x@chan : I[0] {WL}
+ALU MOV S5.x@chan : I[0] {WL}
+ALU MOV S6.x@free : I[0] {WL}
+LOAD_BUF S7.xyzw : R0.x@fully RID:17
+ALU MOV S8.x@free : I[1] {WL}
+LOAD_BUF S9.xyzw : R0.y@fully RID:17
+ALU MOV S10.x@free : L[0x2] {WL}
+LOAD_BUF S11.xyzw : R0.w@fully RID:17
+ALU MOV S12.x@free : I[1.0] {WL}
+ALU MOV S13.x@free : KC0[0].x {WL}
+ALU MOV S14.x@free : KC0[2].x {WL}
+ALU ADD S15.x@free : |S13.x@free| -S14.x@free {WL}
+ALU MOV S16.x@free : KC0[1].x {WL}
+ALU SETGE_DX10 S17.x@free : S16.x@free |S15.x@free| {WL}
+ALU NOT_INT S18.x@free : S17.x@free {WL}
+ALU AND_INT S19.x@free : S18.x@free I[1.0] {WL}
+ALU AND_INT S20.x@free : S17.x@free I[1.0] {WL}
+ALU MOV S21.x@group : S19.x@free {W}
+ALU MOV S21.y@group : S20.x@free {W}
+ALU MOV S21.z@group : S6.x@free {W}
+ALU MOV S21.w@group : S12.x@free {WL}
+MEM_RING 0 WRITE_IDX 0 S7.xyzw @S2.x@chan ES:4
+MEM_RING 0 WRITE_IDX 4 S21.xyzw @S2.x@chan ES:4
+EMIT_VERTEX @0
+ALU ADD_INT S22.x@chan : S2.x@chan L[0x2] {WL}
+MEM_RING 0 WRITE_IDX 0 S9.xyzw @S22.x@chan ES:4
+MEM_RING 0 WRITE_IDX 4 S21.xyzw @S22.x@chan ES:4
+EMIT_VERTEX @0
+ALU ADD_INT S23.x@chan : S22.x@chan L[0x2] {WL}
+MEM_RING 0 WRITE_IDX 0 S11.xyzw @S23.x@chan ES:4
+MEM_RING 0 WRITE_IDX 4 S21.xyzw @S23.x@chan ES:4
+EMIT_VERTEX @0
+ALU ADD_INT S24.x@chan : S23.x@chan L[0x2] {WL}
+)";
+
+
+const char *vtx_for_tcs_nir =
+R"(shader: MESA_SHADER_VERTEX
+source_sha1: {0xbd6100f2, 0xc71e7b0e, 0x74662024, 0x261073d8, 0xeae01762}
+name: GLSL5
+inputs: 0
+outputs: 1
+uniforms: 10
+shared: 0
+ray queries: 0
+decl_var uniform INTERP_MODE_NONE int[6] constarray_1_0 (0, 0, 0) = { { 0x00000000 }, { 0x00000001 }, { 0x00000002 }, { 0x00000000 }, { 0x00000002 }, { 0x00000003 } }
+decl_var uniform INTERP_MODE_NONE vec2[4] constarray_0_0 (1, 6, 0) = { { -1.000000, 1.000000 }, { -1.000000, -1.000000 }, { 1.000000, -1.000000 }, { 1.000000, 1.000000 } }
+decl_function main (0 params)
+
+impl main {
+        block block_0:
+        /* preds: */
+        vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
+        vec1 32 ssa_1 = load_const (0x3f800000 = 1.000000)
+        vec1 32 ssa_2 = intrinsic load_vertex_id () ()
+        vec1 32 ssa_3 = intrinsic load_uniform (ssa_2) (0, 6, 34)
+        vec2 32 ssa_4 = intrinsic load_uniform (ssa_3) (6, 4, 160)
+        vec4 32 ssa_5 = vec4 ssa_4.x, ssa_4.y, ssa_0, ssa_1
+        vec4 32 ssa_6 = intrinsic load_tcs_in_param_base_r600 () ()
+        vec1 32 ssa_7 = intrinsic load_tcs_rel_patch_id_r600 () ()
+        vec1 32 ssa_8 = umul24 ssa_6.y, ssa_7
+        intrinsic store_local_shared_r600 (ssa_5, ssa_8) (3)
+        vec1 32 ssa_9 = load_const (0x00000008 = 0.000000)
+        vec1 32 ssa_10 = iadd ssa_9, ssa_8
+        intrinsic store_local_shared_r600 (ssa_5, ssa_10) (12)
+        /* succs: block_1 */
+        block block_1:
+})";
+
+
+const char *vtx_for_tcs_from_nir_expect =
+R"(VS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU MOV S2.x@free : I[1.0] {WL}
+ALU MOV S3.x@free : R0.x@fully {WL}
+LOAD_BUF S4.xyzw : S3.x@free RID:0
+LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0
+ALU MOV S6.x : S5.x@group {W}
+ALU MOV S6.y : S5.y@group {W}
+ALU MOV S6.z : S1.x@free {W}
+ALU MOV S6.w : S2.x@free {WL}
+ALU MOV S7.x@free : I[0] {WL}
+LOAD_BUF S8.xyzw : S7.x@free RID:16 SRF
+ALU MOV S9.x@free : R0.y@fully {WL}
+ALU MUL_UINT24 S10.x@free : S8.y@group S9.x@free {WL}
+LDS WRITE_REL __.x [ S10.x@free ] : S6.x S6.y
+ALU MOV S11.x@free : L[0x8] {WL}
+ALU ADD_INT S12.x@free : S11.x@free S10.x@free {WL}
+LDS WRITE_REL __.x [ S12.x@free ] : S6.z S6.w)";
+
+
+const char *vtx_for_tcs_inp =
+R"(VS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU MOV S2.x@free : I[1.0] {WL}
+ALU MOV S3.x@free : R0.x@fully {WL}
+LOAD_BUF S4.xyzw : S3.x@free RID:0
+LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0
+ALU MOV S6.x : S5.x@group {W}
+ALU MOV S6.y : S5.y@group {W}
+ALU MOV S6.z : S1.x@free {W}
+ALU MOV S6.w : S2.x@free {WL}
+ALU MOV S7.x@free : I[0] {WL}
+LOAD_BUF S8.xyzw : S7.x@free RID:16 SRF
+ALU MOV S9.x@free : R0.y@fully {WL}
+ALU MUL_UINT24 S10.x@free : S8.y@group S9.x@free {WL}
+LDS WRITE_REL __.x [ S10.x@free ] : S6.x S6.y
+ALU MOV S11.x@free : L[0x8] {WL}
+ALU ADD_INT S12.x@free : S11.x@free S10.x@free {WL}
+LDS WRITE_REL __.x [ S12.x@free ] : S6.z S6.w)";
+
+const char *vtx_for_tcs_opt =
+R"(VS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+LOAD_BUF S4.x___ : R0.x@fully RID:0
+LOAD_BUF S5.xy__ : S4.x@group + 96b RID:0
+ALU MOV S7.x@free : I[0] {WL}
+LOAD_BUF S8._y__ : S7.x@free RID:16 SRF
+ALU MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL}
+LDS WRITE_REL __.x [ S10.x@free ] : S5.x@group S5.y@group
+ALU ADD_INT S12.x@free : L[0x8] S10.x@free {WL}
+LDS WRITE_REL __.x [ S12.x@free ] : I[0] I[1.0])";
+
+const char *vtx_for_tcs_pre_sched =
+R"(VS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+ALU MOV S3.x@free : R0.x@fully {WL}
+LOAD_BUF S4.xyzw : S3.x@free RID:0
+LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0
+ALU MOV S7.y@free : I[0] {WL}
+LOAD_BUF S8.xyzw : S7.y@free RID:16 SRF
+ALU MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL}
+LDS WRITE_REL __.x [ S10.x@free ] : S5.x@group S5.y@group
+ALU ADD_INT S12.x@free : L[0x8] S10.x@free {WL}
+LDS WRITE_REL __.x [ S12.x@free ] : I[0] I[1.0])";
+
+const char *vtx_for_tcs_sched =
+R"(VS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x@fully R0.y@fully
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU MOV S3.x@free : R0.x@fully {W}
+  ALU MOV S7.y@free : I[0] {WL}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+LOAD_BUF S4.xyzw : S3.x@free RID:0
+LOAD_BUF S8.xyzw : S7.y@free RID:16 SRF
+BLOCK_END
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU MUL_UINT24 S10.x@free : S8.y@group R0.y@fully {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU ADD_INT S12.x@chan : L[0x8] S10.x@free {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+  ALU LDS WRITE_REL __.x : S12.x@chan I[0] I[1.0] {L}
+ALU_GROUP_END
+BLOCK_END
+BLOCK_START
+LOAD_BUF S5.xyzw : S4.x@group + 96b RID:0
+BLOCK_END
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU LDS WRITE_REL __.x : S10.x@free S5.x@group S5.y@group {L}
+ALU_GROUP_END
+BLOCK_END)";
+
+const char *tcs_nir =
+R"(shader: MESA_SHADER_TESS_CTRL
+source_sha1: {0xc83b0de6, 0x36934b97, 0xccddb436, 0xb0952cb0, 0x07a450a1}
+name: GLSL5
+inputs: 1
+outputs: 3
+uniforms: 0
+shared: 0
+ray queries: 0
+decl_function main (0 params)
+
+impl main {
+   block block_0:
+   /* preds: */
+   vec1 32 ssa_0 = undefined
+   vec2 32 ssa_1 = load_const (0x3f800000, 0x3f800000)
+   vec1 32 ssa_2 = load_const (0x00000000)
+   vec4 32 ssa_3 = intrinsic load_tcs_out_param_base_r600 () ()
+   vec1 32 ssa_4 = intrinsic load_tcs_rel_patch_id_r600 () ()
+   vec2 32 ssa_5 = umad24 ssa_3.xx, ssa_4.xx, ssa_3.wz
+   vec1 32 ssa_6 = mov ssa_5.x
+   vec1 32 ssa_7 = load_const (0x00000010)
+   vec4 32 ssa_8 = load_const (0x00000010, 0x00000000, 0x00000004, 0x00000008)
+   vec4 32 ssa_9 = iadd ssa_5.xxxx, ssa_8
+   vec1 32 ssa_10 = mov ssa_9.x
+   vec3 32 ssa_11 = mov ssa_9.yzw
+   intrinsic store_local_shared_r600 (ssa_1, ssa_10) (3)
+   vec4 32 ssa_12 = load_const (0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000)
+   vec4 32 ssa_13 = vec4 ssa_12.x, ssa_12.y, ssa_12.z, ssa_0
+   intrinsic store_local_shared_r600 (ssa_13, ssa_6) (3)
+   vec1 32 ssa_14 = load_const (0x00000008)
+   vec1 32 ssa_15 = iadd ssa_14, ssa_5.x
+   intrinsic store_local_shared_r600 (ssa_13, ssa_15) (12)
+   vec1 32 ssa_16 = intrinsic load_invocation_id () ()
+   vec4 32 ssa_17 = intrinsic load_tcs_in_param_base_r600 () ()
+   vec1 32 ssa_18 = umul24 ssa_17.x, ssa_4
+   vec1 32 ssa_19 = umad24 ssa_17.y, ssa_16, ssa_18
+   vec4 32 ssa_20 = load_const (0x00000000, 0x00000004, 0x00000008, 0x0000000c)
+   vec4 32 ssa_21 = iadd ssa_20, ssa_19.xxxx
+   vec4 32 ssa_22 = intrinsic load_local_shared_r600 (ssa_21) ()
+   vec1 32 ssa_23 = umad24 ssa_3.y, ssa_16, ssa_5.y
+   intrinsic store_local_shared_r600 (ssa_22, ssa_23) (3)
+   vec1 32 ssa_24 = iadd ssa_14, ssa_23
+   intrinsic store_local_shared_r600 (ssa_22, ssa_24) (12)
+   vec1 32 ssa_25 = ieq32 ssa_16, ssa_2
+   /* succs: block_1 block_2 */
+   if ssa_25 {
+      block block_1:
+      /* preds: block_0 */
+      vec3 32 ssa_26 = intrinsic load_local_shared_r600 (ssa_11) ()
+      vec1 32 ssa_27 = intrinsic load_tcs_tess_factor_base_r600 () ()
+      vec1 32 ssa_28 = umad24 ssa_4, ssa_7, ssa_27
+      vec3 32 ssa_29 = load_const (0x00000004, 0x00000008, 0x0000000c)
+      vec3 32 ssa_30 = iadd ssa_28.xxx, ssa_29
+      vec4 32 ssa_31 = vec4 ssa_28, ssa_26.x, ssa_30.x, ssa_26.y
+      vec2 32 ssa_32 = vec2 ssa_30.y, ssa_26.z
+      vec1 32 ssa_33 = intrinsic load_local_shared_r600 (ssa_10) ()
+      vec2 32 ssa_34 = vec2 ssa_30.z, ssa_33
+      intrinsic store_tf_r600 (ssa_31) ()
+      intrinsic store_tf_r600 (ssa_32) ()
+      intrinsic store_tf_r600 (ssa_34) ()
+      /* succs: block_3 */
+   } else {
+      block block_2:
+      /* preds: block_0 */
+      /* succs: block_3 */
+   }
+   block block_3:
+   /* preds: block_1 block_2 */
+   /* succs: block_4 */
+   block block_4:
+})";
+
+const char *tcs_from_nir_expect =
+R"(TCS
+CHIPCLASS EVERGREEN
+PROP TCS_PRIM_MODE:4
+REGISTERS R0.x@fully R0.y@fully R0.z@fully R0.w@fully
+SHADER
+ALU MOV S1.x@free : I[0] {WL}
+ALU MOV S2.x : I[1.0] {W}
+ALU MOV S2.y : I[1.0] {WL}
+ALU MOV S3.x@free : I[0] {WL}
+ALU MOV S4.x@free : I[0] {WL}
+LOAD_BUF S5.xyzw : S4.x@free + 16b RID:16 SRF
+ALU MOV S6.x@free : R0.y@fully {WL}
+ALU MULADD_UINT24 S7.x : S5.x@group S6.x@free S5.w@group {W}
+ALU MULADD_UINT24 S7.y : S5.x@group S6.x@free S5.z@group {WL}
+ALU MOV S8.x@free : S7.x {WL}
+ALU MOV S9.x@free : L[0x10] {WL}
+ALU MOV S10.x : L[0x10] {W}
+ALU MOV S10.y : I[0] {W}
+ALU MOV S10.z : L[0x4] {W}
+ALU MOV S10.w : L[0x8] {WL}
+ALU ADD_INT S11.x : S7.x S10.x {W}
+ALU ADD_INT S11.y : S7.x S10.y {W}
+ALU ADD_INT S11.z : S7.x S10.z {W}
+ALU ADD_INT S11.w : S7.x S10.w {WL}
+ALU MOV S12.x@free : S11.x {WL}
+ALU MOV S13.x : S11.y {W}
+ALU MOV S13.y : S11.z {W}
+ALU MOV S13.z : S11.w {WL}
+LDS WRITE_REL __.x [ S12.x@free ] : S2.x S2.y
+ALU MOV S14.x : I[1.0] {W}
+ALU MOV S14.y : I[1.0] {W}
+ALU MOV S14.z : I[1.0] {W}
+ALU MOV S14.w : I[1.0] {WL}
+ALU MOV S15.x : S14.x {W}
+ALU MOV S15.y : S14.y {W}
+ALU MOV S15.z : S14.z {W}
+ALU MOV S15.w : S1.x@free {WL}
+LDS WRITE_REL __.x [ S8.x@free ] : S15.x S15.y
+ALU MOV S16.x@free : L[0x8] {WL}
+ALU ADD_INT S17.x@free : S16.x@free S7.x {WL}
+LDS WRITE_REL __.x [ S17.x@free ] : S15.z S15.w
+ALU MOV S18.x@free : R0.z@fully {WL}
+ALU MOV S19.y@free : I[0] {WL}
+LOAD_BUF S20.xyzw : S19.y@free RID:16 SRF
+ALU MUL_UINT24 S21.x@free : S20.x@group S6.x@free {WL}
+ALU MULADD_UINT24 S22.x@free : S20.y@group S18.x@free S21.x@free {WL}
+ALU MOV S23.x : I[0] {W}
+ALU MOV S23.y : L[0x4] {W}
+ALU MOV S23.z : L[0x8] {W}
+ALU MOV S23.w : L[0xc] {WL}
+ALU ADD_INT S24.x : S23.x S22.x@free {W}
+ALU ADD_INT S24.y : S23.y S22.x@free {W}
+ALU ADD_INT S24.z : S23.z S22.x@free {W}
+ALU ADD_INT S24.w : S23.w S22.x@free {WL}
+LDS_READ [ S25.x@free S25.y@free S25.z@free S25.w@free ] : [ S24.x S24.y S24.z S24.w ]
+ALU MULADD_UINT24 S26.x@free : S5.y@group S18.x@free S7.y {WL}
+LDS WRITE_REL __.x [ S26.x@free ] : S25.x@free S25.y@free
+ALU ADD_INT S27.x@free : S16.x@free S26.x@free {WL}
+LDS WRITE_REL __.x [ S27.x@free ] : S25.z@free S25.w@free
+ALU SETE_INT S28.x@free : S18.x@free S3.x@free {WL}
+IF (( ALU PRED_SETNE_INT __.z@free : S28.x@free I[0] {LEP} PUSH_BEFORE ))
+  LDS_READ [ S30.x@free S30.y@free S30.z@free ] : [ S13.x S13.y S13.z ]
+  ALU MOV S31.x@free : R0.w@fully {WL}
+  ALU MULADD_UINT24 S32.x@free : S6.x@free S9.x@free S31.x@free {WL}
+  ALU MOV S33.x : L[0x4] {W}
+  ALU MOV S33.y : L[0x8] {W}
+  ALU MOV S33.z : L[0xc] {WL}
+  ALU ADD_INT S34.x : S32.x@free S33.x {W}
+  ALU ADD_INT S34.y : S32.x@free S33.y {W}
+  ALU ADD_INT S34.z : S32.x@free S33.z {WL}
+  ALU MOV S35.x : S32.x@free {W}
+  ALU MOV S35.y : S30.x@free {W}
+  ALU MOV S35.z : S34.x {W}
+  ALU MOV S35.w : S30.y@free {WL}
+  ALU MOV S36.x : S34.y {W}
+  ALU MOV S36.y : S30.z@free {WL}
+  LDS_READ [ S37.x@free ] : [ S12.x@free ]
+  ALU MOV S38.x : S34.z {W}
+  ALU MOV S38.y : S37.x@free {WL}
+  ALU MOV S39.x@group : S35.x {W}
+  ALU MOV S39.y@group : S35.y {W}
+  ALU MOV S40.z@group : S35.z {W}
+  ALU MOV S40.w@group : S35.w {WL}
+  WRITE_TF S40.zw__
+  WRITE_TF S39.xy__
+  ALU MOV S41.x@group : S36.x {W}
+  ALU MOV S41.y@group : S36.y {WL}
+  WRITE_TF S41.xy__
+  ALU MOV S42.x@group : S38.x {W}
+  ALU MOV S42.y@group : S38.y {WL}
+  WRITE_TF S42.xy__
+ENDIF)";
+
+const char *tes_nir =
+R"(shader: MESA_SHADER_TESS_EVAL
+source_sha1: {0x2db04154, 0x4884cf59, 0x50e43ee6, 0x4bb239d7, 0x0b502229}
+name: GLSL5
+inputs: 1
+outputs: 1
+uniforms: 0
+shared: 0
+ray queries: 0
+decl_function main (0 params)
+
+impl main {
+   block block_0:
+   /* preds: */
+   vec1 32 ssa_0 = load_const (0x40000000)
+   vec2 32 ssa_1 = intrinsic load_tess_coord_r600 () ()
+   vec1 32 ssa_2 = fadd ssa_1.x, ssa_1.y
+   vec1 32 ssa_3 = load_const (0x3f800000)
+   vec1 32 ssa_4 = fsub ssa_3, ssa_2
+   vec1 32 ssa_5 = ffma ssa_0, ssa_4, ssa_1.y
+   vec1 32 ssa_6 = f2i32 ssa_5
+   vec1 32 ssa_7 = load_const (0x00000000)
+   vec4 32 ssa_8 = intrinsic load_tcs_out_param_base_r600 () ()
+   vec1 32 ssa_9 = intrinsic load_tcs_rel_patch_id_r600 () ()
+   vec1 32 ssa_10 = umad24 ssa_8.x, ssa_9, ssa_8.z
+   vec1 32 ssa_11 = umad24 ssa_8.y, ssa_6, ssa_10
+   vec4 32 ssa_12 = load_const (0x00000000, 0x00000004, 0x00000008, 0x0000000c)
+   vec4 32 ssa_13 = iadd ssa_12, ssa_11.xxxx
+   vec4 32 ssa_14 = intrinsic load_local_shared_r600 (ssa_13) ()
+   intrinsic store_output (ssa_14, ssa_7) (0, 15, 0, 160, 128)
+    /* succs: block_1 */
+    block block_1:
+})";
+
+const char *tes_from_nir_expect =
+R"(TES
+CHIPCLASS EVERGREEN
+OUTPUT LOC:0 NAME:0 MASK:15
+REGISTERS R0.x@fully R0.y@fully R0.z@fully
+SHADER
+ALU MOV S1.x@free : L[0x40000000] {WL}
+ALU MOV S2.x@free : R0.x@fully {WL}
+ALU MOV S2.y@free : R0.y@fully {WL}
+ALU ADD S3.x@free : S2.x@free S2.y@free {WL}
+ALU MOV S4.x@free : I[1.0] {WL}
+ALU ADD S5.x@free : S4.x@free -S3.x@free {WL}
+ALU MULADD_IEEE S6.x@free : S1.x@free S5.x@free S2.y@free {WL}
+ALU TRUNC S7.x@free : S6.x@free {WL}
+ALU FLT_TO_INT S8.x@free : S7.x@free {WL}
+ALU MOV S9.x@free : I[0] {WL}
+ALU MOV S10.y@free : I[0] {WL}
+LOAD_BUF S11.xyzw : S10.y@free + 16b RID:16 SRF
+ALU MOV S12.x@free : R0.z@fully {WL}
+ALU MULADD_UINT24 S13.x@free : S11.x@group S12.x@free S11.z@group {WL}
+ALU MULADD_UINT24 S14.x@free : S11.y@group S8.x@free S13.x@free {WL}
+ALU MOV S15.x : I[0] {W}
+ALU MOV S15.y : L[0x4] {W}
+ALU MOV S15.z : L[0x8] {W}
+ALU MOV S15.w : L[0xc] {WL}
+ALU ADD_INT S16.x : S15.x S14.x@free {W}
+ALU ADD_INT S16.y : S15.y S14.x@free {W}
+ALU ADD_INT S16.z : S15.z S14.x@free {W}
+ALU ADD_INT S16.w : S15.w S14.x@free {WL}
+LDS_READ [ S17.x@group S17.y@group S17.z@group S17.w@group ] : [ S16.x S16.y S16.z S16.w ]
+EXPORT_DONE POS 0 S17.xyzw
+EXPORT_DONE PARAM 0 R0.____)";
+
+
+const char *tes_pre_op =
+R"(TES
+CHIPCLASS EVERGREEN
+OUTPUT LOC:0 NAME:0 MASK:15
+REGISTERS R0.x@fully R0.y@fully R0.z@fully
+SHADER
+ALU MOV S1024.x@free : L[0x40000000] {WL}
+ALU MOV S1025.x@free : R0.x@fully {WL}
+ALU MOV S1025.y@free : R0.y@fully {WL}
+ALU ADD S1026.x@free : S1025.x@free S1025.y@free {WL}
+ALU MOV S1027.x@free : I[1.0] {WL}
+ALU ADD S1028.x@free : S1027.x@free -S1026.x@free {WL}
+ALU MULADD_IEEE S1029.x@free : S1024.x@free S1028.x@free S1025.y@free {WL}
+ALU TRUNC S1030.x@free : S1029.x@free {WL}
+ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL}
+ALU MOV S1032.x@free : I[0] {WL}
+ALU MOV S1033.y@free : I[0] {WL}
+LOAD_BUF S1034.xyzw : S1033.y@free RID:16 SRF
+ALU MOV S1035.x@free : R0.z@fully {WL}
+ALU MULADD_UINT24 S1036.x@free : S1034.x@group S1035.x@free S1034.z@group {WL}
+ALU MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL}
+ALU MOV S1038.x : I[0] {W}
+ALU MOV S1038.y : L[0x4] {W}
+ALU MOV S1038.z : L[0x8] {W}
+ALU MOV S1038.w : L[0xc] {WL}
+ALU ADD_INT S1039.x : S1038.x S1037.x@free {W}
+ALU ADD_INT S1039.y : S1038.y S1037.x@free {W}
+ALU ADD_INT S1039.z : S1038.z S1037.x@free {W}
+ALU ADD_INT S1039.w : S1038.w S1037.x@free {WL}
+LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ]
+EXPORT_DONE POS 0 S1040.xyzw
+EXPORT_DONE PARAM 0 R0.____)";
+
+const char *tes_optimized =
+R"(TES
+CHIPCLASS EVERGREEN
+OUTPUT LOC:0 NAME:0 MASK:15
+REGISTERS R0.x@fully R0.y@fully R0.z@fully
+SHADER
+ALU ADD S1026.x@free : R0.x@fully R0.y@fully {WL}
+ALU ADD S1028.x@free : I[1.0] -S1026.x@free {WL}
+ALU MULADD_IEEE S1029.x@free : L[0x40000000] S1028.x@free R0.y@fully {WL}
+ALU TRUNC S1030.x@free : S1029.x@free {WL}
+ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL}
+ALU MOV S1033.y@free : I[0] {WL}
+LOAD_BUF S1034.xyz_ : S1033.y@free RID:16 SRF
+ALU MULADD_UINT24 S1036.x@free : S1034.x@group R0.z@fully S1034.z@group {WL}
+ALU MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL}
+ALU MOV S1039.x : S1037.x@free {W}
+ALU ADD_INT S1039.y : L[0x4] S1037.x@free {W}
+ALU ADD_INT S1039.z : L[0x8] S1037.x@free {W}
+ALU ADD_INT S1039.w : L[0xc] S1037.x@free {WL}
+LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ]
+EXPORT_DONE POS 0 S1040.xyzw
+EXPORT_DONE PARAM 0 R0.____)";
+
+const char *tes_optimized_pre_sched =
+R"(TES
+CHIPCLASS EVERGREEN
+OUTPUT LOC:0 NAME:0 MASK:15
+REGISTERS R0.x@fully R0.y@fully R0.z@fully
+SHADER
+ALU ADD S1026.x@free : R0.x@fully R0.y@fully {WL}
+ALU ADD S1028.x@free : I[1.0] -S1026.x@free {WL}
+ALU MULADD_IEEE S1029.x@free : L[0x40000000] S1028.x@free R0.y@fully {WL}
+ALU TRUNC S1030.x@free : S1029.x@free {WL}
+ALU FLT_TO_INT S1031.x@free : S1030.x@free {WL}
+ALU MOV S1033.y@free : I[0] {WL}
+LOAD_BUF S1034.xyzw : S1033.y@free RID:16 SRF
+ALU MULADD_UINT24 S1036.x@free : S1034.x@group R0.z@fully S1034.z@group {WL}
+ALU MULADD_UINT24 S1037.x@free : S1034.y@group S1031.x@free S1036.x@free {WL}
+ALU ADD_INT S1039.x : I[0] S1037.x@free {W}
+ALU ADD_INT S1039.y : L[0x4] S1037.x@free {W}
+ALU ADD_INT S1039.z : L[0x8] S1037.x@free {W}
+ALU ADD_INT S1039.w : L[0xc] S1037.x@free {WL}
+LDS_READ [ S1040.x@group S1040.y@group S1040.z@group S1040.w@group ] : [ S1039.x S1039.y S1039.z S1039.w ]
+EXPORT_DONE POS 0 S1040.xyzw
+EXPORT_DONE PARAM 0 R0.____)";
+
+const char *tes_optimized_sched =
+R"(TES
+CHIPCLASS EVERGREEN
+OUTPUT LOC:0 NAME:0 MASK:15
+REGISTERS R0.x@fully R0.y@fully R0.z@fully
+SHADER
+BLOCK_START
+ALU_GROUP_BEGIN
+  ALU ADD S1026.x@chan : R0.x@fully R0.y@fully {W}
+   ALU MOV S1033.y@chan : I[0] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU ADD S1028.x@chan : I[1.0] -S1026.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MULADD_IEEE S1029.x@chan : L[0x40000000] S1028.x@chan R0.y@fully {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU TRUNC S1030.x@chan : S1029.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU FLT_TO_INT S1031.x@chan : S1030.x@chan {WL}
+ALU_GROUP_END
+BLOCK_START
+BLOCK_END
+LOAD_BUF S1034.xyzw : S1033.y@chan RID:16 SRF
+BLOCK_START
+BLOCK_END
+ALU_GROUP_BEGIN
+   ALU MULADD_UINT24 S1036.x@chan : S1034.x@group R0.z@fully S1034.z@group {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MULADD_UINT24 S1037.x@chan : S1034.y@group S1031.x@chan S1036.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU ADD_INT S1039.x : I[0] S1037.x@chan {W}
+   ALU ADD_INT S1039.y : L[0x4] S1037.x@chan {W}
+   ALU ADD_INT S1039.z : L[0x8] S1037.x@chan {W}
+   ALU ADD_INT S1039.w : L[0xc] S1037.x@chan {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU LDS READ_RET __.x@chan : S1039.x {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU LDS READ_RET __.x@chan : S1039.y {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU LDS READ_RET __.x@chan : S1039.z {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU LDS READ_RET __.x@chan : S1039.w {L}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MOV S1040.x@group : I[LDS_OQ_A_POP] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MOV S1040.y@group : I[LDS_OQ_A_POP] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MOV S1040.z@group : I[LDS_OQ_A_POP] {WL}
+ALU_GROUP_END
+ALU_GROUP_BEGIN
+   ALU MOV S1040.w@group : I[LDS_OQ_A_POP] {WL}
+ALU_GROUP_END
+BLOCK_START
+BLOCK_END
+EXPORT_DONE POS 0 S1040.xyzw
+EXPORT_DONE PARAM 0 R0.____
+BLOCK_END)";
+
+void TestShader::SetUp()
+{
+   init_pool();
+   SetUpMore();
+}
+
+void TestShader::TearDown()
+{
+   TearDownMore();
+   release_pool();
+}
+
+void TestShader::SetUpMore()
+{
+}
+
+void TestShader::TearDownMore()
+{
+}
+
+Shader *TestShader::from_string(const std::string& s)
+{
+   istringstream is(s);
+   string line;
+
+   r600_shader_key key = {0};
+   key.ps.nr_cbufs = 1;
+
+   do {
+      std::getline(is, line);
+   } while (line.empty());
+
+   Shader *shader = nullptr;
+
+   if (line.substr(0,2) == "FS")
+      shader = new FragmentShader(key);
+   else if (line.substr(0,2) == "VS")
+      shader = new VertexShader(nullptr, nullptr,  key);
+   else if (line.substr(0,2) == "GS")
+      shader = new GeometryShader(key);
+   else if (line.substr(0,3) == "TCS")
+      shader = new TCSShader(key);
+   else if (line.substr(0,3) == "TES")
+      shader = new TESShader(nullptr, nullptr, key);
+   else
+      return nullptr;
+
+   while (std::getline(is, line)) {
+      if (line.find_first_not_of(" \t") == std::string::npos)
+         continue;
+      if (line[0] == '#')
+         continue;
+
+      if (line.substr(0,6) == "SHADER")
+         break;
+
+      istringstream ls(line);
+      if (!shader->add_info_from_string(ls)) {
+         std::cerr << "Don't understand '" << line << "\n";
+         return nullptr;
+      }
+   }
+
+   while (std::getline(is, line)) {
+      if (line.find_first_not_of(" \t") == std::string::npos)
+         continue;
+      if (line[0] == '#')
+         continue;
+
+      shader->emit_instruction_from_string(line);
+   }
+
+   return shader;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h
new file mode 100644
index 0000000..9663bb3
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h
@@ -0,0 +1,115 @@
+#ifndef SFN_TEST_SHADERS_H
+#define SFN_TEST_SHADERS_H
+#include <gtest/gtest.h>
+
+namespace r600 {
+
+class Shader;
+
+extern const char *red_triangle_fs_nir;
+extern const char *red_triangle_fs_expect_from_nir;
+extern const char *red_triangle_fs_expect_from_nir_dce;
+
+extern const char *add_add_1_nir;
+extern const char *add_add_1_expect_from_nir;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce;
+extern const char *add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd;
+
+extern const char *basic_interpolation_nir;
+extern const char *basic_interpolation_orig;
+extern const char *basic_interpolation_translated_1;
+extern const char *basic_interpolation_expect_from_nir;
+extern const char *basic_interpolation_expect_from_nir_opt;
+extern const char *basic_interpolation_expect_from_nir_sched;
+
+extern const char *glxgears_vs2_nir;
+extern const char *glxgears_vs2_from_nir_expect;
+extern const char *glxgears_vs2_from_nir_expect_optimized;
+
+extern const char *dot4_pre;
+extern const char *dot4_copy_prop_dce;
+
+extern const char *glxgears_vs2_from_nir_expect_cayman;
+extern const char *basic_interpolation_orig_cayman;
+extern const char *basic_interpolation_expect_from_nir_sched_cayman;
+extern const char *basic_interpolation_expect_opt_sched_cayman;
+
+extern const char *vs_nexted_loop_nir;
+extern const char *vs_nexted_loop_from_nir_expect;
+extern const char *vs_nexted_loop_from_nir_expect_opt;
+
+extern const char *shader_with_local_array_nir;
+extern const char *shader_with_local_array_expect;
+
+extern const char *test_schedule_group;
+extern const char *test_schedule_group_expect;
+
+extern const char *shader_with_bany_nir;
+extern const char *shader_with_bany_expect_eg;
+extern const char *shader_with_bany_expect_opt_sched_eg;
+
+extern const char *shader_copy_prop_dont_kill_double_use;
+extern const char *shader_copy_prop_dont_kill_double_use_expect;
+
+extern const char *shader_with_dest_array;
+extern const char *shader_with_dest_array_opt_expect;
+extern const char *shader_with_dest_array_opt_scheduled;
+
+extern const char *shader_with_dest_array2;
+extern const char *shader_with_dest_array2_scheduled;
+
+extern const char *shader_with_dest_array2_scheduled_ra;
+
+extern const char *shader_group_chan_pin_to_combine;
+extern const char *shader_group_chan_pin_combined;
+
+extern const char *shader_group_chan_pin_combined_sheduled;
+extern const char *shader_group_chan_pin_combined_sheduled_ra;
+
+extern const char *shader_group_chan_pin_to_combine_2;
+extern const char *shader_group_chan_pin_to_combine_2_opt;
+
+extern const char *fs_with_loop_multislot_reuse;
+extern const char *fs_with_loop_multislot_reuse_scheduled;
+
+extern const char *gs_abs_float_nir;
+extern const char *gs_abs_float_expect;
+
+extern const char *vtx_for_tcs_nir;
+extern const char *vtx_for_tcs_from_nir_expect;
+
+extern const char *tcs_nir;
+extern const char *tcs_from_nir_expect;
+
+extern const char *tes_nir;
+extern const char *tes_from_nir_expect;
+
+extern const char *tes_pre_op;
+extern const char *tes_optimized;
+extern const char *tes_optimized_pre_sched;
+extern const char *tes_optimized_sched;
+
+extern const char *vtx_for_tcs_inp;
+extern const char *vtx_for_tcs_opt;
+extern const char *vtx_for_tcs_pre_sched;
+extern const char *vtx_for_tcs_sched;
+
+class TestShader : public ::testing::Test {
+
+   void SetUp() override;
+   void TearDown() override;
+
+   virtual void SetUpMore();
+   virtual void TearDownMore();
+
+
+protected:
+   Shader *from_string(const std::string& s);
+};
+
+
+}
+
+
+#endif // SFN_TEST_SHADERS_H
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
new file mode 100644
index 0000000..f8b7c48
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
@@ -0,0 +1,244 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "../sfn_virtualvalues.h"
+#include "../sfn_alu_defines.h"
+#include "../sfn_debug.h"
+
+#include "gtest/gtest.h"
+
+using namespace r600;
+
+class ValueTest : public ::testing::Test
+{
+   void SetUp() override {
+      init_pool();
+   }
+
+   void TearDown() override {
+      release_pool();
+   }
+};
+
+
+
+TEST_F(ValueTest, gpr_register_fully_pinned)
+{
+   Register reg(1, 2, pin_fully);
+
+   EXPECT_EQ(reg.sel(), 1);
+   EXPECT_EQ(reg.chan(), 2);
+   EXPECT_EQ(reg.pin(), pin_fully);
+   EXPECT_FALSE(reg.is_virtual());
+
+   Register reg2(3, 1, pin_fully);
+
+   EXPECT_EQ(reg2.sel(), 3);
+   EXPECT_EQ(reg2.chan(), 1);
+   EXPECT_EQ(reg2.pin(), pin_fully);
+   EXPECT_FALSE(reg2.is_virtual());
+}
+
+#ifdef __cpp_exceptions
+TEST_F(ValueTest, virtual_register_must_not_be_pinned_to_sel)
+{
+   EXPECT_THROW(Register(1024, 1, pin_fully), std::invalid_argument);
+}
+#endif
+
+TEST_F(ValueTest, virtual_register_not_pinned)
+{
+   Register reg(1024, 1, pin_none);
+
+   EXPECT_EQ(reg.sel(), 1024);
+   EXPECT_EQ(reg.chan(), 1);
+   EXPECT_EQ(reg.pin(), pin_none);
+   EXPECT_TRUE(reg.is_virtual());
+
+   Register reg2(1025, 2, pin_none);
+
+   EXPECT_EQ(reg2.sel(), 1025);
+   EXPECT_EQ(reg2.chan(), 2);
+   EXPECT_EQ(reg2.pin(), pin_none);
+   EXPECT_TRUE(reg2.is_virtual());
+}
+
+TEST_F(ValueTest, uniform_value)
+{
+   UniformValue reg0(512, 1);
+
+   EXPECT_EQ(reg0.sel(), 512);
+   EXPECT_EQ(reg0.chan(), 1);
+   EXPECT_EQ(reg0.kcache_bank(), 0);
+   EXPECT_FALSE(reg0.buf_addr());
+   EXPECT_FALSE(reg0.is_virtual());
+
+   UniformValue reg1(513, 2, 1);
+
+   EXPECT_EQ(reg1.sel(), 513);
+   EXPECT_EQ(reg1.chan(), 2);
+   EXPECT_EQ(reg1.kcache_bank(), 1);
+   EXPECT_FALSE(reg1.buf_addr());
+   EXPECT_FALSE(reg1.is_virtual());
+
+   auto addr = new Register( 1024, 0, pin_none);
+   ASSERT_TRUE(addr);
+
+   UniformValue reg_with_buffer_addr(513, 0, addr);
+
+   EXPECT_EQ(reg_with_buffer_addr.sel(), 513);
+   EXPECT_EQ(reg_with_buffer_addr.chan(), 0);
+   EXPECT_EQ(reg_with_buffer_addr.pin(), pin_none);
+   EXPECT_EQ(reg_with_buffer_addr.kcache_bank(), 0);
+   EXPECT_FALSE(reg_with_buffer_addr.is_virtual());
+   ASSERT_TRUE(reg_with_buffer_addr.buf_addr());
+
+   auto baddr = reg_with_buffer_addr.buf_addr();
+   EXPECT_EQ(baddr->sel(), 1024);
+   EXPECT_EQ(baddr->chan(), 0);
+   EXPECT_EQ(baddr->pin(), pin_none);
+   EXPECT_TRUE(baddr->is_virtual());
+}
+
+TEST_F(ValueTest, literal_value)
+{
+   LiteralConstant literal(12);
+   EXPECT_EQ(literal.sel(), ALU_SRC_LITERAL);
+   EXPECT_EQ(literal.chan(), -1);
+   EXPECT_EQ(literal.value(), 12);
+   EXPECT_FALSE(literal.is_virtual());
+
+   LiteralConstant literal2(2);
+   EXPECT_EQ(literal2.sel(), ALU_SRC_LITERAL);
+   EXPECT_EQ(literal2.chan(), -1);
+   EXPECT_EQ(literal2.value(), 2);
+   EXPECT_FALSE(literal2.is_virtual());
+}
+
+TEST_F(ValueTest, inline_constant)
+{
+   InlineConstant c0(ALU_SRC_1);
+
+   EXPECT_EQ(c0.sel(), ALU_SRC_1);
+   EXPECT_EQ(c0.chan(), 0);
+   EXPECT_FALSE(c0.is_virtual());
+
+   InlineConstant c1(ALU_SRC_M_1_INT);
+   EXPECT_EQ(c1.sel(), ALU_SRC_M_1_INT);
+   EXPECT_EQ(c1.chan(), 0);
+   EXPECT_FALSE(c1.is_virtual());
+
+   InlineConstant c2(ALU_SRC_PV, 1);
+   EXPECT_EQ(c2.sel(), ALU_SRC_PV);
+   EXPECT_EQ(c2.chan(), 1);
+   EXPECT_FALSE(c2.is_virtual());
+}
+
+TEST_F(ValueTest, array)
+{
+   LocalArray array(1024, 2, 12);
+
+   EXPECT_EQ(array.size(), 12);
+   EXPECT_EQ(array.nchannels(), 2);
+
+   auto elm0 = array.element(0, nullptr, 0);
+   ASSERT_TRUE(elm0);
+
+   EXPECT_EQ(elm0->sel(), 1024);
+   EXPECT_EQ(elm0->chan(), 0);
+   EXPECT_EQ(elm0->pin(), pin_array);
+
+   EXPECT_FALSE(elm0->get_addr());
+
+   auto elm1 = array.element(8, nullptr, 1);
+   ASSERT_TRUE(elm1);
+
+   EXPECT_EQ(elm1->sel(), 1024 + 8);
+   EXPECT_EQ(elm1->chan(), 1);
+   EXPECT_EQ(elm1->pin(), pin_array);
+   EXPECT_FALSE(elm1->get_addr());
+
+   auto addr = new Register( 2000, 0, pin_none);
+   ASSERT_TRUE(addr);
+
+   auto elm_indirect = array.element(0, addr, 1);
+   ASSERT_TRUE(elm_indirect);
+
+   auto elm_addr = elm_indirect->get_addr();
+   ASSERT_TRUE(elm_addr);
+
+   EXPECT_EQ(elm_indirect->sel(), 1024);
+   EXPECT_EQ(elm_indirect->chan(), 1);
+   EXPECT_EQ(elm_indirect->pin(), pin_array);
+
+   EXPECT_EQ(elm_addr->sel(), 2000);
+   EXPECT_EQ(elm_addr->chan(), 0);
+   EXPECT_EQ(elm_addr->pin(), pin_none);
+
+   // A constant addr should resolve directly
+   auto addr2 = new LiteralConstant( 3);
+   ASSERT_TRUE(addr2);
+
+   auto elm_direct = array.element(0, addr2, 0);
+   auto elm_direct_addr = elm_direct->get_addr();
+   EXPECT_FALSE(elm_direct_addr);
+
+   EXPECT_EQ(elm_direct->sel(), 1027);
+   EXPECT_EQ(elm_direct->chan(), 0);
+   EXPECT_EQ(elm_direct->pin(), pin_array);
+
+#ifdef __cpp_exceptions
+   EXPECT_THROW(array.element(12, nullptr, 0), std::invalid_argument);
+   EXPECT_THROW(array.element(3, nullptr, 2), std::invalid_argument);
+
+   auto addr3 = new LiteralConstant( 12);
+   ASSERT_TRUE(addr3);
+   EXPECT_THROW(array.element(0, addr3, 0), std::invalid_argument);
+#endif
+}
+
+TEST_F(ValueTest, reg_from_string)
+{
+   Register reg(1000, 0, pin_none);
+   auto fs = Register::from_string("R1000.x");
+   EXPECT_EQ(*fs, reg);
+
+   EXPECT_EQ(*Register::from_string("R1001.y"), Register(1001, 1, pin_none));
+   EXPECT_EQ(*Register::from_string("R1.z@fully"), Register(1, 2, pin_fully));
+   EXPECT_EQ(*Register::from_string("R1000.y@chan"), Register(1000, 1, pin_chan));
+   EXPECT_EQ(*Register::from_string("R1000.y@free"), Register(1000, 1, pin_free));
+
+
+   EXPECT_EQ(*VirtualValue::from_string("L[0x1]"), LiteralConstant(1));
+   EXPECT_EQ(*VirtualValue::from_string("L[0x2]"), LiteralConstant(2));
+   EXPECT_EQ(*VirtualValue::from_string("L[0xA]"), LiteralConstant(10));
+
+   EXPECT_EQ(*VirtualValue::from_string("I[0]"), InlineConstant(ALU_SRC_0));
+   EXPECT_EQ(*VirtualValue::from_string("I[HW_WAVE_ID]"), InlineConstant(ALU_SRC_HW_WAVE_ID));
+
+
+}
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp
new file mode 100644
index 0000000..2032fe7
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_valuefactory_test.cpp
@@ -0,0 +1,285 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2021 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "../sfn_valuefactory.h"
+#include "../sfn_alu_defines.h"
+#include "../sfn_debug.h"
+
+#include "nir_builder.h"
+#include "gtest/gtest.h"
+
+#include "ralloc.h"
+
+using namespace r600;
+
+class ValuefactoryTest: public ::testing::Test {
+
+public:
+   ValuefactoryTest();
+
+protected:
+   void SetUp() override;
+   void TearDown() override;
+
+   ValueFactory *factory;
+   nir_builder b;
+   nir_shader_compiler_options options;
+
+};
+
+TEST_F(ValuefactoryTest, test_create_ssa)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   sfn_log << SfnLog::reg << "Search (test) " << &alu->dest << "\n";
+   auto dest_value = factory->dest(alu->dest, 0, pin_none);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 0);
+   EXPECT_EQ(dest_value->pin(), pin_none);
+
+   nir_src src = nir_src_for_ssa(sum);
+   sfn_log << SfnLog::reg << "Search (test) " << &src << "\n";
+   PVirtualValue value = factory->src(src, 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+   EXPECT_EQ(value->pin(), pin_none);
+}
+
+TEST_F(ValuefactoryTest, test_create_register_1)
+{
+   nir_src src1 = NIR_SRC_INIT;
+   src1.reg.reg  = nir_local_reg_create(b.impl);
+   src1.reg.reg->num_components = 1;
+
+   nir_src src2 = NIR_SRC_INIT;
+   src2.reg.reg = nir_local_reg_create(b.impl);
+   src2.reg.reg->num_components = 4;
+   ASSERT_FALSE(src1.is_ssa);
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto value = factory->src(src1, 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+
+   for (int i = 0; i < 4; ++i) {
+      PVirtualValue value = factory->src(src2, i);
+      EXPECT_EQ(value->sel(), 1025);
+      EXPECT_EQ(value->chan(), i);
+      EXPECT_EQ(value->pin(), pin_none);
+   }
+}
+
+TEST_F(ValuefactoryTest, test_create_register_array_direct_access)
+{
+   nir_dest dst = NIR_DEST_INIT;
+   dst.reg.reg  = nir_local_reg_create(b.impl);
+   dst.reg.reg->num_components = 2;
+   dst.reg.reg->num_array_elems = 10;
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto c1 = nir_imm_float(&b, 2.0);
+
+   nir_alu_instr *mov = nir_alu_instr_create(b.shader, nir_op_mov);
+   mov->src[0].src = nir_src_for_ssa(c1);
+   mov->dest.write_mask = 3;
+   mov->dest.dest.is_ssa = false;
+   mov->dest.dest.reg.reg = dst.reg.reg;
+   mov->dest.dest.reg.base_offset = 5;
+   nir_builder_instr_insert(&b, &mov->instr);
+
+   auto regx = factory->dest(mov->dest.dest, 0, pin_none);
+   auto regy = factory->dest(mov->dest.dest, 1, pin_none);
+   EXPECT_EQ(regx->sel(), 1024 + 5);
+   EXPECT_EQ(regx->chan(), 0);
+   EXPECT_EQ(regx->pin(), pin_array);
+
+   EXPECT_EQ(regy->sel(), 1024 + 5);
+   EXPECT_EQ(regy->chan(), 1);
+   EXPECT_EQ(regy->pin(), pin_array);
+
+}
+
+
+TEST_F(ValuefactoryTest, test_create_register_array_indirect_access)
+{
+   nir_dest dst = NIR_DEST_INIT;
+   dst.reg.reg  = nir_local_reg_create(b.impl);
+   dst.reg.reg->num_components = 3;
+   dst.reg.reg->num_array_elems = 10;
+
+   factory->allocate_registers(&b.impl->registers);
+
+   auto c1 = nir_imm_vec2(&b, 2.0, 4.0);
+   auto c2 = nir_imm_int(&b, 3);
+
+   factory->dest(*c2, 0, pin_none);
+
+   nir_alu_instr *mov = nir_alu_instr_create(b.shader, nir_op_mov);
+   mov->src[0].src = nir_src_for_ssa(c1);
+   mov->dest.write_mask = 3;
+   mov->dest.dest.is_ssa = false;
+   mov->dest.dest.reg.reg = dst.reg.reg;
+   mov->dest.dest.reg.base_offset = 0;
+   mov->dest.dest.reg.indirect = (nir_src *)calloc(1, sizeof(nir_src));
+   nir_src addr = nir_src_for_ssa(c2);
+   nir_src_copy(mov->dest.dest.reg.indirect, &addr);
+   nir_builder_instr_insert(&b, &mov->instr);
+
+   auto addr_reg = factory->src(addr, 0);
+
+   auto regx = factory->dest(mov->dest.dest, 0, pin_none);
+   auto regy = factory->dest(mov->dest.dest, 1, pin_none);
+
+   auto regx_addr = regx->get_addr();
+   ASSERT_TRUE(regx_addr);
+
+   EXPECT_EQ(regx->sel(), 1024);
+   EXPECT_EQ(regx->chan(), 0);
+   EXPECT_EQ(*regx_addr, *addr_reg);
+   EXPECT_EQ(regx->pin(), pin_array);
+
+   auto regy_addr = regy->get_addr();
+   ASSERT_TRUE(regy_addr);
+
+   EXPECT_EQ(regy->sel(), 1024);
+   EXPECT_EQ(regy->chan(), 1);
+   EXPECT_EQ(*regy_addr, *addr_reg);
+   EXPECT_EQ(regy->pin(), pin_array);
+
+}
+
+TEST_F(ValuefactoryTest, test_create_ssa_pinned_chan)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   auto dest_value = factory->dest(alu->dest, 0, pin_chan);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 0);
+   EXPECT_EQ(dest_value->pin(), pin_chan);
+
+   PVirtualValue value = factory->src(nir_src_for_ssa(sum), 0);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 0);
+   EXPECT_EQ(value->pin(), pin_chan);
+}
+
+
+TEST_F(ValuefactoryTest, test_create_ssa_pinned_chan_and_reg)
+{
+   auto c1 = nir_imm_float(&b, 2.0);
+   auto c2 = nir_imm_float(&b, 4.0);
+   auto sum = nir_fadd(&b, c1, c2);
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   auto dest_value = factory->dest(alu->dest, 1, pin_chan);
+   EXPECT_EQ(dest_value->sel(), 1024);
+   EXPECT_EQ(dest_value->chan(), 1);
+   EXPECT_EQ(dest_value->pin(), pin_chan);
+
+   PVirtualValue value = factory->src(nir_src_for_ssa(sum), 1);
+   EXPECT_EQ(value->sel(), 1024);
+   EXPECT_EQ(value->chan(), 1);
+   EXPECT_EQ(value->pin(), pin_chan);
+}
+
+
+TEST_F(ValuefactoryTest, test_create_const)
+{
+   auto c1 = nir_imm_int(&b, 2);
+   auto c2 = nir_imm_int(&b, 4);
+   auto sum = nir_iadd(&b, c1, c2);
+
+   auto ci1 = nir_instr_as_load_const(c1->parent_instr);
+   factory->allocate_const(ci1);
+
+   auto ci2 = nir_instr_as_load_const(c2->parent_instr);
+   factory->allocate_const(ci2);
+
+   auto alu = nir_instr_as_alu(sum->parent_instr);
+
+   PVirtualValue value1 = factory->src(alu->src[0], 0);
+   PVirtualValue value2 = factory->src(alu->src[1], 0);
+
+   const auto& cvalue1 = dynamic_cast<const LiteralConstant&>(*value1);
+   const auto& cvalue2 = dynamic_cast<const LiteralConstant&>(*value2);
+
+   EXPECT_EQ(cvalue1.value(), 2);
+   EXPECT_EQ(cvalue2.value(), 4);
+}
+
+TEST_F(ValuefactoryTest, test_create_sysvalue)
+{
+   auto ic = factory->inline_const(ALU_SRC_TIME_LO, 0);
+
+   EXPECT_EQ(ic->sel(), ALU_SRC_TIME_LO);
+   EXPECT_EQ(ic->chan(), 0);
+}
+
+
+class GetKCache: public ConstRegisterVisitor {
+public:
+   void visit(const VirtualValue& value) {(void)value;}
+   void visit(const Register& value) {(void)value;};
+   void visit(const LocalArray& value) {(void)value;}
+   void visit(const LocalArrayValue& value) {(void)value;}
+   void visit(const UniformValue& value) {(void)value; m_result = value.kcache_bank();}
+   void visit(const LiteralConstant& value) {(void)value;}
+   void visit(const InlineConstant& value) {(void)value;}
+
+   GetKCache() : m_result(0) {}
+
+   int m_result;
+};
+
+ValuefactoryTest::ValuefactoryTest()
+{
+   memset(&options, 0, sizeof (options));
+   init_pool();
+}
+
+
+void ValuefactoryTest::SetUp()
+{
+   glsl_type_singleton_init_or_ref();
+   b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, &options, "test shader");
+   factory = new ValueFactory();
+}
+
+void ValuefactoryTest::TearDown()
+{
+   ralloc_free(b.shader);
+   glsl_type_singleton_decref();
+   release_pool();
+}
+
-- 
2.7.4