r600/sfn: Add the VS in and FS out vectorization
author Gert Wollny <gert.wollny@collabora.com>
Fri, 27 Dec 2019 16:49:26 +0000 (17:49 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 10 Feb 2020 19:09:08 +0000 (19:09 +0000)
Since the nir default implementation doesn't support vectorizing the VS
inputs and FS outputs, additional lowering passes are added here to do
just that. The work is based on Timothy Arceri's related work.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3225>

src/compiler/nir_types.cpp
src/compiler/nir_types.h
src/gallium/drivers/r600/Makefile.sources
src/gallium/drivers/r600/meson.build
src/gallium/drivers/r600/sfn/sfn_nir.cpp
src/gallium/drivers/r600/sfn/sfn_nir.h
src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp [new file with mode: 0644]
src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h [new file with mode: 0644]
src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c [new file with mode: 0644]

index 1e45df7..37dde30 100644 (file)
@@ -528,6 +528,20 @@ glsl_array_type(const glsl_type *base, unsigned elements,
 }
 
+/**
+ * Return a type shaped like t whose innermost vector or scalar type is
+ * replaced by a vector of the same base type with the given number of
+ * components. Array types are rebuilt around the new element type and keep
+ * their length and explicit stride. Any other kind of type is unreachable().
+ */
 const glsl_type *
+glsl_replace_vector_type(const glsl_type *t, unsigned components)
+{
+   if (glsl_type_is_array(t)) {
+      return glsl_array_type(
+         glsl_replace_vector_type(t->fields.array, components), t->length,
+                                  t->explicit_stride);
+   } else if (glsl_type_is_vector_or_scalar(t)) {
+      return glsl_vector_type(t->base_type, components);
+   } else {
+      unreachable("Unhandled base type glsl_replace_vector_type()");
+   }
+}
+
+const glsl_type *
 glsl_struct_type(const glsl_struct_field *fields,
                  unsigned num_fields, const char *name,
                  bool packed)
index d61cfeb..4dfc464 100644 (file)
@@ -163,6 +163,8 @@ const struct glsl_type *glsl_bool_type(void);
 const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
 const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
                                          unsigned components);
+const struct glsl_type * glsl_replace_vector_type(const struct glsl_type *t,
+                                                  unsigned components);
 const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
                                          unsigned rows, unsigned columns);
 const struct glsl_type *glsl_explicit_matrix_type(const struct glsl_type *mat,
index 5010ffa..1981c02 100644 (file)
@@ -120,6 +120,9 @@ CXX_SOURCES = \
        sfn/sfn_ir_to_assembly.h \
        sfn/sfn_nir.cpp \
        sfn/sfn_nir.h \
+       sfn/sfn_nir_lower_fs_out_to_vector.cpp \
+       sfn/sfn_nir_lower_fs_out_to_vector.h \
+       sfn/sfn_nir_vectorize_vs_inputs.c \
        sfn/sfn_shader_base.cpp \
        sfn/sfn_shader_base.h \
        sfn/sfn_shader_fragment.cpp \
index 36805e1..7717072 100644 (file)
@@ -137,6 +137,9 @@ files_r600 = files(
   'sfn/sfn_ir_to_assembly.h',
   'sfn/sfn_nir.cpp',
   'sfn/sfn_nir.h',
+  'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
+  'sfn/sfn_nir_lower_fs_out_to_vector.h',
+  'sfn/sfn_nir_vectorize_vs_inputs.c',
   'sfn/sfn_shader_base.cpp',
   'sfn/sfn_shader_base.h',
   'sfn/sfn_shader_fragment.cpp',
index b72b873..ef368cd 100644 (file)
@@ -34,6 +34,7 @@
 
 #include "sfn_shader_vertex.h"
 #include "sfn_shader_fragment.h"
+#include "sfn_nir_lower_fs_out_to_vector.h"
 #include "sfn_ir_to_assembly.h"
 
 #include <vector>
@@ -329,6 +330,7 @@ bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
 
 using r600::r600_nir_lower_int_tg4;
 using r600::r600_nir_lower_pack_unpack_2x16;
+using r600::r600_lower_fs_out_to_vector;
 
 int
 r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
@@ -431,6 +433,12 @@ int r600_shader_from_nir(struct r600_context *rctx,
    NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
               nir_lower_io_lower_64bit_to_32);
 
+   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
+      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
+
+   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
+
    if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
        sel->nir->info.stage == MESA_SHADER_TESS_EVAL)
       NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
index a663325..b3cb455 100644 (file)
@@ -100,6 +100,7 @@ private:
 extern "C" {
 #endif
 
+bool r600_vectorize_vs_inputs(nir_shader *shader);
 int r600_shader_from_nir(struct r600_context *rctx,
                          struct r600_pipe_shader *pipeshader,
                          union r600_shader_key *key);
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp
new file mode 100644 (file)
index 0000000..4441a47
--- /dev/null
@@ -0,0 +1,462 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_nir_lower_fs_out_to_vector.h"
+
+#include "nir_builder.h"
+#include "nir_deref.h"
+#include "util/u_math.h"
+
+#include <set>
+#include <vector>
+#include <array>
+#include <algorithm>
+
+namespace r600 {
+
+using std::multiset;
+using std::vector;
+using std::array;
+
+/**
+ * Ordering for deref intrinsics that is used as the multiset comparator.
+ * Instructions are ordered by the GLSL base type of the variable they
+ * access and then by that variable's location, so all accesses that share a
+ * base type and a location are equivalent under this ordering.
+ */
+struct nir_intrinsic_instr_less  {
+   /* The call operator is const because associative containers may invoke
+    * their comparator through a const object. */
+   bool operator () (const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const
+   {
+      nir_variable *vlhs = nir_deref_instr_get_variable(nir_src_as_deref(lhs->src[0]));
+      nir_variable *vrhs = nir_deref_instr_get_variable(nir_src_as_deref(rhs->src[0]));
+
+      auto ltype = glsl_get_base_type(vlhs->type);
+      auto rtype = glsl_get_base_type(vrhs->type);
+
+      /* Base type is the primary key, location the secondary one */
+      if (ltype != rtype)
+         return ltype < rtype;
+      return vlhs->data.location < vrhs->data.location;
+   }
+};
+
+/**
+ * Base class of a lowering pass that merges accesses to IO variables that
+ * share a location into accesses to one wider vector variable. Derived
+ * classes select the variable list, the intrinsics and the slots that are
+ * handled, and they emit the replacement instruction.
+ */
+class NirLowerIOToVector {
+public:
+   /* base_slot is subtracted from a variable's location to index m_vars */
+   NirLowerIOToVector(int base_slot);
+   bool run(nir_function_impl *shader);
+
+protected:
+   /* Variable-level predicates and creation of the merged variables */
+   bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs);
+   bool var_can_rewrite(nir_variable *var) const;
+   void create_new_io_vars(nir_shader *shader);
+   void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps);
+
+   /* Re-create the array derefs of src_head on top of dst_tail */
+   nir_deref_instr *clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+                                      const nir_deref_instr *src_head);
+
+   /* Instruction-level collection and rewriting */
+   bool vectorize_block(nir_builder *b, nir_block *block);
+   bool instr_can_rewrite(nir_instr *instr);
+   bool vec_instr_set_remove(nir_builder *b,nir_instr *instr);
+
+   /* Collected intrinsics, grouped by base type and location */
+   using InstrSet  = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>;
+   /* Iterator pair as returned by InstrSet::equal_range() */
+   using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>;
+
+   bool vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set,
+                            nir_intrinsic_instr *instr);
+
+   /* Variable per [location - base slot][location_frac]; nullptr if unused */
+   array<array<nir_variable *, 4>, 16> m_vars;
+   /* Rewritable intrinsics collected by vectorize_block() */
+   InstrSet m_block_io;
+   /* Running number that vectorize_block() stores in instr->index */
+   int m_next_index;
+private:
+   /* Hooks that the derived, stage-specific class has to provide */
+   virtual exec_list *get_io_list(nir_shader *shader) const  = 0;
+   virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const  = 0;
+   virtual bool var_can_rewrite_slot(nir_variable *var) const = 0;
+   virtual void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                              nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) = 0;
+
+   int m_base_slot;
+};
+
+/**
+ * Specialization of NirLowerIOToVector for shader outputs: it works on the
+ * shader's output list and rewrites store_deref intrinsics.
+ */
+class NirLowerFSOutToVector : public NirLowerIOToVector {
+public:
+   NirLowerFSOutToVector();
+
+private:
+   exec_list *get_io_list(nir_shader *shader) const  override;
+   bool var_can_rewrite_slot(nir_variable *var) const override;
+   void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                         nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) override;
+   bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const  override;
+
+   /* Build one vecN value from the per-component sources in srcs */
+   nir_ssa_def *create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+                                       int first_comp, int num_comp);
+};
+
+/* Entry point of the pass: run the output vectorizer on every function of
+ * the fragment shader that has an implementation. Returns true if any of
+ * these runs reported progress. */
+bool r600_lower_fs_out_to_vector(nir_shader *shader)
+{
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+   NirLowerFSOutToVector lowering;
+   bool any_progress = false;
+
+   nir_foreach_function(func, shader) {
+      if (!func->impl)
+         continue;
+      any_progress |= lowering.run(func->impl);
+   }
+   return any_progress;
+}
+
+/* Remember the base slot and start with an empty variable table. */
+NirLowerIOToVector::NirLowerIOToVector(int base_slot):
+   m_next_index(0),
+   m_base_slot(base_slot)
+{
+   /* No (slot, component) pair has a variable assigned yet */
+   for (auto& slot : m_vars)
+      slot.fill(nullptr);
+}
+
+/**
+ * Run the pass on one function implementation: require dominance metadata,
+ * create the merged variables and process the blocks beginning with the
+ * start block. Returns true if vectorize_block() reported progress.
+ */
+bool NirLowerIOToVector::run(nir_function_impl *impl)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   /* vectorize_block() walks the dom_children of each block */
+   nir_metadata_require(impl, nir_metadata_dominance);
+   create_new_io_vars(impl->function->shader);
+
+   bool progress = vectorize_block(&b, nir_start_block(impl));
+   if (progress) {
+      nir_metadata_preserve(impl, (nir_metadata )
+                            (nir_metadata_block_index |
+                             nir_metadata_dominance));
+   }
+   return progress;
+}
+
+/**
+ * Record all rewritable variables of the IO list in m_vars and create one
+ * combined variable for every slot that holds at least two variables that
+ * can be merged.
+ */
+void NirLowerIOToVector::create_new_io_vars(nir_shader *shader)
+{
+   struct exec_list *io_list = get_io_list(shader);
+   if (exec_list_is_empty(io_list))
+      return;
+
+   /* Index the candidates by slot (relative to the base slot) and by their
+    * first component. */
+   nir_foreach_variable(var, io_list) {
+      if (var_can_rewrite(var)) {
+         unsigned loc = var->data.location - m_base_slot;
+         m_vars[loc][var->data.location_frac] = var;
+      }
+   }
+
+   /* We don't handle combining vars of different type e.g. different array
+    * lengths.
+    */
+   for (unsigned i = 0; i < 16; i++) {
+      /* Bit mask of the components covered by mergeable variables */
+      unsigned comps = 0;
+
+      /* Look at every pair (j, k) with j < k of variables in this slot */
+      for (unsigned j = 0; j < 3; j++) {
+         if (!m_vars[i][j])
+            continue;
+
+         for (unsigned k = j + 1; k < 4; k++) {
+            if (!m_vars[i][k])
+               continue;
+
+            if (!var_can_merge(m_vars[i][j], m_vars[i][k]))
+               continue;
+
+            /* Set comps */
+            for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n)
+               comps |= 1 << (m_vars[i][j]->data.location_frac + n);
+
+            for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n)
+               comps |= 1 << (m_vars[i][k]->data.location_frac + n);
+
+         }
+      }
+      /* comps is only non-zero if at least one pair could be merged */
+      if (comps)
+         create_new_io_var(shader, i, comps);
+   }
+}
+
+/* Two variables can be merged if their types have the same GLSL base type. */
+bool
+NirLowerIOToVector::var_can_merge(const nir_variable *lhs,
+                                     const nir_variable *rhs)
+{
+   const auto lhs_base = glsl_get_base_type(lhs->type);
+   const auto rhs_base = glsl_get_base_type(rhs->type);
+
+   return lhs_base == rhs_base;
+}
+
+/**
+ * Create the combined variable for one slot.
+ *
+ * comps is a bit mask of the components to cover. The variable registered
+ * for the lowest set component is cloned, its type is widened to as many
+ * components as bits are set, and it is added to the shader. All m_vars
+ * entries of covered components that held a variable are then redirected to
+ * the new variable.
+ */
+void
+NirLowerIOToVector::create_new_io_var(nir_shader *shader,
+                                    unsigned location, unsigned comps)
+{
+   unsigned num_comps = util_bitcount(comps);
+   assert(num_comps > 1);
+
+   /* Note: u_bit_scan() strips a component of the comps bitfield here */
+   unsigned first_comp = u_bit_scan(&comps);
+
+   nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader);
+   var->data.location_frac = first_comp;
+   var->type = glsl_replace_vector_type(var->type, num_comps);
+
+   nir_shader_add_variable(shader, var);
+
+   m_vars[location][first_comp] = var;
+
+   /* Redirect the remaining covered components; entries without a variable
+    * stay empty. */
+   while (comps) {
+      const int comp = u_bit_scan(&comps);
+      if (m_vars[location][comp]) {
+         m_vars[location][comp] = var;
+      }
+   }
+}
+
+/* A variable qualifies if its (array-stripped) type is a 32 bit scalar or
+ * vector and the derived class accepts its slot. */
+bool NirLowerIOToVector::var_can_rewrite(nir_variable *var) const
+{
+   const struct glsl_type *elem_type = glsl_without_array(var->type);
+
+   /* Complex types are not split in the first place, and only 32 bit
+    * values are handled */
+   const bool simple_32bit = glsl_type_is_vector_or_scalar(elem_type) &&
+                             glsl_get_bit_size(elem_type) == 32;
+
+   return simple_32bit && var_can_rewrite_slot(var);
+}
+
+/**
+ * Process one block and, recursively, the blocks it dominates.
+ *
+ * Rewritable instructions of the block are numbered and collected in
+ * m_block_io, then the dominated children are processed, and finally the
+ * instructions of this block are visited in reverse order to merge them.
+ * Returns true if any instruction was reported as rewritten.
+ */
+bool
+NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block)
+{
+   bool progress = false;
+
+   nir_foreach_instr_safe(instr, block) {
+      if (instr_can_rewrite(instr)) {
+         /* The index is used later to order the stores of one slot */
+         instr->index = m_next_index++;
+         nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
+         m_block_io.insert(ir);
+      }
+   }
+
+   /* NOTE(review): every recursive call clears m_block_io before it
+    * returns, so the set is empty again when the reverse walk below runs
+    * for a block that has dominated children - confirm this is intended. */
+   for (unsigned i = 0; i < block->num_dom_children; i++) {
+      nir_block *child = block->dom_children[i];
+      progress |= vectorize_block(b, child);
+   }
+
+   nir_foreach_instr_reverse_safe(instr, block) {
+      progress |= vec_instr_set_remove(b, instr);
+   }
+   m_block_io.clear();
+
+   return progress;
+}
+
+/* Only intrinsics with at most three components that the derived class
+ * accepts are candidates for rewriting. */
+bool NirLowerIOToVector::instr_can_rewrite(nir_instr *instr)
+{
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
+
+      /* Accesses with more than three components are never rewritten */
+      if (intrinsic->num_components <= 3)
+         return instr_can_rewrite_type(intrinsic);
+   }
+   return false;
+}
+
+/**
+ * Try to merge the rewritable instruction instr with the collected
+ * instructions that are equivalent to it in m_block_io.
+ *
+ * Returns true only if vec_instr_stack_pop() actually rewrote something;
+ * instructions that cannot be rewritten or that have no equivalent entry in
+ * the set yield false.
+ */
+bool NirLowerIOToVector::vec_instr_set_remove(nir_builder *b,nir_instr *instr)
+{
+   if (!instr_can_rewrite(instr))
+      return false;
+
+   nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
+   auto entry = m_block_io.equal_range(ir);
+
+   /* equal_range() reports "no match" as an empty range (first == second),
+    * which is not necessarily end(). vec_instr_stack_pop() dereferences the
+    * first element of the range, so an empty range must not be passed on. */
+   if (entry.first == entry.second)
+      return false;
+
+   return vec_instr_stack_pop(b, entry, ir);
+}
+
+/**
+ * Rebuild the chain of array derefs that leads to src_head on top of
+ * dst_tail and return the new tail. The recursion walks up to the deref
+ * without a parent first, so the array derefs are re-created from the
+ * outermost one inwards. Every deref below that root is asserted to be an
+ * array deref.
+ */
+nir_deref_instr *
+NirLowerIOToVector::clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+                                    const nir_deref_instr *src_head)
+{
+   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
+
+   /* Reached the root of the source chain: nothing to clone */
+   if (!parent)
+      return dst_tail;
+
+   assert(src_head->deref_type == nir_deref_type_array);
+
+   dst_tail = clone_deref_array(b, dst_tail, parent);
+
+   return nir_build_deref_array(b, dst_tail,
+                                nir_ssa_for_src(b, src_head->arr.index, 1));
+}
+
+/* Slots are counted relative to FRAG_RESULT_COLOR. */
+NirLowerFSOutToVector::NirLowerFSOutToVector():
+  NirLowerIOToVector(FRAG_RESULT_COLOR)
+{
+
+}
+
+/* Accept shader outputs located at FRAG_RESULT_COLOR or in the range
+ * FRAG_RESULT_DATA0 .. FRAG_RESULT_DATA7. */
+bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const
+{
+   if (var->data.mode != nir_var_shader_out)
+      return false;
+
+   const int location = var->data.location;
+   if (location == FRAG_RESULT_COLOR)
+      return true;
+
+   return location >= FRAG_RESULT_DATA0 && location <= FRAG_RESULT_DATA7;
+}
+
+/**
+ * Merge the instructions in ir_set into one access to the combined variable.
+ *
+ * The instructions are ordered by descending instr.index and the first one
+ * (highest index) becomes the anchor. The values written by the other
+ * instructions that map to the same combined variable are gathered per
+ * component, those instructions are removed, and create_new_io() replaces
+ * the anchor with the combined access.
+ *
+ * Returns false without changing anything if the anchor's variable already
+ * has more than three components or was not replaced by a combined variable.
+ *
+ * NOTE(review): the instr parameter is not used by this function.
+ */
+bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b, InstrSubSet &ir_set,
+                                           nir_intrinsic_instr *instr)
+{
+   vector< nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second);
+   std::sort(ir_sorted_set.begin(), ir_sorted_set.end(),
+             [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) {
+                  return lhs->instr.index > rhs->instr.index;
+             }
+   );
+
+   nir_intrinsic_instr *intr = *ir_sorted_set.begin();
+   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+   unsigned loc = var->data.location - m_base_slot;
+
+   nir_variable *new_var = m_vars[loc][var->data.location_frac];
+   unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));
+   unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));
+
+   /* Don't bother walking the stack if this component can't be vectorised. */
+   if (old_num_comps > 3) {
+      return false;
+   }
+
+   /* No combined variable was created for this one */
+   if (new_var == var) {
+      return false;
+   }
+
+   /* Components that no instruction provides stay undefined */
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_ssa_undef_instr *instr_undef =
+      nir_ssa_undef_instr_create(b->shader, 1, 32);
+   nir_builder_instr_insert(b, &instr_undef->instr);
+
+   nir_ssa_def *srcs[4];
+   for (int i = 0; i < 4; i++) {
+      srcs[i] = &instr_undef->def;
+   }
+   srcs[var->data.location_frac] = intr->src[1].ssa;
+
+   for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
+      nir_intrinsic_instr *intr2 = *k;
+      nir_variable *var2 =
+         nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
+      /* The slot has to be derived from the variable of this instruction,
+       * not from the anchor's variable. */
+      unsigned loc2 = var2->data.location - m_base_slot;
+
+      if (m_vars[loc][var->data.location_frac] !=
+          m_vars[loc2][var2->data.location_frac]) {
+         continue;
+      }
+
+      assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
+
+      /* The instructions are visited from the highest index down, so a
+       * component that is already set keeps the value seen first. */
+      if (srcs[var2->data.location_frac] == &instr_undef->def) {
+         assert(intr2->src[1].is_ssa);
+         assert(intr2->src[1].ssa);
+         srcs[var2->data.location_frac] = intr2->src[1].ssa;
+      }
+      nir_instr_remove(&intr2->instr);
+   }
+
+   create_new_io(b, intr, new_var, srcs, new_var->data.location_frac,
+                 num_comps);
+   return true;
+}
+
+/* The variables this pass works on are the outputs of the shader. */
+exec_list *NirLowerFSOutToVector::get_io_list(nir_shader *shader) const
+{
+   return &shader->outputs;
+}
+
+/**
+ * Replace the intrinsic intr with an intrinsic of the same kind that
+ * accesses the combined variable var.
+ *
+ * The new instruction is inserted before intr, writes num_comps components
+ * (write mask with the lowest num_comps bits set) and takes its value from
+ * the vector combined out of srcs, starting at first_comp. The array derefs
+ * of the original access are re-created on the new variable. intr is removed
+ * afterwards.
+ */
+void
+NirLowerFSOutToVector::create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                                        nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
+{
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_intrinsic_instr *new_intr =
+      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+   new_intr->num_components = num_comps;
+
+   nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
+
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+   /* src[0] is the destination deref, src[1] the value to store */
+   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   new_intr->src[1] = nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
+
+   nir_builder_instr_insert(b, &new_intr->instr);
+
+   /* Remove the old store intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+/* Accept store_deref intrinsics that write to a rewritable shader output. */
+bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
+{
+   if (intr->intrinsic == nir_intrinsic_store_deref) {
+      nir_deref_instr *dst = nir_src_as_deref(intr->src[0]);
+
+      if (dst->mode == nir_var_shader_out)
+         return var_can_rewrite(nir_deref_instr_get_variable(dst));
+   }
+   return false;
+}
+
+/**
+ * Build a vec2, vec3 or vec4 ALU instruction from the values in srcs and
+ * return its SSA destination.
+ *
+ * srcs is read starting at index first_comp. A source with more than one
+ * component supplies consecutive channels of the result, and the read index
+ * advances by the number of components of that source. num_comp must be
+ * 2, 3 or 4.
+ */
+nir_ssa_def *NirLowerFSOutToVector::create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+                                                           int first_comp, int num_comp)
+{
+   nir_op op;
+   switch (num_comp) {
+   case 2: op = nir_op_vec2; break;
+   case 3: op = nir_op_vec3; break;
+   case 4: op = nir_op_vec4; break;
+   default:
+      /* A plain assert would leave op uninitialized in release builds */
+      unreachable("combined vector must have 2 to 4 components");
+   }
+   nir_alu_instr * instr = nir_alu_instr_create(b->shader, op);
+   instr->exact = b->exact;
+
+   int i = 0;
+   unsigned k = 0;
+   while (i < num_comp) {
+      nir_ssa_def *s = srcs[first_comp + k];
+      /* Channel kk of this source becomes channel i of the result */
+      for(uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
+         instr->src[i].src  = nir_src_for_ssa(s);
+         instr->src[i].swizzle[0] = kk;
+         ++i;
+      }
+      k += s->num_components;
+   }
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_comp, 32, NULL);
+   instr->dest.write_mask = (1 << num_comp) - 1;
+   nir_builder_instr_insert(b, &instr->instr);
+   return &instr->dest.dest.ssa;
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h b/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h
new file mode 100644 (file)
index 0000000..016b7a2
--- /dev/null
@@ -0,0 +1,38 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
+#define SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
+
+#include "nir.h"
+
+namespace r600 {
+
+bool r600_lower_fs_out_to_vector(nir_shader *sh);
+
+}
+
+#endif // SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
\ No newline at end of file
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c b/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c
new file mode 100644 (file)
index 0000000..252fa5d
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Copyright © 2018 Timothy Arceri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+#include "util/u_dynarray.h"
+#include "util/u_math.h"
+
+/** @file sfn_nir_vectorize_vs_inputs.c
+ *
+ * Replaces loads of scalar and partial-vector vertex shader inputs that
+ * share an attribute location with loads of one combined vector variable.
+ */
+bool
+r600_vectorize_vs_inputs(nir_shader *shader);
+
+/* Rebuild the chain of array derefs that leads to src_head on top of
+ * dst_tail and return the new tail. The recursion walks up to the deref
+ * without a parent first, so the array derefs are re-created from the
+ * outermost one inwards.
+ */
+static nir_deref_instr *
+r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+                  const nir_deref_instr *src_head)
+{
+   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
+
+   /* Reached the root of the source chain: nothing to clone */
+   if (!parent)
+      return dst_tail;
+
+   assert(src_head->deref_type == nir_deref_type_array);
+
+   dst_tail = r600_clone_deref_array(b, dst_tail, parent);
+
+   return nir_build_deref_array(b, dst_tail,
+                                nir_ssa_for_src(b, src_head->arr.index, 1));
+}
+
+/* Return true if var is a 32 bit scalar or vector (possibly wrapped in
+ * arrays) that is located at one of the generic vertex attributes.
+ */
+static bool
+r600_variable_can_rewrite(nir_variable *var)
+{
+
+   /* Skip complex types we don't split in the first place */
+   if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
+      return false;
+
+
+   /* TODO: add 64/16bit support ? */
+   if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
+      return false;
+
+   /* We only handle the generic VS attribute inputs */
+   return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
+           var->data.location <= VERT_ATTRIB_GENERIC15);
+}
+
+/* Return true if instr is a load_deref of at most three components that
+ * reads a rewritable shader input variable.
+ */
+static bool
+r600_instr_can_rewrite(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+
+   /* Accesses with more than three components and everything that is not
+    * a load_deref are left alone */
+   if (load->num_components > 3 ||
+       load->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   nir_deref_instr *src_deref = nir_src_as_deref(load->src[0]);
+
+   return src_deref->mode == nir_var_shader_in &&
+          r600_variable_can_rewrite(nir_deref_instr_get_variable(src_deref));
+}
+
+/* Return true if both intrinsics access variables that have the same GLSL
+ * base type and the same location.
+ */
+static bool
+r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
+{
+   assert(instr1->type == nir_instr_type_intrinsic &&
+          instr2->type == nir_instr_type_intrinsic);
+
+   nir_intrinsic_instr *first = nir_instr_as_intrinsic(instr1);
+   nir_intrinsic_instr *second = nir_instr_as_intrinsic(instr2);
+
+   nir_variable *first_var =
+      nir_deref_instr_get_variable(nir_src_as_deref(first->src[0]));
+   nir_variable *second_var =
+      nir_deref_instr_get_variable(nir_src_as_deref(second->src[0]));
+
+   /* Variables of different base types are not combined, and neither are
+    * variables at different locations */
+   return glsl_get_base_type(first_var->type) ==
+             glsl_get_base_type(second_var->type) &&
+          first_var->data.location == second_var->data.location;
+}
+
+/* Allocate an empty instruction stack with ralloc, using mem_ctx both as the
+ * parent of the stack object and as the context of the dynarray.
+ */
+static struct util_dynarray *
+r600_vec_instr_stack_create(void *mem_ctx)
+{
+   struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
+   util_dynarray_init(stack, mem_ctx);
+   return stack;
+}
+
+/* Append instr to the end of the instruction stack. */
+static void
+r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
+{
+   util_dynarray_append(stack, nir_instr *, instr);
+}
+
+/* Return the location of var relative to VERT_ATTRIB_GENERIC0. */
+static unsigned r600_correct_location(nir_variable *var)
+{
+   return var->data.location - VERT_ATTRIB_GENERIC0;
+}
+
+/* Replace the load intr by a load of num_comps components from the combined
+ * variable var.
+ *
+ * comp is the first component of the original variable and old_num_comps
+ * the number of components the original load provided. The uses of the old
+ * result are rewritten to a swizzle of the new load that selects the
+ * matching channels, and intr is removed.
+ */
+static void
+r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                unsigned comp, unsigned num_comps, unsigned old_num_comps)
+{
+   unsigned channels[4];
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   assert(intr->dest.is_ssa);
+
+   nir_intrinsic_instr *new_intr =
+      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+   nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
+                     intr->dest.ssa.bit_size, NULL);
+   new_intr->num_components = num_comps;
+
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+
+   /* NOTE(review): r600_instr_can_rewrite() in this file only accepts
+    * load_deref, so this branch looks unreachable from the callers visible
+    * here - confirm before relying on it. */
+   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
+       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
+      nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);
+
+   nir_builder_instr_insert(b, &new_intr->instr);
+
+   /* Channel of the old component i inside the new, wider variable */
+   for (unsigned i = 0; i < old_num_comps; ++i)
+      channels[i] = comp - var->data.location_frac + i;
+   nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
+
+   /* Remove the old load intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+
+/* Pop the topmost instruction from stack (asserted to be instr) and, if a
+ * combined variable exists for the variable it loads, replace it by a load
+ * of that combined variable.
+ *
+ * updated_vars is indexed by [location relative to VERT_ATTRIB_GENERIC0]
+ * [location_frac]. Returns true if the load was rewritten.
+ */
+static bool
+r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
+                         nir_instr *instr,
+                         nir_variable *updated_vars[16][4])
+{
+   nir_instr *last = util_dynarray_pop(stack, nir_instr *);
+
+   assert(last == instr);
+   assert(last->type == nir_instr_type_intrinsic);
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
+   nir_variable *var =
+      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+   unsigned loc = r600_correct_location(var);
+
+   nir_variable *new_var;
+   new_var = updated_vars[loc][var->data.location_frac];
+
+   unsigned num_comps =
+      glsl_get_vector_elements(glsl_without_array(new_var->type));
+
+   unsigned old_num_comps =
+         glsl_get_vector_elements(glsl_without_array(var->type));
+
+   /* Don't bother walking the stack if this component can't be vectorised. */
+   if (old_num_comps > 3) {
+      return false;
+   }
+
+   /* No combined variable was created for this one */
+   if (new_var == var) {
+      return false;
+   }
+
+   r600_create_new_load(b, intr, new_var, var->data.location_frac,
+                        num_comps, old_num_comps);
+   return true;
+}
+
+/* Key comparison of the instruction set: two stacks are equal if their
+ * first instructions access the same variable in the sense of
+ * r600_io_access_same_var().
+ */
+static bool
+r600_cmp_func(const void *data1, const void *data2)
+{
+   const struct util_dynarray *arr1 = data1;
+   const struct util_dynarray *arr2 = data2;
+
+   const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
+   const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
+
+   return r600_io_access_same_var(instr1, instr2);
+}
+
+/* Fold data into the running FNV-1a hash */
+#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
+
+/* Hash an intrinsic by the type and the location of the variable it
+ * accesses.
+ *
+ * NOTE(review): the hash covers var->type (a pointer), while
+ * r600_io_access_same_var() only compares the base type, so two
+ * instructions may compare equal and still hash differently - confirm that
+ * this is intended.
+ */
+static uint32_t
+r600_hash_instr(const nir_instr *instr)
+{
+   assert(instr->type == nir_instr_type_intrinsic);
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   nir_variable *var =
+      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+   hash = HASH(hash, var->type);
+   return HASH(hash, var->data.location);
+}
+
+/* Key hash of the instruction set: a stack is hashed by its first
+ * instruction.
+ */
+static uint32_t
+r600_hash_stack(const void *data)
+{
+   const struct util_dynarray *stack = data;
+   const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
+   return r600_hash_instr(first);
+}
+
+/* Create the set of instruction stacks, keyed through r600_hash_stack() and
+ * r600_cmp_func().
+ */
+static struct set *
+r600_vec_instr_set_create(void)
+{
+   return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
+}
+
+/* Destroy the instruction set; no per-entry delete callback is passed. */
+static void
+r600_vec_instr_set_destroy(struct set *instr_set)
+{
+   _mesa_set_destroy(instr_set, NULL);
+}
+
+/* Register a rewritable instruction in the set: it is pushed onto the stack
+ * of an equal entry if one exists, otherwise it starts a new stack.
+ */
+static void
+r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
+{
+   if (!r600_instr_can_rewrite(instr))
+      return;
+
+   /* The set is keyed by stacks, so the lookup needs a one-element stack */
+   struct util_dynarray *probe = r600_vec_instr_stack_create(instr_set);
+   r600_vec_instr_stack_push(probe, instr);
+
+   struct set_entry *match = _mesa_set_search(instr_set, probe);
+   if (!match) {
+      /* First instruction of its kind: the probe becomes the new entry */
+      _mesa_set_add(instr_set, probe);
+      return;
+   }
+
+   ralloc_free(probe);
+   r600_vec_instr_stack_push((struct util_dynarray *) match->key, instr);
+}
+
+/* Look up the stack that instr belongs to, pop instr from it and rewrite
+ * the load if possible. A stack that becomes empty is removed from the set.
+ * Returns true if the instruction was rewritten.
+ */
+static bool
+r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
+                          nir_variable *updated_vars[16][4])
+{
+   if (!r600_instr_can_rewrite(instr)) {
+      return false;
+   }
+   /*
+    * It's pretty unfortunate that we have to do this, but it's a side effect
+    * of the hash set interfaces. The hash set assumes that we're only
+    * interested in storing one equivalent element at a time, and if we try to
+    * insert a duplicate element it will remove the original. We could hack up
+    * the comparison function to "know" which input is an instruction we
+    * passed in and which is an array that's part of the entry, but that
+    * wouldn't work because we need to pass an array to _mesa_set_add() in
+    * r600_vec_instr_set_add() above, and _mesa_set_add() will call our
+    * comparison function as well.
+    */
+   struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
+   r600_vec_instr_stack_push(temp, instr);
+   struct set_entry *entry = _mesa_set_search(instr_set, temp);
+   ralloc_free(temp);
+
+   if (entry) {
+      struct util_dynarray *stack = (struct util_dynarray *) entry->key;
+      bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);
+
+      if (!util_dynarray_num_elements(stack, nir_instr *))
+         _mesa_set_remove(instr_set, entry);
+
+      return progress;
+   }
+
+   return false;
+}
+
+/* Process one block and, recursively, the blocks it dominates.
+ *
+ * All instructions of the block are added to instr_set in forward order,
+ * then the dominance-tree children are handled, and finally the block's
+ * instructions are removed from the set again in reverse order. Returns
+ * true if any removal, here or in a child block, reported progress.
+ */
+static bool
+r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
+                nir_variable *updated_vars[16][4])
+{
+   nir_foreach_instr_safe(instr, block) {
+      r600_vec_instr_set_add(instr_set, instr);
+   }
+
+   bool progress = false;
+
+   for (unsigned c = 0; c < block->num_dom_children; ++c) {
+      if (r600_vectorize_block(b, block->dom_children[c], instr_set,
+                               updated_vars))
+         progress = true;
+   }
+
+   nir_foreach_instr_reverse_safe(instr, block) {
+      if (r600_vec_instr_set_remove(b, instr_set, instr, updated_vars))
+         progress = true;
+   }
+
+   return progress;
+}
+
+/* Create one merged variable for the components in comps at location.
+ *
+ * The variable stored for the first component returned by u_bit_scan() is
+ * cloned, its location_frac is set to that component and its type is
+ * replaced by a vector type with as many components as bits are set in
+ * comps. The clone is added to the shader and replaces every non-NULL
+ * entry of vars[location][] that is covered by comps.
+ */
+static void
+r600_create_new_io_var(nir_shader *shader,
+                  nir_variable *vars[16][4],
+                  unsigned location, unsigned comps)
+{
+   const unsigned width = util_bitcount(comps);
+   assert(width > 1);
+
+   /* Note: u_bit_scan() also removes the returned component from comps */
+   const unsigned base = u_bit_scan(&comps);
+
+   nir_variable *merged = nir_variable_clone(vars[location][base], shader);
+   merged->data.location_frac = base;
+   merged->type = glsl_replace_vector_type(merged->type, width);
+
+   nir_shader_add_variable(shader, merged);
+
+   vars[location][base] = merged;
+
+   /* Redirect the remaining occupied component slots to the new variable;
+    * slots that hold no variable stay empty.
+    */
+   while (comps != 0) {
+      const int c = u_bit_scan(&comps);
+      if (!vars[location][c])
+         continue;
+      vars[location][c] = merged;
+   }
+}
+
+/* Two variables are considered mergeable when their GLSL base types match. */
+static inline bool
+r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
+{
+   const enum glsl_base_type lhs_base = glsl_get_base_type(lhs->type);
+   const enum glsl_base_type rhs_base = glsl_get_base_type(rhs->type);
+
+   return lhs_base == rhs_base;
+}
+
+/* Fill vars[][] from io_list and create merged variables per location.
+ *
+ * Every variable accepted by r600_variable_can_rewrite() is stored at
+ * vars[r600_correct_location(var)][location_frac]. Afterwards, for each of
+ * the 16 locations, the components of all variable pairs that pass
+ * r600_variables_can_merge() are collected into a bit mask; when the mask
+ * is non-zero a merged variable is created with r600_create_new_io_var().
+ */
+static void
+r600_create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
+                   nir_variable *vars[16][4])
+{
+   if (exec_list_is_empty(io_list))
+      return;
+
+   nir_foreach_variable(var, io_list) {
+      if (!r600_variable_can_rewrite(var))
+         continue;
+
+      const unsigned slot = r600_correct_location(var);
+      vars[slot][var->data.location_frac] = var;
+   }
+
+   /* We don't handle combining vars of different type e.g. different array
+    * lengths.
+    */
+   for (unsigned loc = 0; loc < 16; ++loc) {
+      unsigned mask = 0;
+
+      for (unsigned a = 0; a < 3; ++a) {
+         const nir_variable *first = vars[loc][a];
+         if (!first)
+            continue;
+
+         for (unsigned b = a + 1; b < 4; ++b) {
+            const nir_variable *second = vars[loc][b];
+            if (!second || !r600_variables_can_merge(first, second))
+               continue;
+
+            /* Mark every component occupied by either variable of the pair */
+            const unsigned first_count = glsl_get_components(first->type);
+            for (unsigned n = 0; n < first_count; ++n)
+               mask |= 1 << (first->data.location_frac + n);
+
+            const unsigned second_count = glsl_get_components(second->type);
+            for (unsigned n = 0; n < second_count; ++n)
+               mask |= 1 << (second->data.location_frac + n);
+         }
+      }
+
+      if (mask != 0)
+         r600_create_new_io_var(shader, vars, loc, mask);
+   }
+}
+
+/* Run the input vectorization on a single function implementation.
+ *
+ * Requires dominance metadata, builds the table of merged input variables
+ * from shader->inputs and then walks the blocks starting at the start block
+ * of the impl via r600_vectorize_block().
+ *
+ * Returns true if r600_vectorize_block() reported progress, so that the
+ * caller can tell whether the shader was changed.
+ */
+static bool
+r600_vectorize_io_impl(nir_function_impl *impl)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   /* r600_vectorize_block() recurses over block->dom_children */
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   nir_shader *shader = impl->function->shader;
+   nir_variable *updated_vars[16][4] = {0};
+
+   r600_create_new_io_vars(shader, &shader->inputs, updated_vars);
+
+   struct set *instr_set = r600_vec_instr_set_create();
+   bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set,
+                                        updated_vars);
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   r600_vec_instr_set_destroy(instr_set);
+
+   /* Report the actual result; returning a constant false here would hide
+    * every rewrite from the caller.
+    */
+   return progress;
+}
+
+/* Entry point of the pass: vectorize the inputs of a vertex shader.
+ *
+ * Shaders of any other stage are left untouched and false is returned.
+ * Otherwise r600_vectorize_io_impl() is run on every function that has an
+ * implementation, and the result is true if any of these calls returned
+ * true.
+ */
+bool
+r600_vectorize_vs_inputs(nir_shader *shader)
+{
+   if (shader->info.stage != MESA_SHADER_VERTEX)
+      return false;
+
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      if (r600_vectorize_io_impl(function->impl))
+         progress = true;
+   }
+
+   return progress;
+}