r600/sfn: lower CLIPVERTEX to clip planes

author Gert Wollny <gert.wollny@collabora.com>

Sun, 3 Jul 2022 17:04:05 +0000 (19:04 +0200)

committer Gert Wollny <gert.wollny@collabora.com>

Wed, 13 Jul 2022 13:17:17 +0000 (15:17 +0200)
author Gert Wollny <gert.wollny@collabora.com>
Sun, 3 Jul 2022 17:04:05 +0000 (19:04 +0200)
committer Gert Wollny <gert.wollny@collabora.com>
Wed, 13 Jul 2022 13:17:17 +0000 (15:17 +0200)
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c

index 4ba7075..2caf51c 100644 (file)
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -358,7 +358,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
  
         case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
                 return 4;
-
+       case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
+               if (!is_nir_enabled(&rscreen->b))
+                       return 140;
+               FALLTHROUGH;
         case PIPE_CAP_GLSL_FEATURE_LEVEL:
                 if (family >= CHIP_CEDAR)
                    return is_nir_enabled(&rscreen->b) ? 450 : 430;
@@ -367,9 +370,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                    return 330;
                 return 140;
  
-       case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
-               return 140;
-
         /* Supported except the original R600. */
         case PIPE_CAP_INDEP_BLEND_ENABLE:
         case PIPE_CAP_INDEP_BLEND_FUNC:
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c

index c0ff0b8..1e92ca0 100644 (file)
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -284,6 +284,8 @@ static void r600_set_clip_state(struct pipe_context *ctx,
         rctx->clip_state.state = *state;
         r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
         rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
+       rctx->driver_consts[PIPE_SHADER_TESS_EVAL].vs_ucp_dirty = true;
+       rctx->driver_consts[PIPE_SHADER_GEOMETRY].vs_ucp_dirty = true;
  }
  
  static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -1350,6 +1352,12 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
         start = compute_only ? PIPE_SHADER_COMPUTE : 0;
         end = compute_only ? PIPE_SHADER_TYPES : PIPE_SHADER_COMPUTE;
  
+       int last_vertex_stage = PIPE_SHADER_VERTEX;
+       if (rctx->tes_shader)
+               last_vertex_stage = PIPE_SHADER_TESS_EVAL;
+       if (rctx->gs_shader)
+               last_vertex_stage  = PIPE_SHADER_GEOMETRY;
+
         for (sh = start; sh < end; sh++) {
                 struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
                 if (!info->vs_ucp_dirty &&
@@ -1362,7 +1370,9 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                 ptr = info->constants;
                 size = info->alloc_size;
                 if (info->vs_ucp_dirty) {
-                       assert(sh == PIPE_SHADER_VERTEX);
+                       assert(sh == PIPE_SHADER_VERTEX ||
+                              sh == PIPE_SHADER_GEOMETRY ||
+                              sh == PIPE_SHADER_TESS_EVAL);
                         if (!size) {
                                 ptr = rctx->clip_state.state.ucp;
                                 size = R600_UCP_SIZE;
@@ -1411,7 +1421,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                 if (info->texture_const_dirty) {
                         assert (ptr);
                         assert (size);
-                       if (sh == PIPE_SHADER_VERTEX)
+                       if (sh == last_vertex_stage)
                                 memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
                         if (sh == PIPE_SHADER_FRAGMENT)
                                 memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp

index fe5397e..8f813c2 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -54,7 +54,6 @@ using std::vector;
  NirLowerInstruction::NirLowerInstruction():
         b(nullptr)
  {
-
  }
  
  bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
@@ -191,6 +190,75 @@ void sort_fsoutput(nir_shader *shader)
     exec_list_append(&shader->variables, &new_list);
  }
  
+class LowerClipvertexWrite : public NirLowerInstruction {
+
+public:
+   LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info) :
+      m_clipplane1(noutputs),
+      m_clipvtx(noutputs + 1),
+      m_so_info(so_info){}
+private:
+   bool filter(const nir_instr *instr) const override {
+      if (instr->type != nir_instr_type_intrinsic)
+         return false;
+
+      auto intr = nir_instr_as_intrinsic(instr);
+      if (intr->intrinsic != nir_intrinsic_store_output)
+         return false;
+
+      return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX;
+   }
+
+   nir_ssa_def *lower(nir_instr *instr) override {
+
+      auto intr = nir_instr_as_intrinsic(instr);
+      nir_ssa_def *output[8] = {nullptr};
+
+      // for UBO loads we correct the buffer ID by adding 1
+      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1);
+
+      assert(intr->src[0].is_ssa);
+      auto clip_vtx = intr->src[0].ssa;
+
+      for (int i = 0; i < 8; ++i) {
+         auto sel = nir_imm_int(b, i);
+         auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel);
+         output[i] = nir_fdot4(b, clip_vtx, mrow);
+      }
+
+      unsigned clip_vertex_index = nir_intrinsic_base(intr);
+
+      for (int i = 0; i < 2; ++i) {
+         auto clip_i = nir_vec(b, &output[4 * i], 4);
+         auto store = nir_store_output(b, clip_i,  intr->src[1].ssa);
+         nir_intrinsic_set_write_mask(store, 0xf);
+         nir_intrinsic_set_base(store, clip_vertex_index);
+         nir_io_semantics semantic = nir_intrinsic_io_semantics(intr);
+         semantic.location = VARYING_SLOT_CLIP_DIST0 + i;
+         semantic.no_varying = 1;
+
+         if (i > 0)
+            nir_intrinsic_set_base(store, m_clipplane1);
+         nir_intrinsic_set_write_mask(store, 0xf);
+         nir_intrinsic_set_io_semantics(store, semantic);
+      }
+      nir_intrinsic_set_base(intr, m_clipvtx);
+
+      nir_ssa_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE;
+      for (unsigned  i = 0; i < m_so_info.num_outputs; ++i) {
+         if (m_so_info.output[i].register_index == clip_vertex_index) {
+            m_so_info.output[i].register_index = m_clipvtx;
+            result = NIR_LOWER_INSTR_PROGRESS;
+         }
+      }
+      return result;
+   }
+   int m_clipplane1;
+   int m_clipvtx;
+   pipe_stream_output_info& m_so_info;
+};
+
+
  }
  
  static nir_intrinsic_op
@@ -278,6 +346,19 @@ r600_lower_deref_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
     return true;
  }
  
+
+static bool
+r600_lower_clipvertex_to_clipdist(nir_shader *sh,
+                                  pipe_stream_output_info& so_info)
+{
+   /* Run the clip vertex lowering only if the shader writes CLIP_VERTEX. */
+   const uint64_t written = sh->info.outputs_written;
+   if ((written & VARYING_BIT_CLIP_VERTEX) == 0)
+      return false;
+   /* The number of written outputs seeds the bases of the added outputs. */
+   return r600::LowerClipvertexWrite(util_bitcount64(written), so_info).run(sh);
+}
+
  static bool
  r600_nir_lower_atomics(nir_shader *shader)
  {
@@ -504,6 +585,22 @@ bool has_saturate(const nir_function *func)
     return false;
  }
  
+static bool r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key)
+{
+   /* Geometry shaders always qualify; TES and VS qualify only when the
+    * shader key does not flag them as ES (or, for VS, LS) variants. */
+   switch (nir->info.stage) {
+   case MESA_SHADER_GEOMETRY:
+      return true;
+   case MESA_SHADER_TESS_EVAL:
+      return !key.tes.as_es;
+   case MESA_SHADER_VERTEX:
+      return !key.vs.as_es && !key.vs.as_ls;
+   default:
+      return false;
+   }
+}
+
  extern "C"
  bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
  {
@@ -618,6 +715,9 @@ int r600_shader_from_nir(struct r600_context *rctx,
  
     auto sh = nir_shader_clone(sel->nir, sel->nir);
  
+   if (r600_is_last_vertex_stage(sh, *key))
+      r600_lower_clipvertex_to_clipdist(sh, sel->so);
+
     if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
         sh->info.stage == MESA_SHADER_TESS_EVAL ||
         (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
@@ -645,7 +745,6 @@ int r600_shader_from_nir(struct r600_context *rctx,
  
     NIR_PASS_V(sh, nir_lower_ubo_vec4);
  
-
     if (lower_64bit)
        NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
  
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h

index 0514cc3..73ce5c4 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_nir.h
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.h
@@ -67,6 +67,8 @@ bool r600_merge_vec2_stores(nir_shader *shader);
  bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
  bool r600_lower_64bit_to_vec2(nir_shader *sh);
  bool r600_split_64bit_alu_and_phi(nir_shader *sh);
+bool r600_lower_clipvertex_to_clipdist(nir_shader *sh);
+// NOTE(review): sfn_nir.cpp defines this name as static with an extra so_info parameter; confirm this declaration is still needed.
  
  class AssemblyFromShader {
  public:
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp

index 7d8de05..52ebc18 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp
@@ -85,20 +85,26 @@ bool GeometryShader::process_store_output(nir_intrinsic_instr *instr)
        tgsi_semantic name = (tgsi_semantic)semantic.first;
        auto write_mask = nir_intrinsic_write_mask(instr);
        ShaderOutput output(driver_location, name, write_mask);
-      output.set_sid(semantic.second);
-      add_output(output);
  
-      if (location == VARYING_SLOT_CLIP_DIST0 ||
-          location == VARYING_SLOT_CLIP_DIST1) {
-         m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
-      }
+      if (!nir_intrinsic_io_semantics(instr).no_varying)
+         output.set_sid(semantic.second);
+      if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
+         add_output(output);
  
        if (location == VARYING_SLOT_VIEWPORT) {
           m_out_viewport = true;
           m_out_misc_write = true;
+      }
  
+      if (location == VARYING_SLOT_CLIP_DIST0 ||
+          location == VARYING_SLOT_CLIP_DIST1)   {
+         auto write_mask = nir_intrinsic_write_mask(instr);
+         m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
+         m_clip_dist_write |= write_mask <<  (4 * (location - VARYING_SLOT_CLIP_DIST0));
        }
-      if (m_noutputs <= driver_location)
+
+      if (m_noutputs <= driver_location &&
+          nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
           m_noutputs = driver_location + 1;
  
        return true;
@@ -222,17 +228,17 @@ bool GeometryShader::emit_vertex(nir_intrinsic_instr* instr, bool cut)
        auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream],
                               value_factory().literal(m_noutputs),
                               AluInstr::last_write);
-      //ir->add_required_instr(cut_instr);
        emit_instruction(ir);
     }
  
-
-
     return true;
  }
  
  bool GeometryShader::store_output(nir_intrinsic_instr* instr)
  {
+   if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
+      return true;
+
     auto location = nir_intrinsic_io_semantics(instr).location;
     auto index = nir_src_as_const_value(instr->src[1]);
     assert(index);
@@ -300,9 +306,6 @@ bool GeometryShader::store_output(nir_intrinsic_instr* instr)
        }
     }
  
-
-
-
     return true;
  }
  
@@ -347,6 +350,8 @@ void GeometryShader::do_get_shader_info(r600_shader *sh_info)
  {
     sh_info->processor_type = PIPE_SHADER_GEOMETRY;
     sh_info->ring_item_sizes[0] =  m_ring_item_sizes[0];
+   sh_info->cc_dist_mask = m_cc_dist_mask;
+   sh_info->clip_dist_write = m_clip_dist_write;
  }
  
  bool GeometryShader::read_prop(std::istream& is)
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h

index 61bc494..f26d73a 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h
@@ -50,7 +50,8 @@ private:
     bool m_first_vertex_emitted{false};
     int m_offset{0};
     int m_next_input_ring_offset{0};
-   int m_clip_dist_mask{0};
+   int m_cc_dist_mask{0};
+   int m_clip_dist_write{0};
     int m_cur_ring_output{0};
     bool m_gs_tri_strip_adj_fix{false};
     uint64_t m_input_mask{0};
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp

index 8d6e750..8ded3d7 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp
@@ -97,9 +97,13 @@ bool VertexExportForFs::do_store_output(const store_loc &store_info, nir_intrins
     case VARYING_SLOT_CLIP_VERTEX:
        return emit_clip_vertices(store_info, intr);
     case VARYING_SLOT_CLIP_DIST0:
-   case VARYING_SLOT_CLIP_DIST1:
+   case VARYING_SLOT_CLIP_DIST1: {
+      bool success = emit_varying_pos(store_info, intr);
        m_num_clip_dist += 4;
-      return emit_varying_param(store_info, intr) && emit_varying_pos(store_info, intr);
+      if (!nir_intrinsic_io_semantics(&intr).no_varying)
+         success &= emit_varying_param(store_info, intr);
+      return success;
+      }
     case VARYING_SLOT_LAYER: {
        m_out_misc_write = 1;
        m_vs_out_layer = 1;
@@ -128,29 +132,6 @@ bool VertexExportForFs::emit_clip_vertices(const store_loc &store_info, const ni
  
     m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex;
  
-   RegisterVec4 clip_dist[2] = { vf.temp_vec4(pin_group), vf.temp_vec4(pin_group)};
-
-   for (int i = 0; i < 8; i++) {
-      int oreg = i >> 2;
-      int ochan = i & 3;
-      AluInstr *ir = nullptr;
-      AluInstr::SrcValues src(8);
-
-      for (int j = 0; j < 4; j++) {
-         src[2 * j] = m_clip_vertex[j];
-         src[2 * j + 1] = vf.uniform(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER);
-      }
-
-      ir = new AluInstr(op2_dot4_ieee, clip_dist[oreg][ochan], src, AluInstr::last_write, 4);
-      m_parent->emit_instruction(ir);
-   }
-
-   m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[0]);
-   m_parent->emit_instruction(m_last_pos_export);
-
-   m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[1]);
-   m_parent->emit_instruction(m_last_pos_export);
-
     return true;
  }
  
@@ -450,22 +431,24 @@ bool VertexShader::do_scan_instruction(nir_instr *instr)
        output.set_sid(sid);
  
        switch (location) {
-      case VARYING_SLOT_PSIZ:
-      case VARYING_SLOT_POS:
-      case VARYING_SLOT_CLIP_VERTEX:
-      case VARYING_SLOT_EDGE: {
-         break;
-      }
        case VARYING_SLOT_CLIP_DIST0:
        case VARYING_SLOT_CLIP_DIST1:
+         if (nir_intrinsic_io_semantics(intr).no_varying)
+            break;
+         FALLTHROUGH;
        case VARYING_SLOT_VIEWPORT:
        case VARYING_SLOT_LAYER:
        case VARYING_SLOT_VIEW_INDEX:
        default:
           output.set_is_param(true);
+         FALLTHROUGH;
+      case VARYING_SLOT_PSIZ:
+      case VARYING_SLOT_POS:
+      case VARYING_SLOT_CLIP_VERTEX:
+      case VARYING_SLOT_EDGE:
+         add_output(output);
+         break;
        }
-      add_output(output);
-      break;
     }
     case nir_intrinsic_load_vertex_id:
        m_sv_values.set(es_vertexid);
author	Gert Wollny <gert.wollny@collabora.com>
	Sun, 3 Jul 2022 17:04:05 +0000 (19:04 +0200)
committer	Gert Wollny <gert.wollny@collabora.com>
	Wed, 13 Jul 2022 13:17:17 +0000 (15:17 +0200)
src/gallium/drivers/r600/r600_pipe.c		patch \| blob \| history
src/gallium/drivers/r600/r600_state_common.c		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_nir.cpp		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_nir.h		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_shader_gs.h		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp		patch \| blob \| history