r600/sfn: Factor out fragment shader class for EG
authorGert Wollny <gert.wollny@collabora.com>
Wed, 27 Jul 2022 14:17:12 +0000 (16:17 +0200)
committerMarge Bot <emma+marge@anholt.net>
Mon, 1 Aug 2022 08:44:27 +0000 (08:44 +0000)
Pre-EG hardware handles the FS inputs differently, so we
need to prepare a different code path.

v2: Make m_interpolators_used private (Filip)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17822>

src/gallium/drivers/r600/sfn/sfn_shader.cpp
src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp
src/gallium/drivers/r600/sfn/sfn_shader_fs.h
src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp

index 61fd084..e2e3398 100644 (file)
@@ -411,7 +411,7 @@ Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_inf
 
    switch (nir->info.stage) {
    case MESA_SHADER_FRAGMENT:
-      shader = new FragmentShader(key);
+      shader = new FragmentShaderEG(key);
    break;
    case MESA_SHADER_VERTEX:
       shader = new VertexShader(so_info, gs_shader, key);
index 8a75c92..c9dd9a5 100644 (file)
@@ -79,10 +79,10 @@ void FragmentShader::do_get_shader_info(r600_shader *sh_info)
 bool FragmentShader::load_input(nir_intrinsic_instr *intr)
 {
    auto& vf = value_factory();
-   AluInstr *ir = nullptr;
 
    auto location = nir_intrinsic_io_semantics(intr).location;
    if (location == VARYING_SLOT_POS) {
+      AluInstr *ir = nullptr;
       for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
          ir = new AluInstr(op1_mov,
                            vf.dest(intr->dest, i, pin_none),
@@ -96,41 +96,17 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
    }
 
    if (location == VARYING_SLOT_FACE) {
-      ir = new AluInstr(op2_setge_dx10,
-                        vf.dest(intr->dest, 0, pin_none),
-                        m_face_input,
-                        vf.inline_const(ALU_SRC_0, 0),
-                        AluInstr::last_write);
+      auto ir = new AluInstr(op2_setge_dx10,
+                             vf.dest(intr->dest, 0, pin_none),
+                             m_face_input,
+                             vf.inline_const(ALU_SRC_0, 0),
+                             AluInstr::last_write);
       set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
-
       emit_instruction(ir);
       return true;
    }
 
-   auto io = input(nir_intrinsic_base(intr));
-   auto comp = nir_intrinsic_component(intr);
-   bool need_temp = comp > 0 || !intr->dest.is_ssa;
-   for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
-      if (need_temp) {
-         auto tmp = vf.temp_register(comp + i);
-         ir = new AluInstr(op1_interp_load_p0,
-                           tmp,
-                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
-                           AluInstr::last_write);
-         emit_instruction(ir);
-         emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
-      } else {
-
-         ir = new AluInstr(op1_interp_load_p0,
-                           vf.dest(intr->dest, i, pin_chan),
-                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
-                           AluInstr::write);
-         emit_instruction(ir);
-      }
-
-   }
-   ir->set_alu_flag(alu_last_instr);
-   return true;
+   return load_input_hw(intr);
 }
 
 bool FragmentShader::store_output(nir_intrinsic_instr *intr)
@@ -193,22 +169,10 @@ barycentric_ij_index(nir_intrinsic_instr *intr)
 
 bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
 {
-   auto& vf = value_factory();
-   switch (intr->intrinsic) {
-   case nir_intrinsic_load_barycentric_centroid:
-   case nir_intrinsic_load_barycentric_pixel:
-   case nir_intrinsic_load_barycentric_sample: {
-      unsigned ij = barycentric_ij_index(intr);
-      vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
-      vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
+   if (process_stage_intrinsic_hw(intr))
       return true;
-   }    
-   case nir_intrinsic_load_input:
-      return load_input(intr);
-   case nir_intrinsic_load_barycentric_at_offset:
-      return load_barycentric_at_offset(intr);
-   case nir_intrinsic_load_barycentric_at_sample:
-      return load_barycentric_at_sample(intr);
+
+   switch (intr->intrinsic) {
    case nir_intrinsic_load_interpolated_input:
       return load_interpolated_input(intr);
    case nir_intrinsic_discard_if:
@@ -250,176 +214,21 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
    case VARYING_SLOT_POS:
       for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
          vf.inject_value(intr->dest, i,  m_pos_input[i]);
-   return true;
+      return true;
    case VARYING_SLOT_FACE:
       return false;
    default:
       ;
    }
 
-   auto param = nir_src_as_const_value(intr->src[1]);
-   assert(param && "Indirect PS inputs not (yet) supported");
-
-   int dest_num_comp = nir_dest_num_components(intr->dest);
-   int start_comp = nir_intrinsic_component(intr);
-   bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
-
-   auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
-
-   InterpolateParams params;
-
-   params.i = vf.src(intr->src[0], 0);
-   params.j = vf.src(intr->src[0], 1);
-   params.base = input(nir_intrinsic_base(intr)).lds_pos();
-
-   if (!load_interpolated(dst, params, dest_num_comp, start_comp))
-      return false;
-
-   if (need_temp) {
-      AluInstr *ir = nullptr;
-      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
-         auto real_dst = vf.dest(intr->dest, i, pin_chan);
-         ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
-         emit_instruction(ir);
-      }
-      assert(ir);
-      ir->set_alu_flag(alu_last_instr);
-   }
-
-   return true;
-}
-
-bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
-                                       int num_dest_comp, int start_comp)
-{
-   sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i <<  ")" << "\n";
-
-   if (num_dest_comp == 1) {
-      switch (start_comp) {
-      case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
-      case 1: return load_interpolated_two_comp_for_one(dest, params,  op2_interp_xy, 0, 1);
-      case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
-      case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3);
-      default:
-         assert(0);
-      }
-   }
-
-   if (num_dest_comp == 2) {
-      switch (start_comp) {
-      case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
-      case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
-      case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
-               load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
-      default:
-         assert(0);
-      }
-   }
-
-   if (num_dest_comp == 3 && start_comp == 0)
-      return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
-            load_interpolated_one_comp(dest, params, op2_interp_z);
-
-   int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
-
-   bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
-   success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
-   return success;
-}
-
-bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op)
-{
-   auto group = new AluGroup();
-   bool success = true;
-
-   AluInstr *ir = nullptr;
-   for (unsigned i = 0; i < 2 && success; ++i) {
-      int chan = i;
-      if (op == op2_interp_z)
-         chan += 2;
-
-
-      ir = new AluInstr(op, dest[chan],
-                        i & 1 ? params.j : params.i,
-                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
-                        i == 0  ? AluInstr::write : AluInstr::last);
-
-      ir->set_bank_swizzle(alu_vec_210);
-      success = group->add_instruction(ir);
-   }
-   ir->set_alu_flag(alu_last_instr);
-   if (success)
-      emit_instruction(group);
-   return success;
-}
-
-bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask)
-{
-   auto group = new AluGroup();
-   bool success = true;
-
-   AluInstr *ir = nullptr;
-   assert(params.j);
-   assert(params.i);
-   for (unsigned i = 0; i < 4 ; ++i) {
-      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
-                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
-                        (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
-      ir->set_bank_swizzle(alu_vec_210);
-      success = group->add_instruction(ir);
-   }
-   ir->set_alu_flag(alu_last_instr);
-   if (success)
-      emit_instruction(group);
-   return success;
+   return load_interpolated_input_hw(intr);
 }
 
-bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
-                                                        UNUSED int start, int comp)
-{
-   auto group = new AluGroup();
-   bool success = true;
-   AluInstr *ir = nullptr;
-
-   for (int i = 0; i <  4 ; ++i) {
-      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
-                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
-                        i == comp ? AluInstr::write : AluInstr::empty);
-      ir->set_bank_swizzle(alu_vec_210);
-      success = group->add_instruction(ir);
-   }
-   ir->set_alu_flag(alu_last_instr);
-   if (success)
-      emit_instruction(group);
-
-   return success;
-}
 
 int FragmentShader::do_allocate_reserved_registers()
 {
-   for (unsigned i = 0; i < s_max_interpolators; ++i) {
-      if (m_interpolators_used.test(i)) {
-         sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
-         m_interpolator[i].enabled = true;
-      }
-   }
 
-   int num_baryc = 0;
-   for (int i = 0; i < 6; ++i) {
-      if (m_interpolator[i].enabled) {
-         sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
-         unsigned sel = num_baryc / 2;
-         unsigned chan = 2 * (num_baryc % 2);
-
-         m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
-         m_interpolator[i].i->pin_live_range(true, false);
-
-         m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
-         m_interpolator[i].j->pin_live_range(true, false);
-
-         m_interpolator[i].ij_index = num_baryc++;
-      }
-   }
+   int num_baryc = allocate_register_inputs();
 
    int next_register = (num_baryc + 1) >> 1;
 
@@ -658,128 +467,44 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
    }
 }
 
-bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr)
+bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
 {
-   auto& vf = value_factory();
-   RegisterVec4 slope = vf.temp_vec4(pin_group);
-   auto  src = emit_load_to_register(vf.src(instr->src[0], 0));
-   auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
-                                   R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
-
-   fetch->set_fetch_flag(FetchInstr::srf_mode);
-   emit_instruction(fetch);
-
-   auto grad = vf.temp_vec4(pin_group);
+   RegisterVec4::Swizzle swizzle;
+   auto semantics = nir_intrinsic_io_semantics(&intr);
+   unsigned driver_location = nir_intrinsic_base(&intr);
+   unsigned write_mask = nir_intrinsic_write_mask(&intr);
 
-   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
-   assert(interpolator.enabled);
+   switch (semantics.location) {
+   case FRAG_RESULT_DEPTH:
+      swizzle = {0,7,7,7};
+   break;
+   case FRAG_RESULT_STENCIL:
+      swizzle = {7,0,7,7};
+   break;
+   case FRAG_RESULT_SAMPLE_MASK:
+      swizzle = {7,7,0,7};
+   break;
+   default:
+      for (int i = 0; i < 4; ++i) {
+         swizzle[i] = (1 << i) & write_mask ? i : 7;
+      }
+   }
 
-   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
+   auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
 
-   auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
-   tex->set_tex_flag(TexInstr::grad_fine);
-   tex->set_tex_flag(TexInstr::x_unnormalized);
-   tex->set_tex_flag(TexInstr::y_unnormalized);
-   tex->set_tex_flag(TexInstr::z_unnormalized);
-   tex->set_tex_flag(TexInstr::w_unnormalized);
-   emit_instruction(tex);
+   if (semantics.location == FRAG_RESULT_COLOR ||
+       (semantics.location >= FRAG_RESULT_DATA0 &&
+        semantics.location <= FRAG_RESULT_DATA7)) {
 
-   tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
-   tex->set_tex_flag(TexInstr::x_unnormalized);
-   tex->set_tex_flag(TexInstr::y_unnormalized);
-   tex->set_tex_flag(TexInstr::z_unnormalized);
-   tex->set_tex_flag(TexInstr::w_unnormalized);
-   tex->set_tex_flag(TexInstr::grad_fine);
-   emit_instruction(tex);
+      ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask);
+      add_output(output);
 
-   auto tmp0 = vf.temp_register();
-   auto tmp1 = vf.temp_register();
+      for (int k = 0 ; k < num_outputs; ++k) {
 
-   emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
-   emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
+         unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
+                              ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
 
-   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
-   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
-
-   return true;
-}
-
-bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr)
-{
-   auto& vf = value_factory();
-   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
-
-   auto help = vf.temp_vec4(pin_group);
-   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
-
-   auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
-   getgradh->set_tex_flag(TexInstr::x_unnormalized);
-   getgradh->set_tex_flag(TexInstr::y_unnormalized);
-   getgradh->set_tex_flag(TexInstr::z_unnormalized);
-   getgradh->set_tex_flag(TexInstr::w_unnormalized);
-   getgradh->set_tex_flag(TexInstr::grad_fine);
-   emit_instruction(getgradh);
-
-   auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
-   getgradv->set_tex_flag(TexInstr::x_unnormalized);
-   getgradv->set_tex_flag(TexInstr::y_unnormalized);
-   getgradv->set_tex_flag(TexInstr::z_unnormalized);
-   getgradv->set_tex_flag(TexInstr::w_unnormalized);
-   getgradv->set_tex_flag(TexInstr::grad_fine);
-   emit_instruction(getgradv);
-
-   auto ofs_x = vf.src(instr->src[0], 0);
-   auto ofs_y = vf.src(instr->src[0], 1);
-   auto tmp0 = vf.temp_register();
-   auto tmp1 = vf.temp_register();
-   emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
-   emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
-   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
-   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
-
-   return true;
-}
-
-
-
-bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
-{
-   RegisterVec4::Swizzle swizzle;
-   auto semantics = nir_intrinsic_io_semantics(&intr);
-   unsigned driver_location = nir_intrinsic_base(&intr);
-   unsigned write_mask = nir_intrinsic_write_mask(&intr);
-
-   switch (semantics.location) {
-   case FRAG_RESULT_DEPTH:
-      swizzle = {0,7,7,7};
-   break;
-   case FRAG_RESULT_STENCIL:
-      swizzle = {7,0,7,7};
-   break;
-   case FRAG_RESULT_SAMPLE_MASK:
-      swizzle = {7,7,0,7};
-   break;
-   default:
-      for (int i = 0; i < 4; ++i) {
-         swizzle[i] = (1 << i) & write_mask ? i : 7;
-      }
-   }
-
-   auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
-
-   if (semantics.location == FRAG_RESULT_COLOR ||
-       (semantics.location >= FRAG_RESULT_DATA0 &&
-        semantics.location <= FRAG_RESULT_DATA7)) {
-
-      ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask);
-      add_output(output);
-
-      for (int k = 0 ; k < num_outputs; ++k) {
-
-         unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
-                              ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
-
-         sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
+         sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
 
          if (location >= m_max_color_exports) {
             sfn_log << SfnLog::io << "Pixel output loc:" << location
@@ -890,7 +615,319 @@ void FragmentShader::do_print_properties(std::ostream& os) const
 }
 
 
-FragmentShader::Interpolator::Interpolator():
+bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) // Emits one op1_interp_load_p0 per destination component of a load_input intrinsic.
+{
+   auto& vf = value_factory();
+   auto io = input(nir_intrinsic_base(intr));
+   auto comp = nir_intrinsic_component(intr);
+
+   bool need_temp = comp > 0 || !intr->dest.is_ssa; // go through a temporary when the load starts at a non-zero component or the destination is not SSA
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
+      if (need_temp) {
+         auto tmp = vf.temp_register(comp + i); // temporary requested for the source channel (comp + i)
+         ir = new AluInstr(op1_interp_load_p0,
+                           tmp,
+                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
+                           AluInstr::last_write);
+         emit_instruction(ir);
+         emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write)); // copy the loaded value into destination component i
+      } else {
+
+         ir = new AluInstr(op1_interp_load_p0,
+                           vf.dest(intr->dest, i, pin_chan),
+                           new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i), // comp is 0 on this path, so the source channel equals i
+                           AluInstr::write);
+         emit_instruction(ir);
+      }
+
+   }
+   ir->set_alu_flag(alu_last_instr); // flags the last interp load created by the loop; NOTE(review): ir is still nullptr if the destination has zero components - presumably never the case, confirm
+   return true; // this path never reports failure
+}
+
+bool FragmentShaderEG::allocate_register_inputs() // Enables the interpolators flagged as used and assigns pinned i/j registers to each. NOTE(review): declared bool but returns the interpolator count; do_allocate_reserved_registers() stores the result in an int and computes (num_baryc + 1) >> 1, so the count collapses to 0/1 - the return type (here and in both class declarations) should probably be int
+{
+   for (unsigned i = 0; i < s_max_interpolators; ++i) {
+      if (interpolators_used(i)) { // bit i of the base class's m_interpolators_used
+         sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
+         m_interpolator[i].enabled = true;
+      }
+   }
+
+   int num_baryc = 0; // number of enabled interpolators seen so far; also the ij_index handed out next
+   for (int i = 0; i < 6; ++i) { // 6 matches the value of s_max_interpolators used in the loop above
+      if (m_interpolator[i].enabled) {
+         sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
+         unsigned sel = num_baryc / 2; // two interpolators share one register index
+         unsigned chan = 2 * (num_baryc % 2); // first of the pair starts at channel 0, second at channel 2
+
+         m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1); // i goes to the odd channel
+         m_interpolator[i].i->pin_live_range(true, false);
+
+         m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan); // j goes to the even channel
+         m_interpolator[i].j->pin_live_range(true, false);
+
+         m_interpolator[i].ij_index = num_baryc++;
+      }
+   }
+   return num_baryc; // implicitly converted to bool by the current signature (see note on the signature line)
+}
+
+bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr) // Handles the barycentric and load_input intrinsics; returns false for everything else so the caller can continue with its own switch.
+{
+   auto& vf = value_factory();
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_barycentric_centroid:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_sample: {
+      unsigned ij = barycentric_ij_index(intr);
+      vf.inject_value(intr->dest, 0, m_interpolator[ij].i); // component 0 of the result is the interpolator's i register
+      vf.inject_value(intr->dest, 1, m_interpolator[ij].j); // component 1 of the result is the interpolator's j register
+      return true;
+   }
+   case nir_intrinsic_load_input:
+      return load_input(intr); // base-class entry point; it forwards generic inputs to load_input_hw()
+   case nir_intrinsic_load_barycentric_at_offset:
+      return load_barycentric_at_offset(intr);
+   case nir_intrinsic_load_barycentric_at_sample:
+      return load_barycentric_at_sample(intr);
+   default:
+      return false; // not handled here
+   }
+}
+
+bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr) // Emits the interpolation ALU groups for a load_interpolated_input intrinsic and, if needed, copies the result into the real destination.
+{
+   auto& vf = value_factory();
+   auto param = nir_src_as_const_value(intr->src[1]); // only consumed by the assert below
+   assert(param && "Indirect PS inputs not (yet) supported");
+
+   int dest_num_comp = nir_dest_num_components(intr->dest);
+   int start_comp = nir_intrinsic_component(intr);
+   bool need_temp = start_comp > 0 || !intr->dest.is_ssa; // interpolate into a temporary vec4 when the load starts at a non-zero component or the destination is not SSA
+
+   auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
+
+   InterpolateParams params;
+
+   params.i = vf.src(intr->src[0], 0); // barycentric i comes from component 0 of src[0]
+   params.j = vf.src(intr->src[0], 1); // barycentric j comes from component 1 of src[0]
+   params.base = input(nir_intrinsic_base(intr)).lds_pos();
+
+   if (!load_interpolated(dst, params, dest_num_comp, start_comp))
+      return false;
+
+   if (need_temp) {
+      AluInstr *ir = nullptr;
+      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
+         auto real_dst = vf.dest(intr->dest, i, pin_chan);
+         ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write); // temp channel (i + start_comp) feeds destination component i
+         emit_instruction(ir);
+      }
+      assert(ir);
+      ir->set_alu_flag(alu_last_instr); // flag the final mov of the copy sequence
+   }
+
+   return true;
+}
+
+bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params, // Picks the interp op combination for the requested component count and start component.
+                                         int num_dest_comp, int start_comp)
+{
+   sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i <<  ")" << "\n";
+
+   if (num_dest_comp == 1) {
+      switch (start_comp) {
+      case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
+      case 1: return load_interpolated_two_comp_for_one(dest, params,  op2_interp_xy, 1);
+      case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
+      case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
+      default:
+         assert(0); // NOTE(review): with asserts compiled out, control falls through to the checks below
+      }
+   }
+
+   if (num_dest_comp == 2) {
+      switch (start_comp) {
+      case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
+      case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
+      case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) && // components 1 and 2: z via the one-component path, y via the xy group
+               load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
+      default:
+         assert(0); // NOTE(review): same fall-through as above when asserts are disabled
+      }
+   }
+
+   if (num_dest_comp == 3 && start_comp == 0)
+      return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
+            load_interpolated_one_comp(dest, params, op2_interp_z);
+
+   int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp; // one bit per requested component, shifted to the start component
+
+   bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc); // zw group is emitted first
+   success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3); // xy group is emitted even if the zw group failed
+   return success;
+}
+
+
+bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr) // Computes barycentrics for a sample: fetches a vec4 from the buffer-info constant buffer and applies it to the i/j gradients.
+{
+   auto& vf = value_factory();
+   RegisterVec4 slope = vf.temp_vec4(pin_group);
+   auto  src = emit_load_to_register(vf.src(instr->src[0], 0)); // src[0] component 0 is used as the fetch address
+   auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
+                                   R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
+
+   fetch->set_fetch_flag(FetchInstr::srf_mode);
+   emit_instruction(fetch);
+
+   auto grad = vf.temp_vec4(pin_group);
+
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+   assert(interpolator.enabled);
+
+   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); // (j, i) occupy the first two slots of the gradient source
+
+   auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0); // horizontal gradients land in grad[0..1]; presumably swizzle 7 masks a channel - confirm
+   tex->set_tex_flag(TexInstr::grad_fine);
+   tex->set_tex_flag(TexInstr::x_unnormalized);
+   tex->set_tex_flag(TexInstr::y_unnormalized);
+   tex->set_tex_flag(TexInstr::z_unnormalized);
+   tex->set_tex_flag(TexInstr::w_unnormalized);
+   emit_instruction(tex);
+
+   tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0); // vertical gradients land in grad[2..3]
+   tex->set_tex_flag(TexInstr::x_unnormalized);
+   tex->set_tex_flag(TexInstr::y_unnormalized);
+   tex->set_tex_flag(TexInstr::z_unnormalized);
+   tex->set_tex_flag(TexInstr::w_unnormalized);
+   tex->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(tex);
+
+   auto tmp0 = vf.temp_register();
+   auto tmp1 = vf.temp_register();
+
+   emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write})); // tmp0 = grad[0] * slope[2] + j
+   emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); // tmp1 = grad[1] * slope[2] + i
+
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write})); // dest.x = grad[3] * slope[3] + tmp1
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr})); // dest.y = grad[2] * slope[3] + tmp0
+
+   return true;
+}
+
+bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr) // Computes barycentrics at an offset: applies the x/y offset from src[0] to the i/j gradients.
+{
+   auto& vf = value_factory();
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)]; // NOTE(review): unlike load_barycentric_at_sample there is no assert(interpolator.enabled) here
+
+   auto help = vf.temp_vec4(pin_group);
+   RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); // (j, i) occupy the first two slots of the gradient source
+
+   auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0); // horizontal gradients land in help[0..1]; presumably swizzle 7 masks a channel - confirm
+   getgradh->set_tex_flag(TexInstr::x_unnormalized);
+   getgradh->set_tex_flag(TexInstr::y_unnormalized);
+   getgradh->set_tex_flag(TexInstr::z_unnormalized);
+   getgradh->set_tex_flag(TexInstr::w_unnormalized);
+   getgradh->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(getgradh);
+
+   auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0); // vertical gradients land in help[2..3]
+   getgradv->set_tex_flag(TexInstr::x_unnormalized);
+   getgradv->set_tex_flag(TexInstr::y_unnormalized);
+   getgradv->set_tex_flag(TexInstr::z_unnormalized);
+   getgradv->set_tex_flag(TexInstr::w_unnormalized);
+   getgradv->set_tex_flag(TexInstr::grad_fine);
+   emit_instruction(getgradv);
+
+   auto ofs_x = vf.src(instr->src[0], 0);
+   auto ofs_y = vf.src(instr->src[0], 1);
+   auto tmp0 = vf.temp_register();
+   auto tmp1 = vf.temp_register();
+   emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); // tmp0 = help[0] * ofs_x + j
+   emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); // tmp1 = help[1] * ofs_x + i
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); // dest.x = help[3] * ofs_y + tmp1
+   emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr})); // dest.y = help[2] * ofs_y + tmp0
+
+   return true;
+}
+
+bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest, // Builds a two-instruction ALU group for a single-component interpolation (op2_interp_x or op2_interp_z).
+                                                const InterpolateParams& params,
+                                                EAluOp op)
+{
+   auto group = new AluGroup();
+   bool success = true;
+
+   AluInstr *ir = nullptr;
+   for (unsigned i = 0; i < 2 && success; ++i) { // stops early once an instruction could not be added
+      int chan = i;
+      if (op == op2_interp_z)
+         chan += 2; // the z variant uses channels 2 and 3 instead of 0 and 1
+
+
+      ir = new AluInstr(op, dest[chan],
+                        i & 1 ? params.j : params.i, // first instruction reads i, second reads j
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
+                        i == 0  ? AluInstr::write : AluInstr::last); // only the first instruction gets the write flag set
+
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir);
+   }
+   ir->set_alu_flag(alu_last_instr); // the loop body runs at least once, so ir is non-null here
+   if (success)
+      emit_instruction(group); // the group is only emitted when every add succeeded
+   return success;
+}
+
+bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest, // Builds a four-instruction ALU group for op and writes only the channels selected by writemask.
+                                                const InterpolateParams& params,
+                                                EAluOp op, int writemask)
+{
+   auto group = new AluGroup();
+   bool success = true;
+
+   AluInstr *ir = nullptr;
+   assert(params.j);
+   assert(params.i);
+   for (unsigned i = 0; i < 4 ; ++i) { // one instruction per channel, whether or not its result is written
+      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, // even channels read i, odd channels read j
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
+                        (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty); // channels outside writemask get no write flag
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir); // NOTE(review): overwritten every iteration, so only the last add_instruction() result is checked below
+   }
+   ir->set_alu_flag(alu_last_instr);
+   if (success)
+      emit_instruction(group);
+   return success;
+}
+
+bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest, // Builds a four-instruction ALU group for a two-component op but writes only channel comp.
+                                                          const InterpolateParams& params, EAluOp op,
+                                                          int comp)
+{
+   auto group = new AluGroup();
+   bool success = true;
+   AluInstr *ir = nullptr;
+
+   for (int i = 0; i <  4 ; ++i) { // one instruction per channel, whether or not its result is written
+      ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, // even channels read i, odd channels read j
+                        new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
+                        i == comp ? AluInstr::write : AluInstr::empty); // only the requested channel gets the write flag
+      ir->set_bank_swizzle(alu_vec_210);
+      success = group->add_instruction(ir); // NOTE(review): overwritten every iteration, so only the last add_instruction() result is checked below
+   }
+   ir->set_alu_flag(alu_last_instr);
+   if (success)
+      emit_instruction(group);
+
+   return success;
+}
+
+
+FragmentShaderEG::Interpolator::Interpolator(): // Starts every interpolator as disabled.
    enabled(false) // only this member is initialized here; ij_index, i and j are assigned in allocate_register_inputs() for enabled interpolators
 {
 }
index 913e1e0..23ca27b 100644 (file)
@@ -39,22 +39,18 @@ public:
 
    bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
 
+protected:
+
+   static const int s_max_interpolators = 6;
+   bool interpolators_used(int i) const {return m_interpolators_used.test(i);}
 private:
-   class Interpolator {
-   public:
-      Interpolator();
-      bool enabled : 1;
-      unsigned ij_index : 4;
-      PRegister i;
-      PRegister j;
-   };
 
-   struct InterpolateParams {
-      PVirtualValue i,j;
-      int base;
-   };
+   bool load_interpolated_input(nir_intrinsic_instr *intr);
 
-   static const int s_max_interpolators = 6;
+   virtual bool allocate_register_inputs() = 0;
+   virtual bool load_input_hw(nir_intrinsic_instr *intr) = 0;
+   virtual bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr)  = 0;
+   virtual bool load_interpolated_input_hw(nir_intrinsic_instr *intr) = 0;
 
    bool do_scan_instruction(nir_instr *instr) override;
    int do_allocate_reserved_registers() override;
@@ -63,17 +59,6 @@ private:
 
    bool scan_input(nir_intrinsic_instr *instr, int index_src_id);
 
-   bool load_barycentric_pixel(nir_intrinsic_instr *intr);
-   bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
-   bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
-   bool load_interpolated_input(nir_intrinsic_instr *intr);
-   bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
-                          int num_dest_comp, int start_comp);
-
-   bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
-   bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
-   bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
-                                           int start, int dest_slot);
 
    bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs);
    bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
@@ -94,7 +79,6 @@ private:
    ExportInstr *m_last_pixel_export;
 
    std::bitset<s_max_interpolators> m_interpolators_used;
-   std::array<Interpolator, s_max_interpolators> m_interpolator;
    RegisterVec4 m_pos_input;
    Register *m_face_input{nullptr};
    bool m_fs_write_all;
@@ -109,6 +93,45 @@ private:
    int m_rat_base{0};
 };
 
+class FragmentShaderEG : public FragmentShader { // Fragment shader variant that implements the *_hw hooks of FragmentShader with interpolator-based input loading.
+public:
+   using FragmentShader::FragmentShader; // inherits the base-class constructors
+
+private:
+   class Interpolator { // per-slot barycentric interpolator state
+   public:
+      Interpolator();
+      bool enabled : 1; // set in allocate_register_inputs() when the slot is flagged as used
+      unsigned ij_index : 4; // running index assigned to enabled slots in allocate_register_inputs()
+      PRegister i; // pinned register holding barycentric i
+      PRegister j; // pinned register holding barycentric j
+   };
+
+   struct InterpolateParams { // inputs shared by the load_interpolated* helpers
+      PVirtualValue i,j; // barycentric sources
+      int base; // added to ALU_SRC_PARAM_BASE when the parameter constant is built
+   };
+
+   bool allocate_register_inputs() override; // NOTE(review): the definition returns the number of enabled interpolators, which does not fit a bool return type
+   bool load_input_hw(nir_intrinsic_instr *intr) override;
+   bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override;
+   bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override;
+
+   bool load_barycentric_pixel(nir_intrinsic_instr *intr); // NOTE(review): no definition is visible in this patch - possibly unused, confirm
+   bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
+   bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
+   bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
+                          int num_dest_comp, int start_comp);
+
+   bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
+   bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
+   bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
+                                           int dest_slot);
+
+   std::array<Interpolator, s_max_interpolators> m_interpolator; // one entry per interpolator slot
+
+};
+
 }
 
 #endif
index 1dde8ce..5073028 100644 (file)
@@ -3073,7 +3073,7 @@ Shader *TestShader::from_string(const std::string& s)
    Shader *shader = nullptr;
 
    if (line.substr(0,2) == "FS")
-      shader = new FragmentShader(key);
+      shader = new FragmentShaderEG(key);
    else if (line.substr(0,2) == "VS")
       shader = new VertexShader(nullptr, nullptr,  key);
    else if (line.substr(0,2) == "GS")