case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 4;
-
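+ /* With the NIR backend the compat profile can use the same GLSL feature
+ * level as core. */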
+ case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
+ if (!is_nir_enabled(&rscreen->b))
+ return 140;
+ FALLTHROUGH;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (family >= CHIP_CEDAR)
return is_nir_enabled(&rscreen->b) ? 450 : 430;
return 330;
- case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- return 140;
-
/* Supported except the original R600. */
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
rctx->clip_state.state = *state;
r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
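+ /* The user clip planes may now be consumed by the TES or GS when it is
+ * the last stage before rasterization. */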
+ rctx->driver_consts[PIPE_SHADER_TESS_EVAL].vs_ucp_dirty = true;
+ rctx->driver_consts[PIPE_SHADER_GEOMETRY].vs_ucp_dirty = true;
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
start = compute_only ? PIPE_SHADER_COMPUTE : 0;
end = compute_only ? PIPE_SHADER_TYPES : PIPE_SHADER_COMPUTE;
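+
+ /* The user clip plane constants go to whichever stage runs last before
+ * the rasterizer. */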
+ int last_vertex_stage = PIPE_SHADER_VERTEX;
+ if (rctx->tes_shader)
+ last_vertex_stage = PIPE_SHADER_TESS_EVAL;
+ if (rctx->gs_shader)
+ last_vertex_stage = PIPE_SHADER_GEOMETRY;
+
for (sh = start; sh < end; sh++) {
struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
if (!info->vs_ucp_dirty &&
ptr = info->constants;
size = info->alloc_size;
if (info->vs_ucp_dirty) {
- assert(sh == PIPE_SHADER_VERTEX);
+ assert(sh == PIPE_SHADER_VERTEX ||
+ sh == PIPE_SHADER_GEOMETRY ||
+ sh == PIPE_SHADER_TESS_EVAL);
if (!size) {
ptr = rctx->clip_state.state.ucp;
size = R600_UCP_SIZE;
if (info->texture_const_dirty) {
assert (ptr);
assert (size);
- if (sh == PIPE_SHADER_VERTEX)
+ if (sh == last_vertex_stage)
memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
if (sh == PIPE_SHADER_FRAGMENT)
memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
NirLowerInstruction::NirLowerInstruction():
b(nullptr)
{
-
}
bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
exec_list_append(&shader->variables, &new_list);
}
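+
+/* Lower writes to VARYING_SLOT_CLIP_VERTEX into two VARYING_SLOT_CLIP_DIST0/1
+ * writes whose components are dot products of the clip vertex with the user
+ * clip planes read from the buffer-info constant buffer. If stream output
+ * reads the clip vertex, the original store is kept but retargeted to an
+ * extra output slot. */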
+class LowerClipvertexWrite : public NirLowerInstruction {
+
+public:
+ LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info) :
+ m_clipplane1(noutputs),
+ m_clipvtx(noutputs + 1),
+ m_so_info(so_info){}
+private:
+ bool filter(const nir_instr *instr) const override {
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ auto intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX;
+ }
+
+ nir_ssa_def *lower(nir_instr *instr) override {
+
+ auto intr = nir_instr_as_intrinsic(instr);
+ nir_ssa_def *output[8] = {nullptr};
+
+ // UBO load buffer IDs get corrected by adding 1 when emitted, so subtract 1 here
+ auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1);
+
+ assert(intr->src[0].is_ssa);
+ auto clip_vtx = intr->src[0].ssa;
+
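+ /* One clip distance per user clip plane: dot(clip_vertex, plane_i). */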
+ for (int i = 0; i < 8; ++i) {
+ auto sel = nir_imm_int(b, i);
+ auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel);
+ output[i] = nir_fdot4(b, clip_vtx, mrow);
+ }
+
+ unsigned clip_vertex_index = nir_intrinsic_base(intr);
+
+ for (int i = 0; i < 2; ++i) {
+ auto clip_i = nir_vec(b, &output[4 * i], 4);
+ auto store = nir_store_output(b, clip_i, intr->src[1].ssa);
+ nir_intrinsic_set_write_mask(store, 0xf);
+ nir_intrinsic_set_base(store, i == 0 ? clip_vertex_index : m_clipplane1);
+
+ nir_io_semantics semantic = nir_intrinsic_io_semantics(intr);
+ semantic.location = VARYING_SLOT_CLIP_DIST0 + i;
+ semantic.no_varying = 1;
+ nir_intrinsic_set_io_semantics(store, semantic);
+ }
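+ /* Retarget the original store to its own output slot. */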
+ nir_intrinsic_set_base(intr, m_clipvtx);
+
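+ /* If stream output captures the clip vertex, redirect it to the relocated
+ * output and keep the original store instead of replacing it. */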
+ nir_ssa_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE;
+ for (unsigned i = 0; i < m_so_info.num_outputs; ++i) {
+ if (m_so_info.output[i].register_index == clip_vertex_index) {
+ m_so_info.output[i].register_index = m_clipvtx;
+ result = NIR_LOWER_INSTR_PROGRESS;
+ }
+ }
+ return result;
+ }
+ int m_clipplane1;
+ int m_clipvtx;
+ pipe_stream_output_info& m_so_info;
+};
+
+
}
static nir_intrinsic_op
return true;
}
+
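+/* Lower gl_ClipVertex writes to clip distances; stream-output entries that
+ * referenced the clip vertex are redirected to its new output slot. */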
+static bool
+r600_lower_clipvertex_to_clipdist(nir_shader *sh,
+ pipe_stream_output_info& so_info)
+{
+ if (!(sh->info.outputs_written & VARYING_BIT_CLIP_VERTEX))
+ return false;
+
+ int noutputs = util_bitcount64(sh->info.outputs_written);
+ bool result = r600::LowerClipvertexWrite(noutputs, so_info).run(sh);
+ return result;
+}
+
static bool
r600_nir_lower_atomics(nir_shader *shader)
{
return false;
}
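+
+/* A stage is the last vertex-processing stage if its outputs go straight to
+ * the rasterizer, i.e. it is not compiled as an ES or LS stage. */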
+static bool r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key)
+{
+ if (nir->info.stage == MESA_SHADER_GEOMETRY)
+ return true;
+
+ if (nir->info.stage == MESA_SHADER_TESS_EVAL &&
+ !key.tes.as_es)
+ return true;
+
+ if (nir->info.stage == MESA_SHADER_VERTEX &&
+ !key.vs.as_es && !key.vs.as_ls)
+ return true;
+
+ return false;
+}
+
extern "C"
bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
{
auto sh = nir_shader_clone(sel->nir, sel->nir);
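+ /* gl_ClipVertex has to be converted to clip distances; do this in NIR
+ * for the last stage before rasterization. */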
+ if (r600_is_last_vertex_stage(sh, *key))
+ r600_lower_clipvertex_to_clipdist(sh, sel->so);
+
if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
sh->info.stage == MESA_SHADER_TESS_EVAL ||
(sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
NIR_PASS_V(sh, nir_lower_ubo_vec4);
-
if (lower_64bit)
NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh);
bool r600_lower_64bit_to_vec2(nir_shader *sh);
bool r600_split_64bit_alu_and_phi(nir_shader *sh);
class AssemblyFromShader {
public:
tgsi_semantic name = (tgsi_semantic)semantic.first;
auto write_mask = nir_intrinsic_write_mask(instr);
ShaderOutput output(driver_location, name, write_mask);
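+ /* Outputs marked no_varying by the CLIP_VERTEX lowering get no semantic ID,
+ * and the leftover CLIP_VERTEX store itself is not recorded as an output. */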
- output.set_sid(semantic.second);
- add_output(output);
- if (location == VARYING_SLOT_CLIP_DIST0 ||
- location == VARYING_SLOT_CLIP_DIST1) {
- m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
- }
+ if (!nir_intrinsic_io_semantics(instr).no_varying)
+ output.set_sid(semantic.second);
+ if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
+ add_output(output);
if (location == VARYING_SLOT_VIEWPORT) {
m_out_viewport = true;
m_out_misc_write = true;
+ }
+ if (location == VARYING_SLOT_CLIP_DIST0 ||
+ location == VARYING_SLOT_CLIP_DIST1) {
+ m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
+ m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
}
- if (m_noutputs <= driver_location)
+
+ if (m_noutputs <= driver_location &&
+ nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
m_noutputs = driver_location + 1;
return true;
auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream],
value_factory().literal(m_noutputs),
AluInstr::last_write);
- //ir->add_required_instr(cut_instr);
emit_instruction(ir);
}
-
-
return true;
}
bool GeometryShader::store_output(nir_intrinsic_instr* instr)
{
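+ /* CLIP_VERTEX has already been lowered to clip distances; nothing to store. */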
+ if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
+ return true;
+
auto location = nir_intrinsic_io_semantics(instr).location;
auto index = nir_src_as_const_value(instr->src[1]);
assert(index);
}
}
-
-
-
return true;
}
{
sh_info->processor_type = PIPE_SHADER_GEOMETRY;
sh_info->ring_item_sizes[0] = m_ring_item_sizes[0];
+ sh_info->cc_dist_mask = m_cc_dist_mask;
+ sh_info->clip_dist_write = m_clip_dist_write;
}
bool GeometryShader::read_prop(std::istream& is)
bool m_first_vertex_emitted{false};
int m_offset{0};
int m_next_input_ring_offset{0};
- int m_clip_dist_mask{0};
+ int m_cc_dist_mask{0};
+ int m_clip_dist_write{0};
int m_cur_ring_output{0};
bool m_gs_tri_strip_adj_fix{false};
uint64_t m_input_mask{0};
case VARYING_SLOT_CLIP_VERTEX:
return emit_clip_vertices(store_info, intr);
case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CLIP_DIST1: {
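+ /* Clip distances always go to the position exports; only distances that
+ * are also read as varyings get a param export. */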
+ bool success = emit_varying_pos(store_info, intr);
m_num_clip_dist += 4;
- return emit_varying_param(store_info, intr) && emit_varying_pos(store_info, intr);
+ if (!nir_intrinsic_io_semantics(&intr).no_varying)
+ success &= emit_varying_param(store_info, intr);
+ return success;
+ }
case VARYING_SLOT_LAYER: {
m_out_misc_write = 1;
m_vs_out_layer = 1;
m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex;
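+ /* The clip-distance computation is now done by the NIR lowering pass;
+ * only the output register still has to be recorded here. */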
- RegisterVec4 clip_dist[2] = { vf.temp_vec4(pin_group), vf.temp_vec4(pin_group)};
-
- for (int i = 0; i < 8; i++) {
- int oreg = i >> 2;
- int ochan = i & 3;
- AluInstr *ir = nullptr;
- AluInstr::SrcValues src(8);
-
- for (int j = 0; j < 4; j++) {
- src[2 * j] = m_clip_vertex[j];
- src[2 * j + 1] = vf.uniform(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER);
- }
-
- ir = new AluInstr(op2_dot4_ieee, clip_dist[oreg][ochan], src, AluInstr::last_write, 4);
- m_parent->emit_instruction(ir);
- }
-
- m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[0]);
- m_parent->emit_instruction(m_last_pos_export);
-
- m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[1]);
- m_parent->emit_instruction(m_last_pos_export);
-
return true;
}
output.set_sid(sid);
switch (location) {
- case VARYING_SLOT_PSIZ:
- case VARYING_SLOT_POS:
- case VARYING_SLOT_CLIP_VERTEX:
- case VARYING_SLOT_EDGE: {
- break;
- }
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
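+ /* Clip distances that exist only for clipping (no_varying) are not
+ * added as outputs here. */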
+ if (nir_intrinsic_io_semantics(intr).no_varying)
+ break;
+ FALLTHROUGH;
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_VIEW_INDEX:
default:
output.set_is_param(true);
+ FALLTHROUGH;
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_POS:
+ case VARYING_SLOT_CLIP_VERTEX:
+ case VARYING_SLOT_EDGE:
+ add_output(output);
+ break;
}
- add_output(output);
- break;
}
case nir_intrinsic_load_vertex_id:
m_sv_values.set(es_vertexid);