From 9c096e4ace70c0b4f9c390bfc275596b10a0bd72 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Tue, 22 Aug 2023 21:30:05 +0200 Subject: [PATCH] ac/nir: Slightly refactor how pos0 exports are added when missing. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Prepares for a workaround. Makes it possible for this function to not emit the pos0 export at all so that it can be emitted by a subsequent call to the function later. Cc: mesa-stable Signed-off-by: Timur Kristóf Reviewed-by: Qiang Yu Reviewed-by: Rhys Perry Part-of: --- src/amd/common/ac_nir.c | 62 ++++++++++++++++++++++++++------------- src/amd/common/ac_nir_lower_ngg.c | 6 ++-- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 2598812..60f9044 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -204,6 +204,25 @@ get_export_output(nir_builder *b, nir_def **output) return nir_vec(b, vec, 4); } +static nir_def * +get_pos0_output(nir_builder *b, nir_def **output) +{ + /* Some applications don't write position but expect (0, 0, 0, 1) + * so use that value instead of undef when it isn't written. + */ + + nir_def *vec[4]; + + for (int i = 0; i < 4; i++) { + if (output[i]) + vec[i] = nir_u2u32(b, output[i]); + else + vec[i] = nir_imm_float(b, i == 3 ? 1.0 : 0.0); + } + + return nir_vec(b, vec, 4); +} + void ac_nir_export_position(nir_builder *b, enum amd_gfx_level gfx_level, @@ -216,26 +235,23 @@ ac_nir_export_position(nir_builder *b, { nir_intrinsic_instr *exp[4]; unsigned exp_num = 0; + unsigned exp_pos_offset = 0; - nir_def *pos; if (outputs_written & VARYING_BIT_POS) { - pos = get_export_output(b, outputs[VARYING_SLOT_POS]); + /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. + * Setting valid_mask=1 prevents it and has no other effect. + */ + const unsigned pos_flags = gfx_level == GFX10 ? AC_EXP_FLAG_VALID_MASK : 0; + nir_def *pos = get_pos0_output(b, outputs[VARYING_SLOT_POS]); + + exp[exp_num] = nir_export_amd( + b, pos, .base = V_008DFC_SQ_EXP_POS + exp_num, + .flags = pos_flags, .write_mask = 0xf); + exp_num++; } else { - nir_def *zero = nir_imm_float(b, 0); - nir_def *one = nir_imm_float(b, 1); - pos = nir_vec4(b, zero, zero, zero, one); + exp_pos_offset++; } - /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. - * Setting valid_mask=1 prevents it and has no other effect. - */ - unsigned pos_flags = gfx_level == GFX10 ? AC_EXP_FLAG_VALID_MASK : 0; - - exp[exp_num] = nir_export_amd( - b, pos, .base = V_008DFC_SQ_EXP_POS + exp_num, - .flags = pos_flags, .write_mask = 0xf); - exp_num++; - uint64_t mask = VARYING_BIT_PSIZ | VARYING_BIT_EDGE | @@ -276,7 +292,8 @@ ac_nir_export_position(nir_builder *b, rates = outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]; } else if (force_vrs) { /* If Pos.W != 1 (typical for non-GUI elements), use coarse shading. */ - nir_def *pos_w = nir_channel(b, pos, 3); + nir_def *pos_w = outputs[VARYING_SLOT_POS][3]; + pos_w = pos_w ? nir_u2u32(b, pos_w) : nir_imm_float(b, 1.0); nir_def *cond = nir_fneu_imm(b, pos_w, 1); rates = nir_bcsel(b, cond, nir_load_force_vrs_rates_amd(b), nir_imm_int(b, 0)); } @@ -305,7 +322,7 @@ ac_nir_export_position(nir_builder *b, exp[exp_num] = nir_export_amd( b, nir_vec(b, vec, 4), - .base = V_008DFC_SQ_EXP_POS + exp_num, + .base = V_008DFC_SQ_EXP_POS + exp_num + exp_pos_offset, .flags = flags, .write_mask = write_mask); exp_num++; @@ -316,7 +333,7 @@ ac_nir_export_position(nir_builder *b, (clip_cull_mask & BITFIELD_RANGE(i * 4, 4))) { exp[exp_num] = nir_export_amd( b, get_export_output(b, outputs[VARYING_SLOT_CLIP_DIST0 + i]), - .base = V_008DFC_SQ_EXP_POS + exp_num, + .base = V_008DFC_SQ_EXP_POS + exp_num + exp_pos_offset, .write_mask = (clip_cull_mask >> (i * 4)) & 0xf); exp_num++; } @@ -336,13 +353,16 @@ ac_nir_export_position(nir_builder *b, if (clip_cull_mask & BITFIELD_RANGE(i * 4, 4)) { exp[exp_num] = nir_export_amd( b, get_export_output(b, clip_dist + i * 4), - .base = V_008DFC_SQ_EXP_POS + exp_num, + .base = V_008DFC_SQ_EXP_POS + exp_num + exp_pos_offset, .write_mask = (clip_cull_mask >> (i * 4)) & 0xf); exp_num++; } } } + if (!exp_num) + return; + nir_intrinsic_instr *final_exp = exp[exp_num - 1]; if (done) { @@ -713,7 +733,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, emit_streamout(&b, stream, info, &outputs); if (stream == 0) { - uint64_t export_outputs = b.shader->info.outputs_written; + uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS; if (kill_pointsize) export_outputs &= ~VARYING_BIT_PSIZ; @@ -820,7 +840,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, preserved = nir_metadata_none; } - uint64_t export_outputs = nir->info.outputs_written; + uint64_t export_outputs = nir->info.outputs_written | VARYING_BIT_POS; if (kill_pointsize) export_outputs &= ~VARYING_BIT_PSIZ; diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index a8d882a..d2e924b 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -2498,7 +2498,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var)); } - uint64_t export_outputs = shader->info.outputs_written; + uint64_t export_outputs = shader->info.outputs_written | VARYING_BIT_POS; if (options->kill_pointsize) export_outputs &= ~VARYING_BIT_PSIZ; @@ -3029,7 +3029,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in } } - uint64_t export_outputs = b->shader->info.outputs_written; + uint64_t export_outputs = b->shader->info.outputs_written | VARYING_BIT_POS; if (s->options->kill_pointsize) export_outputs &= ~VARYING_BIT_PSIZ; @@ -4335,7 +4335,7 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s) ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask, !s->has_param_exports, false, true, - s->per_vertex_outputs, s->outputs); + s->per_vertex_outputs | VARYING_BIT_POS, s->outputs); /* Export generic attributes on GFX10.3 * (On GFX11 they are already stored in the attribute ring.) -- 2.7.4