/* ABI: position first, then user, then psiz */
static void
-agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings)
+agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
+ struct agx_shader_key *key)
{
unsigned base = 0;
varyings->slots[VARYING_SLOT_POS] = base;
base += 4;
- u_foreach_bit64(loc, nir->info.outputs_written) {
+ assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded));
+
+ /* Smooth 32-bit user bindings go next */
+ u_foreach_bit64(loc, nir->info.outputs_written &
+ ~key->vs.outputs_flat_shaded &
+ ~key->vs.outputs_linear_shaded) {
+ if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
+ continue;
+
+ varyings->slots[loc] = base;
+ base += 4;
+ varyings->num_32_smooth += 4;
+ }
+
+ /* Flat 32-bit user bindings go next */
+ u_foreach_bit64(loc,
+ nir->info.outputs_written & key->vs.outputs_flat_shaded) {
+ if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
+ continue;
+
+ varyings->slots[loc] = base;
+ base += 4;
+ varyings->num_32_flat += 4;
+ }
+
+ /* Linear 32-bit user bindings go next */
+ u_foreach_bit64(loc,
+ nir->info.outputs_written & key->vs.outputs_linear_shaded) {
if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
continue;
varyings->slots[loc] = base;
base += 4;
+ varyings->num_32_linear += 4;
}
/* TODO: Link FP16 varyings */
varyings->base_index_fp16 = base;
+ varyings->num_16_smooth = 0;
+ varyings->num_16_flat = 0;
+ varyings->num_16_linear = 0;
if (nir->info.outputs_written & VARYING_BIT_PSIZ) {
varyings->slots[VARYING_SLOT_PSIZ] = base;
/* Must be last since NIR passes can remap driver_location freely */
if (nir->info.stage == MESA_SHADER_VERTEX)
- agx_remap_varyings_vs(nir, &out->varyings.vs);
+ agx_remap_varyings_vs(nir, &out->varyings.vs, key);
if (agx_should_dump(nir, AGX_DBG_SHADERS))
nir_print_shader(nir, stdout);
#include "util/u_dynarray.h"
struct agx_varyings_vs {
+ /* The number of user varyings of each type. The varyings must be allocated
+ * in this order ({32, 16} × {smooth, flat, linear}): all 32-bit varyings
+ * first (smooth, then flat, then linear), followed by the 16-bit group in
+ * the same interpolation order. This may require remapping.
+ */
+ unsigned num_32_smooth;
+ unsigned num_32_flat;
+ unsigned num_32_linear;
+ unsigned num_16_smooth;
+ unsigned num_16_flat;
+ unsigned num_16_linear;
+
/* The first index used for FP16 varyings. Indices less than this are treated
 * as FP32. This may require remapping slots to guarantee this invariant.
 */
struct agx_ppp_update ppp =
agx_new_ppp_update(&batch->pool, (struct AGX_PPP_HEADER){
.w_clamp = true,
- .varying_counts_16 = true,
.cull_2 = true,
.occlusion_query_2 = true,
.output_unknown = true,
/* clang-format off */
agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
- agx_ppp_push(&ppp, VARYING_COUNTS, cfg);
agx_ppp_push(&ppp, CULL_2, cfg);
agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
.fragment_back_stencil = IS_DIRTY(ZS),
.output_select = IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG),
.varying_counts_32 = IS_DIRTY(VS_PROG),
+ .varying_counts_16 = IS_DIRTY(VS_PROG),
.cull = IS_DIRTY(RS),
.fragment_shader =
IS_DIRTY(FS) || varyings_dirty || IS_DIRTY(SAMPLE_MASK),
}
}
+ assert(dirty.varying_counts_32 == dirty.varying_counts_16);
+
if (dirty.varying_counts_32) {
agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
- cfg.smooth = agx_num_general_outputs(&ctx->vs->info.varyings.vs);
+ cfg.smooth = vs->info.varyings.vs.num_32_smooth;
+ cfg.flat = vs->info.varyings.vs.num_32_flat;
+ cfg.linear = vs->info.varyings.vs.num_32_linear;
+ }
+
+ agx_ppp_push(&ppp, VARYING_COUNTS, cfg) {
+ cfg.smooth = vs->info.varyings.vs.num_16_smooth;
+ cfg.flat = vs->info.varyings.vs.num_16_flat;
+ cfg.linear = vs->info.varyings.vs.num_16_linear;
}
}