From 96211adf771da5211b9d5d8178f1cee0626a0792 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 14 Nov 2021 04:38:04 -0500 Subject: [PATCH] freedreno/a4xx: add swizzles to shader keys for tg4 workaround Signed-off-by: Ilia Mirkin Part-of: --- src/freedreno/ir3/ir3.h | 5 ++ src/freedreno/ir3/ir3_compiler_nir.c | 68 ++++++++++++++++++++++-- src/freedreno/ir3/ir3_context.c | 2 + src/freedreno/ir3/ir3_context.h | 3 ++ src/freedreno/ir3/ir3_shader.c | 2 + src/freedreno/ir3/ir3_shader.h | 20 ++++++- src/gallium/drivers/freedreno/a4xx/fd4_context.c | 5 ++ src/gallium/drivers/freedreno/a4xx/fd4_context.h | 3 ++ src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 12 +++++ src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 32 ++++++++++- src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 24 +++++++-- src/gallium/drivers/freedreno/a4xx/fd4_texture.h | 1 + 12 files changed, 166 insertions(+), 11 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 5ef1eae..db0d737 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -507,6 +507,11 @@ struct ir3 { */ DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); + /* Track tg4 instructions which need texture state patched in (for tg4 + * swizzling workaround): + */ + DECLARE_ARRAY(struct ir3_instruction *, tg4); + /* List of blocks: */ struct list_head block_list; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 8469417..fc21650 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2754,10 +2754,6 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) opc = OPC_GETLOD; break; case nir_texop_tg4: - /* NOTE: a4xx might need to emulate gather w/ txf (this is - * what blob does, seems gather is broken?), and a3xx did - * not support it (but probably could also emulate). 
- */ switch (tex->component) { case 0: opc = OPC_GATHER4R; @@ -2915,6 +2911,27 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) info = get_tex_samp_tex_src(ctx, tex); } + bool tg4_swizzle_fixup = false; + if (tex->op == nir_texop_tg4 && ctx->compiler->gen == 4 && + ctx->sampler_swizzles[tex->texture_index] != 0x688 /* rgba */) { + /* XXX fix-up ASTC alpha as well? */ + uint16_t swizzles = ctx->sampler_swizzles[tex->texture_index]; + uint16_t swizzle = (swizzles >> (tex->component * 3)) & 7; + if (swizzle > 3) { + /* this would mean that we can just return 0 / 1, no texturing + * necessary + */ + struct ir3_instruction *imm = create_immed(b, + type_float(type) ? fui(swizzle - 4) : (swizzle - 4)); + for (int i = 0; i < 4; i++) + dst[i] = imm; + ir3_put_dst(ctx, &tex->dest); + return; + } + opc = OPC_GATHER4R + swizzle; + tg4_swizzle_fixup = true; + } + struct ir3_instruction *col0 = ir3_create_collect(b, src0, nsrc0); struct ir3_instruction *col1 = ir3_create_collect(b, src1, nsrc1); @@ -2937,7 +2954,11 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) sam = emit_sam(ctx, opc, info, type, MASK(ncomp), col0, col1); } + if (tg4_swizzle_fixup) + array_insert(ctx->ir, ctx->ir->tg4, sam); + if ((ctx->astc_srgb & (1 << tex->texture_index)) && + tex->op != nir_texop_tg4 && /* leave out tg4, unless it's on alpha? */ !nir_tex_instr_is_query(tex)) { assert(opc != OPC_META_TEX_PREFETCH); @@ -2945,7 +2966,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) sam->dsts[0]->wrmask = 0x7; ir3_split_dest(b, dst, sam, 0, 3); - /* we need to sample the alpha separately with a non-ASTC + /* we need to sample the alpha separately with a non-SRGB * texture state: */ sam = ir3_SAM(b, opc, type, 0b1000, flags | info.flags, info.samp_tex, @@ -4119,6 +4140,40 @@ fixup_astc_srgb(struct ir3_context *ctx) } } +/* Fixup tex sampler state for tg4 workaround instructions. We + * need to assign the tex state indexes for these after we know the + * max tex index. 
+ */ +static void +fixup_tg4(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + /* indexed by original tex idx, value is newly assigned unswizzled + * tex state idx. Zero is invalid since there is at least one sampler + * if we get here. + */ + unsigned alt_tex_state[16] = {0}; + unsigned tex_idx = ctx->max_texture_index + so->astc_srgb.count + 1; + unsigned idx = 0; + + so->tg4.base = tex_idx; + + for (unsigned i = 0; i < ctx->ir->tg4_count; i++) { + struct ir3_instruction *sam = ctx->ir->tg4[i]; + + compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); + + if (alt_tex_state[sam->cat5.tex] == 0) { + /* assign new alternate (unswizzled) tex state slot: */ + alt_tex_state[sam->cat5.tex] = tex_idx++; + so->tg4.orig_idx[idx++] = sam->cat5.tex; + so->tg4.count++; + } + + sam->cat5.tex = alt_tex_state[sam->cat5.tex]; + } +} + static bool output_slot_used_for_binning(gl_varying_slot slot) { @@ -4589,6 +4644,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (ctx->astc_srgb) fixup_astc_srgb(ctx); + if (ctx->compiler->gen == 4 && ctx->s->info.uses_texture_gather) + fixup_tg4(ctx); + /* We need to do legalize after (for frag shader's) the "bary.f" * offsets (inloc) have been assigned. 
*/ diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 03df168..bf08f84 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -38,8 +38,10 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so) if (compiler->gen == 4) { if (so->type == MESA_SHADER_VERTEX) { ctx->astc_srgb = so->key.vastc_srgb; + memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles, sizeof(ctx->sampler_swizzles)); } else if (so->type == MESA_SHADER_FRAGMENT) { ctx->astc_srgb = so->key.fastc_srgb; + memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles, sizeof(ctx->sampler_swizzles)); } } else if (compiler->gen == 3) { if (so->type == MESA_SHADER_VERTEX) { diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 98aa108..d2141c0 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -153,6 +153,9 @@ struct ir3_context { /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ unsigned astc_srgb; + /* on a4xx, per-sampler per-component swizzles, for tg4: */ + uint16_t sampler_swizzles[16]; + unsigned samples; /* bitmask of x,y sample shifts */ unsigned max_texture_index; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index dbdc617..8a4cd23 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -477,6 +477,7 @@ ir3_setup_used_key(struct ir3_shader *shader) if (info->stage == MESA_SHADER_FRAGMENT) { key->fastc_srgb = ~0; key->fsamples = ~0; + memset(key->fsampler_swizzles, 0xff, sizeof(key->fsampler_swizzles)); if (info->inputs_read & VARYING_BITS_COLOR) { key->rasterflat = true; @@ -507,6 +508,7 @@ ir3_setup_used_key(struct ir3_shader *shader) if (info->stage == MESA_SHADER_VERTEX) { key->vastc_srgb = ~0; key->vsamples = ~0; + memset(key->vsampler_swizzles, 0xff, sizeof(key->vsampler_swizzles)); } if (info->stage == MESA_SHADER_TESS_CTRL) diff --git 
a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 9a94fce..64597ca 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -342,6 +342,10 @@ struct ir3_shader_key { /* bitmask of samplers which need astc srgb workaround (a4xx): */ uint16_t vastc_srgb, fastc_srgb; + + /* per-component (3-bit) swizzles of each sampler (a4xx tg4): */ + uint16_t vsampler_swizzles[16]; + uint16_t fsampler_swizzles[16]; }; static inline unsigned @@ -392,7 +396,9 @@ ir3_shader_key_changes_fs(struct ir3_shader_key *key, { if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->fsamples != key->fsamples) || - (last_key->fastc_srgb != key->fastc_srgb)) + (last_key->fastc_srgb != key->fastc_srgb) || + memcmp(last_key->fsampler_swizzles, key->fsampler_swizzles, + sizeof(key->fsampler_swizzles))) return true; } @@ -418,7 +424,9 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, { if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->vsamples != key->vsamples) || - (last_key->vastc_srgb != key->vastc_srgb)) + (last_key->vastc_srgb != key->vastc_srgb) || + memcmp(last_key->vsampler_swizzles, key->vsampler_swizzles, + sizeof(key->vsampler_swizzles))) return true; } @@ -699,6 +707,14 @@ struct ir3_shader_variant { unsigned orig_idx[16]; } astc_srgb; + /* for tg4 workaround, the number/base of additional + * unswizzled tex states we need, and index of original tex states + */ + struct { + unsigned base, count; + unsigned orig_idx[16]; + } tg4; + /* texture sampler pre-dispatches */ uint32_t num_sampler_prefetch; struct ir3_sampler_prefetch sampler_prefetch[IR3_MAX_SAMPLER_PREFETCH]; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index 58b6f2d..18663a9 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -110,5 +110,10 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, 
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0); + for (int i = 0; i < 16; i++) { + fd4_ctx->vsampler_swizzles[i] = 0x688; + fd4_ctx->fsampler_swizzles[i] = 0x688; + } + return pctx; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 7cf0b00..194c424 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -51,6 +51,9 @@ struct fd4_context { /* bitmask of samplers which need astc srgb workaround: */ uint16_t vastc_srgb, fastc_srgb; + /* per-sampler swizzles, needed for tg4 workaround: */ + uint16_t vsampler_swizzles[16], fsampler_swizzles[16]; + /* storage for ctx->last.key: */ struct ir3_shader_key last_key; }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index cf1828a..803db56 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -102,6 +102,18 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; + /* Check if we actually need the tg4 workarounds */ + if (ir3_get_shader_info(emit.key.vs)->uses_texture_gather) { + emit.key.key.has_per_samp = true; + memcpy(emit.key.key.vsampler_swizzles, fd4_ctx->vsampler_swizzles, + sizeof(emit.key.key.vsampler_swizzles)); + } + if (ir3_get_shader_info(emit.key.fs)->uses_texture_gather) { + emit.key.key.has_per_samp = true; + memcpy(emit.key.key.fsampler_swizzles, fd4_ctx->fsampler_swizzles, + sizeof(emit.key.key.fsampler_swizzles)); + } + if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart && !u_trim_pipe_prim(info->mode, (unsigned *)&draw->count)) return false; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 066fd5d..ec43cff 100644 --- 
a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -190,7 +190,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (tex->num_textures > 0) { - unsigned num_textures = tex->num_textures + v->astc_srgb.count; + unsigned num_textures = tex->num_textures + v->astc_srgb.count + v->tg4.count; /* emit texture state: */ OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures)); @@ -247,8 +247,38 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); } + + for (i = 0; i < v->tg4.count; i++) { + static const struct fd4_pipe_sampler_view dummy_view = {}; + const struct fd4_pipe_sampler_view *view; + unsigned idx = v->tg4.orig_idx[i]; + + view = tex->textures[idx] ? fd4_pipe_sampler_view(tex->textures[idx]) + : &dummy_view; + + unsigned texconst0 = view->texconst0 & ~(0xfff << 4); + texconst0 |= A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_X) | + A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_Y) | + A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_Z) | + A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_W); + + OUT_RING(ring, texconst0); + OUT_RING(ring, view->texconst1); + OUT_RING(ring, view->texconst2); + OUT_RING(ring, view->texconst3); + if (view->base.texture) { + struct fd_resource *rsc = fd_resource(view->base.texture); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); + } else { + OUT_RING(ring, 0x00000000); + } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } } else { debug_assert(v->astc_srgb.count == 0); + debug_assert(v->tg4.count == 0); } if (needs_border) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index ae6e2e1..b03fc9e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -155,10 +155,12 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, 
so->base.reference.count = 1; so->base.context = pctx; + so->swizzle = fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + so->texconst0 = A4XX_TEX_CONST_0_TYPE(fd4_tex_type(cso->target)) | A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | - fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, - cso->swizzle_b, cso->swizzle_a); + so->swizzle; if (util_format_is_srgb(format)) { if (use_astc_srgb_workaround(pctx, format)) @@ -238,19 +240,35 @@ fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, struct fd_context *ctx = fd_context(pctx); struct fd4_context *fd4_ctx = fd4_context(ctx); uint16_t astc_srgb = 0; + uint16_t *sampler_swizzles; unsigned i; + if (shader == PIPE_SHADER_FRAGMENT) { + sampler_swizzles = fd4_ctx->fsampler_swizzles; + } else if (shader == PIPE_SHADER_VERTEX) { + sampler_swizzles = fd4_ctx->vsampler_swizzles; + } else { + debug_assert(0); + sampler_swizzles = fd4_ctx->fsampler_swizzles; + } + for (i = 0; i < nr; i++) { if (views[i]) { struct fd4_pipe_sampler_view *view = fd4_pipe_sampler_view(views[i]); if (view->astc_srgb) - astc_srgb |= (1 << i); + astc_srgb |= (1 << (start + i)); + sampler_swizzles[start + i] = view->swizzle >> 4; } } fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots, take_ownership, views); + for (i = 0; i < unbind_num_trailing_slots; i++) { + astc_srgb &= ~(1 << (start + nr + i)); + sampler_swizzles[start + nr + i] = 0x688; + } + if (shader == PIPE_SHADER_FRAGMENT) { fd4_ctx->fastc_srgb = astc_srgb; } else if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h index 4ba990c..64fae45 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -52,6 +52,7 @@ struct fd4_pipe_sampler_view { uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; uint32_t offset; bool astc_srgb; + uint32_t 
swizzle; }; static inline struct fd4_pipe_sampler_view * -- 2.7.4