*/
DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);
+ /* Track tg4 instructions which need texture state patched in (for tg4
+ * swizzling workaround):
+ */
+ DECLARE_ARRAY(struct ir3_instruction *, tg4);
+
/* List of blocks: */
struct list_head block_list;
opc = OPC_GETLOD;
break;
case nir_texop_tg4:
- /* NOTE: a4xx might need to emulate gather w/ txf (this is
- * what blob does, seems gather is broken?), and a3xx did
- * not support it (but probably could also emulate).
- */
switch (tex->component) {
case 0:
opc = OPC_GATHER4R;
info = get_tex_samp_tex_src(ctx, tex);
}
+ bool tg4_swizzle_fixup = false;
+ if (tex->op == nir_texop_tg4 && ctx->compiler->gen == 4 &&
+ ctx->sampler_swizzles[tex->texture_index] != 0x688 /* rgba */) {
+ /* XXX fix-up ASTC alpha as well? */
+ uint16_t swizzles = ctx->sampler_swizzles[tex->texture_index];
+ uint16_t swizzle = (swizzles >> (tex->component * 3)) & 7;
+ if (swizzle > 3) {
+ /* this would mean that we can just return 0 / 1, no texturing
+ * necessary
+ */
+ struct ir3_instruction *imm = create_immed(b,
+ type_float(type) ? fui(swizzle - 4) : (swizzle - 4));
+ for (int i = 0; i < 4; i++)
+ dst[i] = imm;
+ ir3_put_dst(ctx, &tex->dest);
+ return;
+ }
+ opc = OPC_GATHER4R + swizzle;
+ tg4_swizzle_fixup = true;
+ }
+
struct ir3_instruction *col0 = ir3_create_collect(b, src0, nsrc0);
struct ir3_instruction *col1 = ir3_create_collect(b, src1, nsrc1);
sam = emit_sam(ctx, opc, info, type, MASK(ncomp), col0, col1);
}
+ if (tg4_swizzle_fixup)
+ array_insert(ctx->ir, ctx->ir->tg4, sam);
+
if ((ctx->astc_srgb & (1 << tex->texture_index)) &&
+ tex->op != nir_texop_tg4 && /* leave out tg4, unless it's on alpha? */
!nir_tex_instr_is_query(tex)) {
assert(opc != OPC_META_TEX_PREFETCH);
sam->dsts[0]->wrmask = 0x7;
ir3_split_dest(b, dst, sam, 0, 3);
- /* we need to sample the alpha separately with a non-ASTC
+ /* we need to sample the alpha separately with a non-SRGB
* texture state:
*/
sam = ir3_SAM(b, opc, type, 0b1000, flags | info.flags, info.samp_tex,
}
}
+/* Fixup tex sampler state for tg4 workaround instructions. We
+ * need to assign the tex state indexes for these after we know the
+ * max tex index.
+ *
+ * Each instruction recorded in ctx->ir->tg4 is retargeted from its
+ * original tex state index to a newly assigned one.  New indexes are
+ * handed out sequentially starting at
+ * ctx->max_texture_index + so->astc_srgb.count + 1, and that first
+ * value is stored in so->tg4.base.  Instructions that share an
+ * original tex index share the same new index.  For every new index
+ * handed out, the original index is appended to so->tg4.orig_idx[]
+ * and so->tg4.count is incremented (NOTE(review): this presumes
+ * so->tg4.count is zero on entry -- confirm against the variant
+ * setup code).
+ */
+static void
+fixup_tg4(struct ir3_context *ctx)
+{
+ struct ir3_shader_variant *so = ctx->so;
+ /* indexed by original tex idx, value is the newly assigned alternate
+ * (tg4 workaround) tex state idx. Zero is invalid since there is at
+ * least one sampler if we get here, which is why zero doubles as the
+ * "not assigned yet" marker below.
+ */
+ unsigned alt_tex_state[16] = {0};
+ unsigned tex_idx = ctx->max_texture_index + so->astc_srgb.count + 1; /* next new tex state idx to hand out */
+ unsigned idx = 0; /* next free entry in so->tg4.orig_idx[] */
+
+ so->tg4.base = tex_idx;
+
+ for (unsigned i = 0; i < ctx->ir->tg4_count; i++) {
+ struct ir3_instruction *sam = ctx->ir->tg4[i];
+
+ compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); /* original idx must fit the lookup table */
+
+ if (alt_tex_state[sam->cat5.tex] == 0) {
+ /* first tg4 seen for this original tex idx: assign a new
+ * alternate tex state slot and remember which original
+ * tex state it corresponds to:
+ */
+ alt_tex_state[sam->cat5.tex] = tex_idx++;
+ so->tg4.orig_idx[idx++] = sam->cat5.tex;
+ so->tg4.count++;
+ }
+
+ sam->cat5.tex = alt_tex_state[sam->cat5.tex]; /* point the instruction at the alternate tex state */
+ }
+}
+
static bool
output_slot_used_for_binning(gl_varying_slot slot)
{
if (ctx->astc_srgb)
fixup_astc_srgb(ctx);
+ if (ctx->compiler->gen == 4 && ctx->s->info.uses_texture_gather)
+ fixup_tg4(ctx);
+
/* We need to do legalize after (for frag shader's) the "bary.f"
* offsets (inloc) have been assigned.
*/
if (compiler->gen == 4) {
if (so->type == MESA_SHADER_VERTEX) {
ctx->astc_srgb = so->key.vastc_srgb;
+ memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles, sizeof(ctx->sampler_swizzles));
} else if (so->type == MESA_SHADER_FRAGMENT) {
ctx->astc_srgb = so->key.fastc_srgb;
+ memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles, sizeof(ctx->sampler_swizzles));
}
} else if (compiler->gen == 3) {
if (so->type == MESA_SHADER_VERTEX) {
/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
unsigned astc_srgb;
+ /* on a4xx, per-sampler per-component swizzles, for tg4: */
+ uint16_t sampler_swizzles[16];
+
unsigned samples; /* bitmask of x,y sample shifts */
unsigned max_texture_index;
if (info->stage == MESA_SHADER_FRAGMENT) {
key->fastc_srgb = ~0;
key->fsamples = ~0;
+ memset(key->fsampler_swizzles, 0xff, sizeof(key->fsampler_swizzles));
if (info->inputs_read & VARYING_BITS_COLOR) {
key->rasterflat = true;
if (info->stage == MESA_SHADER_VERTEX) {
key->vastc_srgb = ~0;
key->vsamples = ~0;
+ memset(key->vsampler_swizzles, 0xff, sizeof(key->vsampler_swizzles));
}
if (info->stage == MESA_SHADER_TESS_CTRL)
/* bitmask of samplers which need astc srgb workaround (a4xx): */
uint16_t vastc_srgb, fastc_srgb;
+
+ /* per-component (3-bit) swizzles of each sampler (a4xx tg4): */
+ uint16_t vsampler_swizzles[16];
+ uint16_t fsampler_swizzles[16];
};
static inline unsigned
{
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->fsamples != key->fsamples) ||
- (last_key->fastc_srgb != key->fastc_srgb))
+ (last_key->fastc_srgb != key->fastc_srgb) ||
+ memcmp(last_key->fsampler_swizzles, key->fsampler_swizzles,
+ sizeof(key->fsampler_swizzles)))
return true;
}
{
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->vsamples != key->vsamples) ||
- (last_key->vastc_srgb != key->vastc_srgb))
+ (last_key->vastc_srgb != key->vastc_srgb) ||
+ memcmp(last_key->vsampler_swizzles, key->vsampler_swizzles,
+ sizeof(key->vsampler_swizzles)))
return true;
}
unsigned orig_idx[16];
} astc_srgb;
+ /* for tg4 workaround, the number/base of additional
+ * unswizzled tex states we need, and index of original tex states
+ */
+ struct {
+ unsigned base, count;
+ unsigned orig_idx[16];
+ } tg4;
+
/* texture sampler pre-dispatches */
uint32_t num_sampler_prefetch;
struct ir3_sampler_prefetch sampler_prefetch[IR3_MAX_SAMPLER_PREFETCH];
fd4_ctx->border_color_uploader =
u_upload_create(pctx, 4096, 0, PIPE_USAGE_STREAM, 0);
+ for (int i = 0; i < 16; i++) {
+ fd4_ctx->vsampler_swizzles[i] = 0x688;
+ fd4_ctx->fsampler_swizzles[i] = 0x688;
+ }
+
return pctx;
}
/* bitmask of samplers which need astc srgb workaround: */
uint16_t vastc_srgb, fastc_srgb;
+ /* sampler swizzles, needed for tg4 workaround: */
+ uint16_t vsampler_swizzles[16], fsampler_swizzles[16];
+
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
};
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
+ /* Check if we actually need the tg4 workarounds */
+ if (ir3_get_shader_info(emit.key.vs)->uses_texture_gather) {
+ emit.key.key.has_per_samp = true;
+ memcpy(emit.key.key.vsampler_swizzles, fd4_ctx->vsampler_swizzles,
+ sizeof(emit.key.key.vsampler_swizzles));
+ }
+ if (ir3_get_shader_info(emit.key.fs)->uses_texture_gather) {
+ emit.key.key.has_per_samp = true;
+ memcpy(emit.key.key.fsampler_swizzles, fd4_ctx->fsampler_swizzles,
+ sizeof(emit.key.key.fsampler_swizzles));
+ }
+
if (info->mode != PIPE_PRIM_MAX && !indirect && !info->primitive_restart &&
!u_trim_pipe_prim(info->mode, (unsigned *)&draw->count))
return false;
}
if (tex->num_textures > 0) {
- unsigned num_textures = tex->num_textures + v->astc_srgb.count;
+ unsigned num_textures = tex->num_textures + v->astc_srgb.count + v->tg4.count;
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
+
+ for (i = 0; i < v->tg4.count; i++) {
+ static const struct fd4_pipe_sampler_view dummy_view = {};
+ const struct fd4_pipe_sampler_view *view;
+ unsigned idx = v->tg4.orig_idx[i];
+
+ view = tex->textures[idx] ? fd4_pipe_sampler_view(tex->textures[idx])
+ : &dummy_view;
+
+ unsigned texconst0 = view->texconst0 & ~(0xfff << 4);
+ texconst0 |= A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_X) |
+ A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_Y) |
+ A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_Z) |
+ A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_W);
+
+ OUT_RING(ring, texconst0);
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring, view->texconst2);
+ OUT_RING(ring, view->texconst3);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
} else {
debug_assert(v->astc_srgb.count == 0);
+ debug_assert(v->tg4.count == 0);
}
if (needs_border) {
so->base.reference.count = 1;
so->base.context = pctx;
+ so->swizzle = fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
so->texconst0 = A4XX_TEX_CONST_0_TYPE(fd4_tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
- fd4_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
- cso->swizzle_b, cso->swizzle_a);
+ so->swizzle;
if (util_format_is_srgb(format)) {
if (use_astc_srgb_workaround(pctx, format))
struct fd_context *ctx = fd_context(pctx);
struct fd4_context *fd4_ctx = fd4_context(ctx);
uint16_t astc_srgb = 0;
+ uint16_t *sampler_swizzles;
unsigned i;
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ sampler_swizzles = fd4_ctx->fsampler_swizzles;
+ } else if (shader == PIPE_SHADER_VERTEX) {
+ sampler_swizzles = fd4_ctx->vsampler_swizzles;
+ } else {
+ debug_assert(0);
+ sampler_swizzles = fd4_ctx->fsampler_swizzles;
+ }
+
for (i = 0; i < nr; i++) {
if (views[i]) {
struct fd4_pipe_sampler_view *view = fd4_pipe_sampler_view(views[i]);
if (view->astc_srgb)
- astc_srgb |= (1 << i);
+ astc_srgb |= (1 << (start + i));
+ sampler_swizzles[start + i] = view->swizzle >> 4;
}
}
fd_set_sampler_views(pctx, shader, start, nr, unbind_num_trailing_slots,
take_ownership, views);
+ for (i = 0; i < unbind_num_trailing_slots; i++) {
+ astc_srgb &= ~(1 << (start + nr + i));
+ sampler_swizzles[start + nr + i] = 0x688;
+ }
+
if (shader == PIPE_SHADER_FRAGMENT) {
fd4_ctx->fastc_srgb = astc_srgb;
} else if (shader == PIPE_SHADER_VERTEX) {
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
uint32_t offset;
bool astc_srgb;
+ uint32_t swizzle;
};
static inline struct fd4_pipe_sampler_view *