From 8885f2befaea68ce7f9d550c9b9ff5ae77524406 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 11 Aug 2015 08:48:34 -0400 Subject: [PATCH] freedreno/a4xx: point-size and spritelist fixes a4xx needs similar treatment as 995f55a6 Also fixup a few point-size and vpsrepl issues and drop fix_blit_fp() hack previously needed for mem2gmem. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 3 + src/gallium/drivers/freedreno/a4xx/fd4_draw.h | 8 ++- src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 4 ++ src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 1 + src/gallium/drivers/freedreno/a4xx/fd4_program.c | 67 +++++++++------------- .../drivers/freedreno/a4xx/fd4_rasterizer.c | 8 +-- src/gallium/drivers/freedreno/ir3/ir3_shader.h | 5 -- 7 files changed, 46 insertions(+), 50 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 0927b0d..2bd2ca2 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -131,6 +131,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, }, + .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false, + .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index 1bd376c..b89a30a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -106,6 +106,7 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, { struct pipe_index_buffer *idx = &ctx->indexbuf; struct fd_bo *idx_bo = NULL; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; @@ -126,7 +127,12 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel, + /* points + psize -> spritelist: */ + if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && + (info->mode == PIPE_PRIM_POINTS)) + primtype = DI_PT_POINTLIST_PSIZE; + + fd4_draw(ctx, ring, primtype, vismode, src_sel, info->count, info->instance_count, idx_type, idx_size, idx_offset, idx_bo); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 99c7596..ab7850e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -53,6 +53,10 @@ struct fd4_emit { struct ir3_shader_key key; uint32_t dirty; + uint32_t sprite_coord_enable; /* bitmask */ + bool sprite_coord_mode; + bool rasterflat; + /* cached to avoid repeated lookups of same variants: */ struct ir3_shader_variant *vp, *fp; /* TODO: other shader stages.. */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 6541da5..81c37f7 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -309,6 +309,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd4_emit emit = { .vtx = &fd4_ctx->blit_vbuf_state, + .sprite_coord_enable = 1, /* NOTE: They all use the same VP, this is for vtx bufs. */ .prog = &ctx->blit_prog[0], .key = { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index fdfdee5..1a6d014 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -465,10 +465,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { - uint32_t vinterp[8], flatshade[2]; + uint32_t vinterp[8], vpsrepl[8]; memset(vinterp, 0, sizeof(vinterp)); - memset(flatshade, 0, sizeof(flatshade)); + memset(vpsrepl, 0, sizeof(vpsrepl)); /* looks like we need to do int varyings in the frag * shader on a4xx (no flatshad reg? or a420.0 bug?): @@ -485,29 +485,40 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, * something like the code below instead of workaround * in the shader: */ -#if 0 - /* figure out VARYING_INTERP / FLAT_SHAD register values: */ + /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { uint32_t interp = s[FS].v->inputs[j].interpolate; + + /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG + * instead.. rather than -8 everywhere else.. + */ + uint32_t inloc = s[FS].v->inputs[j].inloc - 8; + + /* currently assuming varyings aligned to 4 (not + * packed): + */ + debug_assert((inloc % 4) == 0); + if ((interp == TGSI_INTERPOLATE_CONSTANT) || ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) { - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - * instead.. rather than -8 everywhere else.. - */ - uint32_t loc = s[FS].v->inputs[j].inloc - 8; - - /* currently assuming varyings aligned to 4 (not - * packed): - */ - debug_assert((loc % 4) == 0); + uint32_t loc = inloc; for (i = 0; i < 4; i++, loc++) { vinterp[loc / 16] |= 1 << ((loc % 16) * 2); - flatshade[loc / 32] |= 1 << (loc % 32); + //flatshade[loc / 32] |= 1 << (loc % 32); } } + + /* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 + */ + if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) { + vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) + << ((inloc % 16) * 2); + vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); + vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + } } -#endif OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) | @@ -524,7 +535,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8); for (i = 0; i < 8; i++) - OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ + OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ } if (s[VS].instrlen) @@ -535,28 +546,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, emit_shader(ring, s[FS].v); } -/* hack.. until we figure out how to deal w/ vpsrepl properly.. */ -static void -fix_blit_fp(struct fd4_shader_stateobj *so) -{ - so->shader->vpsrepl[0] = 0x99999999; - so->shader->vpsrepl[1] = 0x99999999; - so->shader->vpsrepl[2] = 0x99999999; - so->shader->vpsrepl[3] = 0x99999999; -} -static void -fix_blit_fps(struct pipe_context *pctx) -{ - struct fd_context *ctx = fd_context(pctx); - int i; - - for (i = 0; i < ctx->screen->max_rts; i++) - fix_blit_fp(ctx->blit_prog[i].fp); - - fix_blit_fp(ctx->blit_z.fp); - fix_blit_fp(ctx->blit_zs.fp); -} - void fd4_prog_init(struct pipe_context *pctx) { @@ -567,6 +556,4 @@ fd4_prog_init(struct pipe_context *pctx) pctx->delete_vs_state = fd4_vp_state_delete; fd_prog_init(pctx); - - fix_blit_fps(pctx); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c index e54b606..dc7e98b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -50,7 +50,7 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, if (cso->point_size_per_vertex) { psize_min = util_get_min_point_size(cso); - psize_max = 8192; + psize_max = 4092; } else { /* Force the point size to be as if the vertex output was disabled. */ psize_min = cso->point_size; @@ -67,9 +67,9 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, */ so->gras_cl_clip_cntl = 0x80000; /* ??? */ so->gras_su_point_minmax = - A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) | - A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2); - so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2); + A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | + A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); + so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size); so->gras_su_poly_offset_scale = A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale); so->gras_su_poly_offset_offset = diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c0fd44d..1bbbdbd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -219,11 +219,6 @@ struct ir3_shader { struct pipe_stream_output_info stream_output; struct ir3_shader_variant *variants; - - /* so far, only used for blit_prog shader.. values for - * VPC_VARYING_PS_REPL[i].MODE - */ - uint32_t vpsrepl[8]; }; void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); -- 2.7.4