freedreno/a4xx: point-size and spritelist fixes
author Rob Clark <robclark@freedesktop.org>
Tue, 11 Aug 2015 12:48:34 +0000 (08:48 -0400)
committer Rob Clark <robclark@freedesktop.org>
Wed, 12 Aug 2015 22:37:43 +0000 (18:37 -0400)
a4xx needs similar treatment as 995f55a6

Also fixup a few point-size and vpsrepl issues and drop fix_blit_fp()
hack previously needed for mem2gmem.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a4xx/fd4_draw.c
src/gallium/drivers/freedreno/a4xx/fd4_draw.h
src/gallium/drivers/freedreno/a4xx/fd4_emit.h
src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
src/gallium/drivers/freedreno/a4xx/fd4_program.c
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 0927b0d..2bd2ca2 100644 (file)
@@ -131,6 +131,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                        .fsaturate_t = fd4_ctx->fsaturate_t,
                        .fsaturate_r = fd4_ctx->fsaturate_r,
                },
+               .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+               .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
+               .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
        };
        unsigned dirty;
 
index 1bd376c..b89a30a 100644 (file)
@@ -106,6 +106,7 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
 {
        struct pipe_index_buffer *idx = &ctx->indexbuf;
        struct fd_bo *idx_bo = NULL;
+       enum pc_di_primtype primtype = ctx->primtypes[info->mode];
        enum a4xx_index_size idx_type;
        enum pc_di_src_sel src_sel;
        uint32_t idx_size, idx_offset;
@@ -126,7 +127,12 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                src_sel = DI_SRC_SEL_AUTO_INDEX;
        }
 
-       fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+       /* points + psize -> spritelist: */
+       if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+                       (info->mode == PIPE_PRIM_POINTS))
+               primtype = DI_PT_POINTLIST_PSIZE;
+
+       fd4_draw(ctx, ring, primtype, vismode, src_sel,
                        info->count, info->instance_count,
                        idx_type, idx_size, idx_offset, idx_bo);
 }
index 99c7596..ab7850e 100644 (file)
@@ -53,6 +53,10 @@ struct fd4_emit {
        struct ir3_shader_key key;
        uint32_t dirty;
 
+       uint32_t sprite_coord_enable;  /* bitmask */
+       bool sprite_coord_mode;
+       bool rasterflat;
+
        /* cached to avoid repeated lookups of same variants: */
        struct ir3_shader_variant *vp, *fp;
        /* TODO: other shader stages.. */
index 6541da5..81c37f7 100644 (file)
@@ -309,6 +309,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
        struct fd4_emit emit = {
                        .vtx = &fd4_ctx->blit_vbuf_state,
+                       .sprite_coord_enable = 1,
                        /* NOTE: They all use the same VP, this is for vtx bufs. */
                        .prog = &ctx->blit_prog[0],
                        .key = {
index fdfdee5..1a6d014 100644 (file)
@@ -465,10 +465,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                                COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
                OUT_RING(ring, 0x00000000);
        } else {
-               uint32_t vinterp[8], flatshade[2];
+               uint32_t vinterp[8], vpsrepl[8];
 
                memset(vinterp, 0, sizeof(vinterp));
-               memset(flatshade, 0, sizeof(flatshade));
+               memset(vpsrepl, 0, sizeof(vpsrepl));
 
                /* looks like we need to do int varyings in the frag
                 * shader on a4xx (no flatshad reg?  or a420.0 bug?):
@@ -485,29 +485,40 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                 * something like the code below instead of workaround
                 * in the shader:
                 */
-#if 0
-               /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+               /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
                for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
                        uint32_t interp = s[FS].v->inputs[j].interpolate;
+
+                       /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+                        * instead.. rather than -8 everywhere else..
+                        */
+                       uint32_t inloc = s[FS].v->inputs[j].inloc - 8;
+
+                       /* currently assuming varyings aligned to 4 (not
+                        * packed):
+                        */
+                       debug_assert((inloc % 4) == 0);
+
                        if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
                                        ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
-                               /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
-                                * instead.. rather than -8 everywhere else..
-                                */
-                               uint32_t loc = s[FS].v->inputs[j].inloc - 8;
-
-                               /* currently assuming varyings aligned to 4 (not
-                                * packed):
-                                */
-                               debug_assert((loc % 4) == 0);
+                               uint32_t loc = inloc;
 
                                for (i = 0; i < 4; i++, loc++) {
                                        vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-                                       flatshade[loc / 32] |= 1 << (loc % 32);
+                                       //flatshade[loc / 32] |= 1 << (loc % 32);
                                }
                        }
+
+                       /* Replace the .xy coordinates with S/T from the point sprite. Set
+                        * interpolation bits for .zw such that they become .01
+                        */
+                       if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
+                               vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+                                       << ((inloc % 16) * 2);
+                               vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+                               vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+                       }
                }
-#endif
 
                OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
                OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
@@ -524,7 +535,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 
                OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
                for (i = 0; i < 8; i++)
-                       OUT_RING(ring, s[FS].v->shader->vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
+                       OUT_RING(ring, vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
        }
 
        if (s[VS].instrlen)
@@ -535,28 +546,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                        emit_shader(ring, s[FS].v);
 }
 
-/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
-static void
-fix_blit_fp(struct fd4_shader_stateobj *so)
-{
-       so->shader->vpsrepl[0] = 0x99999999;
-       so->shader->vpsrepl[1] = 0x99999999;
-       so->shader->vpsrepl[2] = 0x99999999;
-       so->shader->vpsrepl[3] = 0x99999999;
-}
-static void
-fix_blit_fps(struct pipe_context *pctx)
-{
-       struct fd_context *ctx = fd_context(pctx);
-       int i;
-
-       for (i = 0; i < ctx->screen->max_rts; i++)
-               fix_blit_fp(ctx->blit_prog[i].fp);
-
-       fix_blit_fp(ctx->blit_z.fp);
-       fix_blit_fp(ctx->blit_zs.fp);
-}
-
 void
 fd4_prog_init(struct pipe_context *pctx)
 {
@@ -567,6 +556,4 @@ fd4_prog_init(struct pipe_context *pctx)
        pctx->delete_vs_state = fd4_vp_state_delete;
 
        fd_prog_init(pctx);
-
-       fix_blit_fps(pctx);
 }
index e54b606..dc7e98b 100644 (file)
@@ -50,7 +50,7 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
 
        if (cso->point_size_per_vertex) {
                psize_min = util_get_min_point_size(cso);
-               psize_max = 8192;
+               psize_max = 4092;
        } else {
                /* Force the point size to be as if the vertex output was disabled. */
                psize_min = cso->point_size;
@@ -67,9 +67,9 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
 */
        so->gras_cl_clip_cntl = 0x80000; /* ??? */
        so->gras_su_point_minmax =
-                       A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
-                       A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
-       so->gras_su_point_size   = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+                       A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+                       A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+       so->gras_su_point_size   = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
        so->gras_su_poly_offset_scale =
                        A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
        so->gras_su_poly_offset_offset =
index c0fd44d..1bbbdbd 100644 (file)
@@ -219,11 +219,6 @@ struct ir3_shader {
        struct pipe_stream_output_info stream_output;
 
        struct ir3_shader_variant *variants;
-
-       /* so far, only used for blit_prog shader.. values for
-        * VPC_VARYING_PS_REPL[i].MODE
-        */
-       uint32_t vpsrepl[8];
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);