nvfx: support clip planes sensibly and fix them on nv30
authorLuca Barbieri <luca@luca-barbieri.com>
Sun, 22 Aug 2010 22:31:08 +0000 (00:31 +0200)
committerLuca Barbieri <luca@luca-barbieri.com>
Mon, 23 Aug 2010 13:09:22 +0000 (15:09 +0200)
Before, we were discarding the compiled vertex program on each
user clip plane change.

Now we compile the program as if there were 6 clip planes and
dynamically patch in an "end program" bit at the right place.

Also, nv30 should now work.

src/gallium/auxiliary/util/u_dynarray.h
src/gallium/drivers/nvfx/nv30_vertprog.h
src/gallium/drivers/nvfx/nvfx_context.c
src/gallium/drivers/nvfx/nvfx_context.h
src/gallium/drivers/nvfx/nvfx_fragprog.c
src/gallium/drivers/nvfx/nvfx_state.h
src/gallium/drivers/nvfx/nvfx_state_emit.c
src/gallium/drivers/nvfx/nvfx_vertprog.c

index 9d1c171..980cadf 100644 (file)
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
 #define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
 #define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
 #define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
+#define util_dynarray_begin(buf) ((buf)->data)
+#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
 
 #endif /* U_DYNARRAY_H */
 
index df92469..9a68f5c 100644 (file)
 #define NV30_VP_INST_VDEST_WRITEMASK_SHIFT      12    /*NV20*/
 #define NV30_VP_INST_VDEST_WRITEMASK_MASK      (0x0F << 12)  /*NV20*/
 #define NV30_VP_INST_DEST_SHIFT        2
-#define NV30_VP_INST_DEST_MASK        (0x0F <<  2)
+#define NV30_VP_INST_DEST_MASK        (0x1F <<  2)
 #  define NV30_VP_INST_DEST_POS  0
 #  define NV30_VP_INST_DEST_BFC0  1
 #  define NV30_VP_INST_DEST_BFC1  2
 #  define NV30_VP_INST_DEST_COL1  4
 #  define NV30_VP_INST_DEST_FOGC  5
 #  define NV30_VP_INST_DEST_PSZ   6
-#  define NV30_VP_INST_DEST_TC(n)  (8+n)
+#  define NV30_VP_INST_DEST_TC(n)  (8+(n))
+#  define NV30_VP_INST_DEST_CLP(n) (17 + (n))
 
 /* Useful to split the source selection regs into their pieces */
 #define NV30_VP_SRC0_HIGH_SHIFT                                                6
index 80b36fb..2f775f9 100644 (file)
@@ -75,6 +75,10 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
        screen->base.channel->user_private = nvfx;
 
        nvfx->is_nv4x = screen->is_nv4x;
+       /* TODO: it seems that nv30 might have fixed-function clipping usable with vertex programs.
+        * However, my code for that doesn't work, so use vp clipping for all cards, which works.
+        */
+       nvfx->use_vp_clipping = TRUE;
 
        nvfx_init_query_functions(nvfx);
        nvfx_init_surface_functions(nvfx);
index 2134f3c..680f4c6 100644 (file)
@@ -134,6 +134,7 @@ struct nvfx_context {
        struct nvfx_screen *screen;
 
        unsigned is_nv4x; /* either 0 or ~0 */
+       boolean use_vp_clipping;
 
        struct draw_context *draw;
        struct blitter_context* blitter;
index a7e43b1..23a85c9 100644 (file)
@@ -1468,19 +1468,6 @@ update:
                        nvfx->hw_pointsprite_control = pointsprite_control;
                }
        }
-
-       if(nvfx->is_nv4x)
-       {
-               unsigned vp_output = vp->or | fp->or;
-
-               if(vp_output != nvfx->hw_vp_output)
-               {
-                       WAIT_RING(chan, 2);
-                       OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1));
-                       OUT_RING(chan, vp_output);
-                       nvfx->hw_vp_output = vp_output;
-               }
-       }
 }
 
 void
index 3795191..e9c1f2c 100644 (file)
@@ -24,8 +24,6 @@ struct nvfx_vertex_program {
 
        boolean translated;
 
-       struct pipe_clip_state ucp;
-
        struct nvfx_vertex_program_exec *insns;
        unsigned nr_insns;
        struct nvfx_vertex_program_data *consts;
@@ -42,7 +40,7 @@ struct nvfx_vertex_program {
 
        uint32_t ir;
        uint32_t or;
-       uint32_t clip_ctrl;
+       int clip_nr;
 
        struct util_dynarray branch_relocs;
        struct util_dynarray const_relocs;
index bd89a38..c43a75a 100644 (file)
@@ -90,6 +90,74 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
        if(dirty & NVFX_NEW_STIPPLE)
                nvfx_state_stipple_validate(nvfx);
 
+       if(nvfx->dirty & NVFX_NEW_UCP)
+       {
+               unsigned enables[7] =
+               {
+                               0,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4,
+                               NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5,
+               };
+
+               if(!nvfx->use_vp_clipping)
+               {
+                       WAIT_RING(chan, 2);
+                       OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+                       OUT_RING(chan, 0);
+
+                       WAIT_RING(chan, 6 * 4 + 1);
+                       OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANE_A(0), nvfx->clip.nr * 4));
+                       OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
+               }
+
+               WAIT_RING(chan, 2);
+               OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+               OUT_RING(chan, enables[nvfx->clip.nr]);
+       }
+
+       if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG)))
+       {
+               unsigned i;
+               struct nvfx_vertex_program* vp = nvfx->vertprog;
+               if(nvfx->clip.nr != vp->clip_nr)
+               {
+                       unsigned idx;
+                       WAIT_RING(chan, 14);
+
+                       /* remove last instruction bit */
+                       if(vp->clip_nr >= 0)
+                       {
+                               idx = vp->nr_insns - 7 + vp->clip_nr;
+                               OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+                               OUT_RING(chan,  vp->exec->start + idx);
+                               OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+                               OUT_RINGp (chan, vp->insns[idx].data, 4);
+                       }
+
+                        /* set last instruction bit */
+                       idx = vp->nr_insns - 7 + nvfx->clip.nr;
+                       OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1));
+                       OUT_RING(chan,  vp->exec->start + idx);
+                       OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4));
+                       OUT_RINGp(chan, vp->insns[idx].data, 3);
+                       OUT_RING(chan, vp->insns[idx].data[3] | 1);
+                       vp->clip_nr = nvfx->clip.nr;
+               }
+
+               // TODO: only do this for the ones changed
+               WAIT_RING(chan, 6 * 6);
+               for(i = 0; i < nvfx->clip.nr; ++i)
+               {
+                       OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_CONST_ID, 5));
+                       OUT_RING(chan, vp->data->start + i);
+                       OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
+               }
+       }
+
        if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE))
        {
                nvfx_fragprog_validate(nvfx);
@@ -97,6 +165,20 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                        flush_tex_cache = TRUE; // TODO: do we need this?
        }
 
+       if(nvfx->is_nv4x)
+       {
+               unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or;
+               vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);
+
+               if(vp_output != nvfx->hw_vp_output)
+               {
+                       WAIT_RING(chan, 2);
+                       OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1));
+                       OUT_RING(chan, vp_output);
+                       nvfx->hw_vp_output = vp_output;
+               }
+       }
+
        if(all_swizzled >= 0)
                nvfx_framebuffer_validate(nvfx, all_swizzled);
 
index 3b8d385..ea7e88c 100644 (file)
@@ -29,8 +29,6 @@
 #include "nv30_vertprog.h"
 #include "nv40_vertprog.h"
 
-#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
-
 struct nvfx_loop_entry
 {
        unsigned brk_target;
@@ -205,52 +203,33 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot
                break;
        case NVFXSR_OUTPUT:
                /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
-               switch (dst.index) {
-               case NVFX_VP_INST_DEST_CLIP(0):
-                       vp->or |= (1 << 6);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0;
-                       dst.index = NVFX_VP(INST_DEST_FOGC);
-                       break;
-               case NVFX_VP_INST_DEST_CLIP(1):
-                       vp->or |= (1 << 7);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1;
-                       dst.index = NVFX_VP(INST_DEST_FOGC);
-                       break;
-               case NVFX_VP_INST_DEST_CLIP(2):
-                       vp->or |= (1 << 8);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2;
-                       dst.index = NVFX_VP(INST_DEST_FOGC);
-                       break;
-               case NVFX_VP_INST_DEST_CLIP(3):
-                       vp->or |= (1 << 9);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3;
-                       dst.index = NVFX_VP(INST_DEST_PSZ);
-                       break;
-               case NVFX_VP_INST_DEST_CLIP(4):
-                       vp->or |= (1 << 10);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4;
-                       dst.index = NVFX_VP(INST_DEST_PSZ);
-                       break;
-               case NVFX_VP_INST_DEST_CLIP(5):
-                       vp->or |= (1 << 11);
-                       vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5;
-                       dst.index = NVFX_VP(INST_DEST_PSZ);
-                       break;
-               default:
-                       if(nvfx->is_nv4x) {
-                               /* we don't need vp->or on nv3x
-                                * texcoords are handled by fragment program
-                                */
-                               switch (dst.index) {
-                               case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
-                               case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
-                               case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
-                               case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
-                               case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
-                               case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
-                               }
+               if(nvfx->is_nv4x) {
+                       switch (dst.index) {
+                       case NV30_VP_INST_DEST_CLP(0):
+                               dst.index = NVFX_VP(INST_DEST_FOGC);
+                               break;
+                       case NV30_VP_INST_DEST_CLP(1):
+                               dst.index = NVFX_VP(INST_DEST_FOGC);
+                               break;
+                       case NV30_VP_INST_DEST_CLP(2):
+                               dst.index = NVFX_VP(INST_DEST_FOGC);
+                               break;
+                       case NV30_VP_INST_DEST_CLP(3):
+                               dst.index = NVFX_VP(INST_DEST_PSZ);
+                               break;
+                       case NV30_VP_INST_DEST_CLP(4):
+                               dst.index = NVFX_VP(INST_DEST_PSZ);
+                               break;
+                       case NV30_VP_INST_DEST_CLP(5):
+                               dst.index = NVFX_VP(INST_DEST_PSZ);
+                               break;
+                       case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+                       case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+                       case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+                       case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+                       case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
+                       case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
                        }
-                       break;
                }
 
                if(!nvfx->is_nv4x) {
@@ -914,6 +893,13 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
        vpc->nvfx = nvfx;
        vpc->vp = vp;
 
+       /* reserve space for ucps */
+       if(nvfx->use_vp_clipping)
+       {
+               for(i = 0; i < 6; ++i)
+                       constant(vpc, -1, 0, 0, 0, 0);
+       }
+
        if (!nvfx_vertprog_prepare(nvfx, vpc)) {
                FREE(vpc);
                return;
@@ -923,7 +909,8 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
         * planes are enabled.  We need to append code to the vtxprog
         * to handle clip planes later.
         */
-       if (vp->ucp.nr)  {
+       /* TODO: maybe support patching this depending on whether there are ucps: not sure if it really matters much */
+       if (nvfx->use_vp_clipping)  {
                vpc->r_result[vpc->hpos_idx] = temp(vpc);
                vpc->r_temps_discard = 0;
        }
@@ -994,34 +981,39 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
        }
 
        /* Insert code to handle user clip planes */
-       for (i = 0; i < vp->ucp.nr; i++) {
-               struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT,
-                                               NVFX_VP_INST_DEST_CLIP(i));
-               struct nvfx_src ceqn = nvfx_src(constant(vpc, -1,
-                                                nvfx->clip.ucp[i][0],
-                                                nvfx->clip.ucp[i][1],
-                                                nvfx->clip.ucp[i][2],
-                                                nvfx->clip.ucp[i][3]));
-               struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
-               unsigned mask;
+       if(nvfx->use_vp_clipping)
+       {
+               for (i = 0; i < 6; i++) {
+                       struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
+                       struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i));
+                       struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
+                       unsigned mask;
 
-               switch (i) {
-               case 0: case 3: mask = NVFX_VP_MASK_Y; break;
-               case 1: case 4: mask = NVFX_VP_MASK_Z; break;
-               case 2: case 5: mask = NVFX_VP_MASK_W; break;
-               default:
-                       NOUVEAU_ERR("invalid clip dist #%d\n", i);
-                       goto out_err;
-               }
+                       if(nvfx->is_nv4x)
+                       {
+                               switch (i) {
+                               case 0: case 3: mask = NVFX_VP_MASK_Y; break;
+                               case 1: case 4: mask = NVFX_VP_MASK_Z; break;
+                               case 2: case 5: mask = NVFX_VP_MASK_W; break;
+                               default:
+                                       NOUVEAU_ERR("invalid clip dist #%d\n", i);
+                                       goto out_err;
+                               }
+                       }
+                       else
+                               mask = NVFX_VP_MASK_X;
 
-               nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
+                       nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
+               }
        }
+       else
+       {
+               if(vp->nr_insns)
+                       vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
 
-       //vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
-
-       /* Append NOP + END instruction for branches to the end of the program */
-       nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
-        vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST | 0x1000;
+               nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
+               vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+       }
 
        if(debug_get_option_nvfx_dump_vp())
        {
@@ -1034,6 +1026,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
                debug_printf("\n");
        }
 
+       vp->clip_nr = -1;
        vp->exec_start = -1;
        vp->translated = TRUE;
 out_err:
@@ -1063,13 +1056,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
        if (nvfx->render_mode == HW) {
                vp = nvfx->vertprog;
                constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
-
-               // TODO: ouch! can't we just use constant slots for these?!
-               if ((nvfx->dirty & NVFX_NEW_UCP) ||
-                   memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
-                       nvfx_vertprog_destroy(nvfx, vp);
-                       memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp));
-               }
        } else {
                vp = nvfx->swtnl.vertprog;
                constbuf = NULL;
@@ -1169,7 +1155,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                vp->exec_start = vp->exec->start;
        }
 
-       if (vp->nr_consts && vp->data_start != vp->data->start) {
+       if (vp->data_start != vp->data->start) {
                for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
                {
                        struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
@@ -1182,6 +1168,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                }
 
                vp->data_start = vp->data->start;
+               upload_code = TRUE;
        }
 
        /* Update + Upload constant values */
@@ -1191,7 +1178,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                if (constbuf)
                        map = (float*)nvfx_buffer(constbuf)->data;
 
-               for (i = 0; i < vp->nr_consts; i++) {
+               for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
                        struct nvfx_vertex_program_data *vpd = &vp->consts[i];
 
                        if (vpd->index >= 0) {
@@ -1217,9 +1204,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                        BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
                        OUT_RINGp (chan, vp->insns[i].data, 4);
                }
+               vp->clip_nr = -1;
        }
 
-       if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+       if(nvfx->dirty & (NVFX_NEW_VERTPROG))
        {
                WAIT_RING(chan, 6);
                OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
@@ -1228,8 +1216,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                        OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1));
                        OUT_RING(chan, vp->ir);
                }
-               OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
-               OUT_RING(chan, vp->clip_ctrl);
        }
 
        return TRUE;
@@ -1238,27 +1224,15 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 void
 nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
 {
-       vp->translated = FALSE;
-
-       if (vp->nr_insns) {
+       if (vp->nr_insns)
                FREE(vp->insns);
-               vp->insns = NULL;
-               vp->nr_insns = 0;
-       }
 
-       if (vp->nr_consts) {
+       if (vp->nr_consts)
                FREE(vp->consts);
-               vp->consts = NULL;
-               vp->nr_consts = 0;
-       }
 
        nouveau_resource_free(&vp->exec);
-       vp->exec_start = 0;
        nouveau_resource_free(&vp->data);
-       vp->data_start = 0;
-       vp->data_start_min = 0;
 
-       vp->ir = vp->or = vp->clip_ctrl = 0;
        util_dynarray_fini(&vp->branch_relocs);
        util_dynarray_fini(&vp->const_relocs);
 }