nv40: ensure all required buffers are accounted for during state validation
author Ben Skeggs <skeggsb@gmail.com>
Mon, 24 Dec 2007 07:53:41 +0000 (18:53 +1100)
committer Ben Skeggs <skeggsb@gmail.com>
Mon, 24 Dec 2007 07:53:41 +0000 (18:53 +1100)
src/mesa/pipe/nv40/nv40_context.h
src/mesa/pipe/nv40/nv40_fragprog.c
src/mesa/pipe/nv40/nv40_state.c
src/mesa/pipe/nv40/nv40_state_emit.c
src/mesa/pipe/nv40/nv40_state_tex.c
src/mesa/pipe/nv40/nv40_vbo.c

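diff --git a/src/mesa/pipe/nv40/nv40_context.h b/src/mesa/pipe/nv40/nv40_context.h
index 9a93ed3..5d0bf89 100644
--- a/src/mesa/pipe/nv40/nv40_context.h
+++ b/src/mesa/pipe/nv40/nv40_context.h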
@@ -44,6 +44,21 @@ struct nv40_context {
        struct pipe_texture       *tex_miptree[PIPE_MAX_SAMPLERS];
        uint32_t                   tex_dirty;
 
+       uint32_t rt_enable;
+       struct pipe_buffer_handle *rt[4];
+       struct pipe_buffer_handle *zeta;
+
+       struct {
+               struct pipe_buffer_handle *buffer;
+               uint32_t format;
+       } tex[16];
+
+       unsigned vb_enable;
+       struct {
+               struct pipe_buffer_handle *buffer;
+               unsigned delta;
+       } vb[16];
+
        struct {
                struct nouveau_resource *exec_heap;
                struct nouveau_resource *data_heap;
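diff --git a/src/mesa/pipe/nv40/nv40_fragprog.c b/src/mesa/pipe/nv40/nv40_fragprog.c
index 480c4c9..6ee6a7d 100644
--- a/src/mesa/pipe/nv40/nv40_fragprog.c
+++ b/src/mesa/pipe/nv40/nv40_fragprog.c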
@@ -796,10 +796,6 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
                fp->on_hw = TRUE;
        }
 
-       BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
-       OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
-                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-                  NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
        BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
        OUT_RING  (fp->fp_control);
 
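diff --git a/src/mesa/pipe/nv40/nv40_state.c b/src/mesa/pipe/nv40/nv40_state.c
index 8fa40ce..30cd429 100644
--- a/src/mesa/pipe/nv40/nv40_state.c
+++ b/src/mesa/pipe/nv40/nv40_state.c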
@@ -630,50 +630,47 @@ nv40_set_framebuffer_state(struct pipe_context *pipe,
        }
 
+       /* Record the buffer behind each enabled render target; the DMA
+        * object and offset relocs are emitted by nv40_emit_hw_state().
+        */
        if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
-               BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
-               OUT_RELOCo(rt[0]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-               BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
+               BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 1);
                OUT_RING  (rt[0]->pitch * rt[0]->cpp);
-               OUT_RELOCl(rt[0]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               nv40->rt[0] = rt[0]->buffer;
        }
 
        if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
-               BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
-               OUT_RELOCo(rt[1]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-               BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
-               OUT_RELOCl(rt[1]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               /* Only the pitch is written here now, so the method
+                * carries a single data word.
+                */
+               BEGIN_RING(curie, NV40TCL_COLOR1_PITCH, 1);
                OUT_RING  (rt[1]->pitch * rt[1]->cpp);
+               nv40->rt[1] = rt[1]->buffer;
        }
 
        if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
-               BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
-               OUT_RELOCo(rt[2]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-               BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
-               OUT_RELOCl(rt[2]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
                BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
                OUT_RING  (rt[2]->pitch * rt[2]->cpp);
+               nv40->rt[2] = rt[2]->buffer;
        }
 
        if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
-               BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
-               OUT_RELOCo(rt[3]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-               BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
-               OUT_RELOCl(rt[3]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
                BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
                OUT_RING  (rt[3]->pitch * rt[3]->cpp);
+               nv40->rt[3] = rt[3]->buffer;
        }
 
        if (zeta_format) {
-               BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
-               OUT_RELOCo(zeta->buffer,
-                          NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD);
-               BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
-               OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR |
-                          NOUVEAU_BO_RD);
                BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
                OUT_RING  (zeta->pitch * zeta->cpp);
+               nv40->zeta = zeta->buffer;
+       } else {
+               /* No depth buffer: drop the cached handle so that
+                * nv40_emit_hw_state() stops emitting relocs for it.
+                */
+               nv40->zeta = NULL;
        }
 
+       nv40->rt_enable = rt_enable;
        BEGIN_RING(curie, NV40TCL_RT_ENABLE, 1);
        OUT_RING  (rt_enable);
        BEGIN_RING(curie, NV40TCL_RT_HORIZ, 3);
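diff --git a/src/mesa/pipe/nv40/nv40_state_emit.c b/src/mesa/pipe/nv40/nv40_state_emit.c
index c53b5d8..fc8a0a9 100644
--- a/src/mesa/pipe/nv40/nv40_state_emit.c
+++ b/src/mesa/pipe/nv40/nv40_state_emit.c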
@@ -5,6 +5,8 @@
 void
 nv40_emit_hw_state(struct nv40_context *nv40)
 {
+       int i;
+
        if (nv40->dirty & NV40_NEW_FRAGPROG) {
                nv40_fragprog_bind(nv40, nv40->fragprog.current);
                /*XXX: clear NV40_NEW_FRAGPROG if no now program uploaded */
@@ -25,5 +27,68 @@ nv40_emit_hw_state(struct nv40_context *nv40)
                nv40_vertprog_bind(nv40, nv40->vertprog.current);
                nv40->dirty &= ~NV40_NEW_VERTPROG;
        }
+
+       /* Emit relocs for every referenced buffer.
+        * This is to ensure the bufmgr has an accurate idea of how
+        * the buffer is used.  This isn't very efficient, but we don't
+        * seem to take a significant performance hit.  Will be improved
+        * at some point.  Vertex arrays are emitted by nv40_vbo.c
+        */
+
+       /* Render targets */
+       if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
+               BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
+               OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(curie, NV40TCL_COLOR0_OFFSET, 1);
+               OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
+               BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
+               OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 1);
+               OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+               BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
+               OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
+               OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+               BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
+               OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
+               OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv40->zeta) {
+               BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
+               OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
+               OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       /* Texture images */
+       for (i = 0; i < 16; i++) {
+               if (!nv40->tex[i].buffer)
+                       continue;
+               BEGIN_RING(curie, NV40TCL_TEX_OFFSET(i), 2);
+               OUT_RELOCl(nv40->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+                          NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+               OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+                          NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
+                          NV40TCL_TEX_FORMAT_DMA1);
+       }
+
+       /* Fragment program */
+       BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
+       OUT_RELOC (nv40->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
+                  NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
+                  NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
+                  NV40TCL_FP_ADDRESS_DMA1);
 }
 
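diff --git a/src/mesa/pipe/nv40/nv40_state_tex.c b/src/mesa/pipe/nv40/nv40_state_tex.c
index 05c136f..9fb274d 100644
--- a/src/mesa/pipe/nv40/nv40_state_tex.c
+++ b/src/mesa/pipe/nv40/nv40_state_tex.c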
@@ -109,12 +109,10 @@ nv40_tex_unit_enable(struct nv40_context *nv40, int unit)
        if (pt->format == PIPE_FORMAT_U_A8_L8)
                txs |= (1<<16); /*nfi*/
 
-       BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
-       OUT_RELOCl(nv40mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
-                  NOUVEAU_BO_RD);
-       OUT_RELOCd(nv40mt->buffer, txf, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
-                  NOUVEAU_BO_OR | NOUVEAU_BO_RD, NV40TCL_TEX_FORMAT_DMA0,
-                  NV40TCL_TEX_FORMAT_DMA1);
+       nv40->tex[unit].buffer = nv40mt->buffer;
+       nv40->tex[unit].format = txf;
+
+       BEGIN_RING(curie, NV40TCL_TEX_WRAP(unit), 6);
        OUT_RING  (ps->wrap);
        OUT_RING  (NV40TCL_TEX_ENABLE_ENABLE | ps->en |
                   (0x00078000) /* mipmap related? */);
@@ -135,6 +133,7 @@ nv40_state_tex_update(struct nv40_context *nv40)
                if (nv40->tex_miptree[unit]) {
                        nv40_tex_unit_enable(nv40, unit);
                } else {
+                       nv40->tex[unit].buffer = NULL;
                        BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1);
                        OUT_RING  (0);
                }
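diff --git a/src/mesa/pipe/nv40/nv40_vbo.c b/src/mesa/pipe/nv40/nv40_vbo.c
index 4bd6c44..fda32cd 100644
--- a/src/mesa/pipe/nv40/nv40_vbo.c
+++ b/src/mesa/pipe/nv40/nv40_vbo.c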
@@ -6,6 +6,9 @@
 #include "nv40_dma.h"
 #include "nv40_state.h"
 
+#include "pipe/nouveau/nouveau_channel.h"
+#include "pipe/nouveau/nouveau_pushbuf.h"
+
 static INLINE int
 nv40_vbo_ncomp(uint format)
 {
@@ -101,6 +104,8 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
        uint32_t inputs, vtxfmt[16];
        int hw, num_hw;
 
+       nv40->vb_enable = 0;
+
        inputs = vp->ir;
        for (hw = 0; hw < 16 && inputs; hw++) {
                if (inputs & (1 << hw)) {
@@ -129,19 +134,16 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
                                continue;
                }
 
-               BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(hw), 1);
-               OUT_RELOC(vb->buffer, vb->buffer_offset + ve->src_offset,
-                         NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
-                         NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
-                         NV40TCL_VTXBUF_ADDRESS_DMA1);
+               nv40->vb_enable |= (1 << hw);
+               nv40->vb[hw].delta = vb->buffer_offset + ve->src_offset;
+               nv40->vb[hw].buffer = vb->buffer;
+
                vtxfmt[hw] = ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
                              (nv40_vbo_ncomp(ve->src_format) <<
                               NV40TCL_VTXFMT_SIZE_SHIFT) |
                              nv40_vbo_type(ve->src_format));
        }
 
-       BEGIN_RING(curie, 0x1710, 1);
-       OUT_RING  (0); /* vtx cache flush */
        BEGIN_RING(curie, NV40TCL_VTXFMT(0), num_hw);
        OUT_RINGp (vtxfmt, num_hw);
 }
@@ -149,14 +151,31 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
 static boolean
 nv40_vbo_validate_state(struct nv40_context *nv40)
 {
-       if (nv40->dirty & ~NV40_NEW_ARRAYS)
-               nv40_emit_hw_state(nv40);
+       unsigned inputs;
+
+       nv40_emit_hw_state(nv40);
 
        if (nv40->dirty & NV40_NEW_ARRAYS) {
                nv40_vbo_arrays_update(nv40);
                nv40->dirty &= ~NV40_NEW_ARRAYS;
        }
 
+       inputs = nv40->vb_enable;
+       while (inputs) {
+               unsigned a = ffs(inputs) - 1;
+
+               inputs &= ~(1 << a);
+
+               BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(a), 1);
+               OUT_RELOC (nv40->vb[a].buffer, nv40->vb[a].delta,
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW |
+                          NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
+                          NV40TCL_VTXBUF_ADDRESS_DMA1);
+       }
+
+       BEGIN_RING(curie, 0x1710, 1);
+       OUT_RING  (0); /* vtx cache flush */
+
        return TRUE;
 }