nv40: less dodgy vp const/insn handling

author Ben Skeggs <skeggsb@gmail.com>

Fri, 14 Dec 2007 14:50:15 +0000 (01:50 +1100)

committer Ben Skeggs <skeggsb@gmail.com>

Fri, 14 Dec 2007 16:49:35 +0000 (03:49 +1100)
author Ben Skeggs <skeggsb@gmail.com>
Fri, 14 Dec 2007 14:50:15 +0000 (01:50 +1100)
committer Ben Skeggs <skeggsb@gmail.com>
Fri, 14 Dec 2007 16:49:35 +0000 (03:49 +1100)
diff --git a/src/mesa/pipe/nv40/nv40_shader.h b/src/mesa/pipe/nv40/nv40_shader.h

index 01c0652..5909c70 100644 (file)
--- a/src/mesa/pipe/nv40/nv40_shader.h
+++ b/src/mesa/pipe/nv40/nv40_shader.h
@@ -90,8 +90,8 @@
  #    define NV40_VP_INST_OP_ADD                                             0x03
  #    define NV40_VP_INST_OP_MAD                                             0x04
  #    define NV40_VP_INST_OP_DP3                                             0x05
-#    define NV40_VP_INST_OP_DP4                                             0x07
  #    define NV40_VP_INST_OP_DPH                                             0x06
+#    define NV40_VP_INST_OP_DP4                                             0x07
  #    define NV40_VP_INST_OP_DST                                             0x08
  #    define NV40_VP_INST_OP_MIN                                             0x09
  #    define NV40_VP_INST_OP_MAX                                             0x0A
@@ -109,9 +109,11 @@
  #    define NV40_VP_INST_OP_SSG                                             0x16
  #    define NV40_VP_INST_OP_ARR                                             0x17
  #    define NV40_VP_INST_OP_ARA                                             0x18
-#    define NV40_VP_INST_OP_TXWHAT                                          0x19
+#    define NV40_VP_INST_OP_TXL                                             0x19
  #define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27
  #define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27)
+#    define NV40_VP_INST_OP_NOP                                             0x00
+#    define NV40_VP_INST_OP_MOV                                             0x01
  #    define NV40_VP_INST_OP_RCP                                             0x02
  #    define NV40_VP_INST_OP_RCC                                             0x03
  #    define NV40_VP_INST_OP_RSQ                                             0x04
diff --git a/src/mesa/pipe/nv40/nv40_state.h b/src/mesa/pipe/nv40/nv40_state.h

index 80c76cd..8ab334d 100644 (file)
--- a/src/mesa/pipe/nv40/nv40_state.h
+++ b/src/mesa/pipe/nv40/nv40_state.h
@@ -54,24 +54,31 @@ struct nv40_rasterizer_state {
         uint32_t point_sprite;
  };
  
+struct nv40_vertex_program_exec {
+       uint32_t data[4];
+       boolean has_branch_offset;
+       int const_index;
+};
+
+struct nv40_vertex_program_data {
+       int index; /* immediates == -1 */
+       float value[4];
+};
+
  struct nv40_vertex_program {
         const struct pipe_shader_state *pipe;
  
         boolean translated;
+       struct nv40_vertex_program_exec *insns;
+       unsigned nr_insns;
+       struct nv40_vertex_program_data *consts;
+       unsigned nr_consts;
  
         struct nouveau_resource *exec;
-       uint32_t *insn;
-       uint insn_len;
-
+       unsigned exec_start;
         struct nouveau_resource *data;
-       uint data_start;
-
-       struct {
-               int pipe_id;
-               int hw_id;
-               float value[4];
-       } consts[256];
-       int num_consts;
+       unsigned data_start;
+       unsigned data_start_min;
  
         uint32_t ir;
         uint32_t or;
diff --git a/src/mesa/pipe/nv40/nv40_vertprog.c b/src/mesa/pipe/nv40/nv40_vertprog.c

index b6ebaee..c9e1f25 100644 (file)
--- a/src/mesa/pipe/nv40/nv40_vertprog.c
+++ b/src/mesa/pipe/nv40/nv40_vertprog.c
@@ -9,6 +9,18 @@
  #include "nv40_dma.h"
  #include "nv40_state.h"
  
+/* TODO (at least...):
+ *  1. Indexed consts  + ARL
+ *  2. Arb. swz/negation
+ *  3. NV_vp11, NV_vp2, NV_vp3 features
+ *       - extra arith opcodes
+ *       - branching
+ *       - texture sampling
+ *       - indexed attribs
+ *       - indexed results
+ *  4. bugs
+ */
+
  #define SWZ_X 0
  #define SWZ_Y 1
  #define SWZ_Z 2
@@ -26,28 +38,12 @@
  #define neg(s) nv40_sr_neg((s))
  #define abs(s) nv40_sr_abs((s))
  
-static uint32_t
-passthrough_vp_data[] = {
-       0x40041c6c, 0x0040010d, 0x8106c083, 0x6041ff84,
-       0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff81,
-};
-
-static struct nv40_vertex_program
-passthrough_vp = {
-       .pipe = NULL,
-       .translated = TRUE,
-       
-       .insn     = passthrough_vp_data,
-       .insn_len = sizeof(passthrough_vp_data) / sizeof(uint32_t),
-
-       .ir = 0x00000003,
-       .or = 0x00000001,
-};
-
  struct nv40_vpc {
         struct nv40_vertex_program *vp;
  
-       uint output_map[PIPE_MAX_SHADER_OUTPUTS];
+       struct nv40_vertex_program_exec *vpi;
+
+       unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
  
         int high_temp;
         int temp_temp_count;
@@ -59,7 +55,7 @@ temp(struct nv40_vpc *vpc)
         int idx;
  
         idx  = vpc->temp_temp_count++;
-       idx += vpc->high_temp;
+       idx += vpc->high_temp + 1;
         return nv40_sr(NV40SR_TEMP, idx);
  }
  
@@ -67,16 +63,25 @@ static INLINE struct nv40_sreg
  constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
  {
         struct nv40_vertex_program *vp = vpc->vp;
-       int idx = vp->num_consts;
+       struct nv40_vertex_program_data *vpd;
+       int idx;
+
+       if (pipe >= 0) {
+               for (idx = 0; idx < vp->nr_consts; idx++) {
+                       if (vp->consts[idx].index == pipe)
+                               return nv40_sr(NV40SR_CONST, idx);
+               }
+       }
  
-       vp->consts[idx].pipe_id  = pipe;
-       vp->consts[idx].hw_id    = idx;
-       vp->consts[idx].value[0] = x;
-       vp->consts[idx].value[1] = y;
-       vp->consts[idx].value[2] = z;
-       vp->consts[idx].value[3] = w;
-       vp->num_consts++;
+       idx = vp->nr_consts++;
+       vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+       vpd = &vp->consts[idx];
  
+       vpd->index = pipe;
+       vpd->value[0] = x;
+       vpd->value[1] = y;
+       vpd->value[2] = z;
+       vpd->value[3] = w;
         return nv40_sr(NV40SR_CONST, idx);
  }
  
@@ -103,7 +108,9 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
         case NV40SR_CONST:
                 sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
                        NV40_VP_SRC_REG_TYPE_SHIFT);
-               hw[1] |= (src.index << NV40_VP_INST_CONST_SRC_SHIFT);
+               assert(vpc->vpi->const_index == -1 ||
+                      vpc->vpi->const_index == src.index);
+               vpc->vpi->const_index = src.index;
                 break;
         case NV40SR_NONE:
                 sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
@@ -202,7 +209,14 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
               struct nv40_sreg s2)
  {
         struct nv40_vertex_program *vp = vpc->vp;
-       uint32_t *hw = &vp->insn[vp->insn_len];
+       uint32_t *hw;
+
+       vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+       vpc->vpi = &vp->insns[vp->nr_insns - 1];
+       memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+       vpc->vpi->const_index = -1;
+
+       hw = vpc->vpi->data;
  
         hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
         hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
@@ -224,8 +238,6 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
         emit_src(vpc, hw, 0, s0);
         emit_src(vpc, hw, 1, s1);
         emit_src(vpc, hw, 2, s2);
-
-       vp->insn_len += 4;
  }
  
  static INLINE struct nv40_sreg
@@ -326,8 +338,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                                 ai = fsrc->SrcRegister.Index;
                                 src[i] = tgsi_src(vpc, fsrc);
                         } else {
-                               NOUVEAU_MSG("extra src attr %d\n",
-                                        fsrc->SrcRegister.Index);
                                 src[i] = temp(vpc);
                                 arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
                                       tgsi_src(vpc, fsrc), none, none);
@@ -518,7 +528,6 @@ nv40_vertprog_translate(struct nv40_context *nv40,
         vpc = calloc(1, sizeof(struct nv40_vpc));
         if (!vpc)
                 return;
-       vp->insn = calloc(1, 128*4*sizeof(uint32_t));
         vpc->vp = vp;
         vpc->high_temp = -1;
  
@@ -547,7 +556,6 @@ nv40_vertprog_translate(struct nv40_context *nv40,
                 case TGSI_TOKEN_TYPE_INSTRUCTION:
                 {
                         const struct tgsi_full_instruction *finst;
-
                         finst = &parse.FullToken.FullInstruction;
                         if (!nv40_vertprog_parse_instruction(vpc, finst))
                                 goto out_err;
@@ -558,14 +566,7 @@ nv40_vertprog_translate(struct nv40_context *nv40,
                 }
         }
  
-       vp->insn[vp->insn_len - 1] |= NV40_VP_INST_LAST;
-#if 0
-       {
-               int i;
-               for (i = 0; i < vp->insn_len; i++)
-                       NOUVEAU_ERR("inst[%d] = 0x%08x\n", i, vp->insn[i]);
-       }
-#endif
+       vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
         vp->translated = TRUE;
  out_err:
         tgsi_parse_free(&parse);
@@ -576,9 +577,8 @@ void
  nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
  { 
         struct nouveau_winsys *nvws = nv40->nvws;
-       struct pipe_context *pipe = &nv40->pipe;
+       struct pipe_winsys *ws = nv40->pipe.winsys;
         boolean upload_code = FALSE, upload_data = FALSE;
-       float *map;
         int i;
  
         /* Translate TGSI shader into hw bytecode */
@@ -589,11 +589,9 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
         }
  
         /* Allocate hw vtxprog exec slots */
-       /*XXX: when we do branching, need to patch targets if program moves.
-        */
         if (!vp->exec) {
                 struct nouveau_resource *heap = nv40->vertprog.exec_heap;
-               uint vplen = vp->insn_len / 4;
+               uint vplen = vp->nr_insns;
  
                 if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
                         while (heap->next && heap->size < vplen) {
@@ -611,75 +609,106 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
         }
  
         /* Allocate hw vtxprog const slots */
-       if (vp->num_consts && !vp->data) {
+       if (vp->nr_consts && !vp->data) {
                 struct nouveau_resource *heap = nv40->vertprog.data_heap;
-               int count = vp->num_consts;
  
-               if (nvws->res_alloc(heap, count, vp, &vp->data)) {
-                       while (heap->next && heap->size < count) {
+               if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+                       while (heap->next && heap->size < vp->nr_consts) {
                                 struct nv40_vertex_program *evict;
                                 
                                 evict = heap->next->priv;
                                 nvws->res_free(&evict->data);
                         }
  
-                       if (nvws->res_alloc(heap, count, vp, &vp->data))
+                       if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
                                 assert(0);
                 }
  
+               /*XXX: handle this some day */
+               assert(vp->data->start >= vp->data_start_min);
+
                 upload_data = TRUE;
+               if (vp->data_start != vp->data->start)
+                       upload_code = TRUE;
         }
  
-       /* If constants moved, patch the vtxprog to fix the offsets */
-       if (vp->num_consts && vp->data_start != vp->data->start) {
-               for (i = 0; i < vp->insn_len; i += 4) {
-                       int id;
+       /* If exec or data segments moved we need to patch the program to
+        * fixup offsets and register IDs.
+        */
+       if (vp->exec_start != vp->exec->start) {
+               for (i = 0; i < vp->nr_insns; i++) {
+                       struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+                       if (vpi->has_branch_offset) {
+                               assert(0);
+                       }
+               }
  
-                       id = (vp->insn[i + 1] & NV40_VP_INST_CONST_SRC_MASK) >>
-                            NV40_VP_INST_CONST_SRC_SHIFT;
-                       id -= vp->data_start;
-                       id += vp->data->start;
+               vp->exec_start = vp->exec->start;
+       }
+
+       if (vp->nr_consts && vp->data_start != vp->data->start) {
+               for (i = 0; i < vp->nr_insns; i++) {
+                       struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+                       if (vpi->const_index >= 0) {
+                               vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK;
+                               vpi->data[1] |=
+                                       (vpi->const_index + vp->data->start) <<
+                                       NV40_VP_INST_CONST_SRC_SHIFT;
  
-                       vp->insn[i + 1] &= ~NV40_VP_INST_CONST_SRC_MASK;
-                       vp->insn[i + 1] |= (id << NV40_VP_INST_CONST_SRC_SHIFT);
+                       }
                 }
  
                 vp->data_start = vp->data->start;
-               upload_code = TRUE;
         }
  
         /* Update + Upload constant values */
-       if (vp->num_consts) {
-               map = pipe->winsys->buffer_map(pipe->winsys,
-                                              nv40->vertprog.constant_buf,
-                                              PIPE_BUFFER_FLAG_READ);
-               for (i = 0; i < vp->num_consts; i++) {
-                       uint pid = vp->consts[i].pipe_id;
-
-                       if (pid >= 0) {
+       if (vp->nr_consts) {
+               float *map = NULL;
+
+               if (nv40->vertprog.constant_buf) {
+                       map = ws->buffer_map(ws, nv40->vertprog.constant_buf,
+                                            PIPE_BUFFER_FLAG_READ);
+               }
+
+               for (i = 0; i < vp->nr_consts; i++) {
+                       struct nv40_vertex_program_data *vpd = &vp->consts[i];
+
+                       if (vpd->index >= 0) {
                                 if (!upload_data &&
-                                   !memcmp(vp->consts[i].value, &map[pid*4],
+                                   !memcmp(vpd->value, &map[vpd->index * 4],
                                             4 * sizeof(float)))
                                         continue;
-                               memcpy(vp->consts[i].value, &map[pid*4],
+                               memcpy(vpd->value, &map[vpd->index * 4],
                                        4 * sizeof(float));
                         }
  
                         BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
-                       OUT_RING  (vp->consts[i].hw_id + vp->data->start);
-                       OUT_RINGp ((uint32_t *)vp->consts[i].value, 4);
+                       OUT_RING  (i + vp->data->start);
+                       OUT_RINGp ((uint32_t *)vpd->value, 4);
+               }
+
+               if (map) {
+                       ws->buffer_unmap(ws, nv40->vertprog.constant_buf);
                 }
-               pipe->winsys->buffer_unmap(pipe->winsys,
-                                          nv40->vertprog.constant_buf);
         }
  
         /* Upload vtxprog */
         if (upload_code) {
+#if 0
+               for (i = 0; i < vp->nr_insns; i++) {
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+               }
+#endif
                 BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
                 OUT_RING  (vp->exec->start);
-               for (i = 0; i < vp->insn_len; i += 4) {
+               for (i = 0; i < vp->nr_insns; i++) {
                         BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
-                       OUT_RINGp (&vp->insn[i], 4);
+                       OUT_RINGp (vp->insns[i].data, 4);
                 }
         }
author	Ben Skeggs <skeggsb@gmail.com>
	Fri, 14 Dec 2007 14:50:15 +0000 (01:50 +1100)
committer	Ben Skeggs <skeggsb@gmail.com>
	Fri, 14 Dec 2007 16:49:35 +0000 (03:49 +1100)
src/mesa/pipe/nv40/nv40_shader.h		patch | blob | history
src/mesa/pipe/nv40/nv40_state.h		patch | blob | history
src/mesa/pipe/nv40/nv40_vertprog.c		patch | blob | history