nouveau: add nv30.
author Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Fri, 15 Feb 2008 01:36:28 +0000 (02:36 +0100)
committer Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Fri, 15 Feb 2008 01:36:28 +0000 (02:36 +0100)
17 files changed:
src/mesa/pipe/nv30/Makefile [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_clear.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_context.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_context.h [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_dma.h [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_draw.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_fragprog.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_fragtex.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_miptree.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_query.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_shader.h [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_state.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_state.h [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_state_emit.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_surface.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_vbo.c [new file with mode: 0644]
src/mesa/pipe/nv30/nv30_vertprog.c [new file with mode: 0644]

diff --git a/src/mesa/pipe/nv30/Makefile b/src/mesa/pipe/nv30/Makefile
new file mode 100644 (file)
index 0000000..dd4b7e7
--- /dev/null
@@ -0,0 +1,29 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+# LIBNAME is presumably consumed by ../Makefile.template, included below (TODO confirm).
+LIBNAME = nv30
+# Driver C sources: one entry per nv30_*.c file added by this commit.
+DRIVER_SOURCES = \
+       nv30_clear.c \
+       nv30_context.c \
+       nv30_draw.c \
+       nv30_fragprog.c \
+       nv30_fragtex.c \
+       nv30_miptree.c \
+       nv30_query.c \
+       nv30_state.c \
+       nv30_state_emit.c \
+       nv30_surface.c \
+       nv30_vbo.c \
+       nv30_vertprog.c
+# COMMON_SOURCES is not assigned in this file; presumably an included makefile sets it (TODO confirm).
+C_SOURCES = \
+       $(COMMON_SOURCES) \
+       $(DRIVER_SOURCES)
+# This driver has no assembly sources.
+ASM_SOURCES = 
+
+include ../Makefile.template
+# Target with no prerequisites and no recipe.
+symlinks:
+
diff --git a/src/mesa/pipe/nv30/nv30_clear.c b/src/mesa/pipe/nv30/nv30_clear.c
new file mode 100644 (file)
index 0000000..71f4135
--- /dev/null
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+
+/*
+ * Clear hook installed in pipe.clear by nv30_create().  The clear is
+ * delegated to the context's own surface_fill hook, covering the surface
+ * from (0, 0) out to its full width and height with clearValue.
+ */
+void
+nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+          unsigned clearValue)
+{
+       pipe->surface_fill(pipe, ps,
+                          0, 0,                        /* origin */
+                          ps->width, ps->height,       /* whole surface */
+                          clearValue);
+}
diff --git a/src/mesa/pipe/nv30/nv30_context.c b/src/mesa/pipe/nv30/nv30_context.c
new file mode 100644 (file)
index 0000000..c56f918
--- /dev/null
@@ -0,0 +1,429 @@
+#include "pipe/draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
+
+#include "nv30_context.h"
+
+/*
+ * get_name hook: formats the chipset id of the context as "NV" followed by
+ * at least two upper-case hex digits.
+ *
+ * The string lives in function-static storage, so the returned pointer stays
+ * valid after the call but every call overwrites the same buffer.
+ */
+static const char *
+nv30_get_name(struct pipe_context *pipe)
+{
+       static char name[128];
+
+       snprintf(name, sizeof(name), "NV%02X", nv30_context(pipe)->chipset);
+       return name;
+}
+
+/* get_vendor hook: always reports the fixed vendor string "nouveau". */
+static const char *
+nv30_get_vendor(struct pipe_context *pipe)
+{
+       static const char vendor[] = "nouveau";
+
+       (void)pipe;     /* the answer does not depend on the context */
+       return vendor;
+}
+
+/*
+ * get_param hook: answers integer capability queries.
+ *
+ * Cases are grouped by the value they report.  Any cap not listed here is
+ * logged through NOUVEAU_ERR and reported as 0.
+ */
+static int
+nv30_get_param(struct pipe_context *pipe, int param)
+{
+       switch (param) {
+       /* boolean caps reported as present */
+       case PIPE_CAP_TWO_SIDED_STENCIL:
+       case PIPE_CAP_ANISOTROPIC_FILTER:
+       case PIPE_CAP_POINT_SPRITE:
+       case PIPE_CAP_OCCLUSION_QUERY:
+       case PIPE_CAP_TEXTURE_SHADOW_MAP:
+               return 1;
+
+       /* boolean caps reported as absent */
+       case PIPE_CAP_NPOT_TEXTURES:
+       case PIPE_CAP_GLSL:
+       case PIPE_CAP_S3TC:
+               return 0;
+
+       /* numeric limits */
+       case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+               return 16;
+       case PIPE_CAP_MAX_RENDER_TARGETS:
+               return 2;
+       case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+       case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+               return 13;
+       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+               return 10;
+
+       default:
+               NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+               return 0;
+       }
+}
+
+/*
+ * get_paramf hook: answers floating-point capability queries.  Unknown caps
+ * are logged through NOUVEAU_ERR and reported as 0.0.
+ */
+static float
+nv30_get_paramf(struct pipe_context *pipe, int param)
+{
+       float limit;
+
+       switch (param) {
+       case PIPE_CAP_MAX_LINE_WIDTH:
+       case PIPE_CAP_MAX_LINE_WIDTH_AA:
+               limit = 10.0;
+               break;
+       case PIPE_CAP_MAX_POINT_WIDTH:
+       case PIPE_CAP_MAX_POINT_WIDTH_AA:
+               limit = 64.0;
+               break;
+       case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+               limit = 16.0;
+               break;
+       case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+               limit = 4.0;
+               break;
+       default:
+               NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+               limit = 0.0;
+               break;
+       }
+
+       return limit;
+}
+
+/*
+ * flush hook.
+ *
+ * Optionally queues the 0x1fd8 method pair when PIPE_FLUSH_TEXTURE_CACHE is
+ * requested, always submits the push buffer with FIRE_RING(), and when
+ * PIPE_FLUSH_WAIT is requested brackets the submission with a reset of, and
+ * a wait on, the context's sync notifier.
+ *
+ * NOTE(review): 0x1fd8, 0x104 and 0x100 are raw method offsets with no
+ * symbolic name in this file; the last argument of notifier_wait (2000) is
+ * presumably a timeout -- confirm against the winsys interface.
+ */
+static void
+nv30_flush(struct pipe_context *pipe, unsigned flags)
+{
+       /* The ring macros reference these locals by name. */
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct nouveau_winsys *nvws = nv30->nvws;
+       const unsigned wait = flags & PIPE_FLUSH_WAIT;
+
+       if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+               BEGIN_RING(rankine, 0x1fd8, 1);
+               OUT_RING  (2);
+               BEGIN_RING(rankine, 0x1fd8, 1);
+               OUT_RING  (1);
+       }
+
+       if (wait) {
+               /* Re-arm the notifier before queueing the methods. */
+               nvws->notifier_reset(nv30->sync, 0);
+               BEGIN_RING(rankine, 0x104, 1);
+               OUT_RING  (0);
+               BEGIN_RING(rankine, 0x100, 1);
+               OUT_RING  (0);
+       }
+
+       FIRE_RING();
+
+       if (wait)
+               nvws->notifier_wait(nv30->sync, 0, 0, 2000);
+}
+
+/*
+ * destroy hook: releases everything nv30_create() set up and then frees the
+ * context itself.  nv30_create() also calls this on its failure paths, so it
+ * may run on a partially initialised (zero-filled) context.
+ */
+static void
+nv30_destroy(struct pipe_context *pipe)
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+       struct nouveau_winsys *ws = ctx->nvws;
+
+       /* The draw context is created last in nv30_create(); it may be unset. */
+       if (ctx->draw != NULL)
+               draw_destroy(ctx->draw);
+
+       /* vertex program heaps */
+       ws->res_free(&ctx->vertprog.exec_heap);
+       ws->res_free(&ctx->vertprog.data_heap);
+
+       /* query heap and its notifier */
+       ws->res_free(&ctx->query_heap);
+       ws->notifier_free(&ctx->query);
+
+       /* sync notifier */
+       ws->notifier_free(&ctx->sync);
+
+       /* 3D object */
+       ws->grobj_free(&ctx->rankine);
+
+       free(ctx);
+}
+/*
+ * One-time hardware context setup.
+ *
+ * Allocates the 3D ("rankine") object of class rankine_class through
+ * nvws->grobj_alloc, then queues a fixed sequence of methods that puts the
+ * object into a known default state and submits it with FIRE_RING().
+ *
+ * Reads nv30->sync->handle, so the sync notifier must already be allocated
+ * when this is called.
+ *
+ * Returns FALSE if the object cannot be allocated, TRUE otherwise.
+ */
+static boolean
+nv30_init_hwctx(struct nv30_context *nv30, int rankine_class)
+{
+       struct nouveau_winsys *nvws = nv30->nvws;
+       int ret;
+       int i;
+
+       ret = nvws->grobj_alloc(nvws, rankine_class, &nv30->rankine);
+       if (ret) {
+               NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+               return FALSE;
+       }
+       /* Point the DMA_* methods at the sync notifier and the channel's vram/gart objects. */
+       BEGIN_RING(rankine, NV34TCL_DMA_NOTIFY, 1);
+       OUT_RING  (nv30->sync->handle);
+       BEGIN_RING(rankine, NV34TCL_DMA_TEXTURE0, 2);
+       OUT_RING  (nvws->channel->vram->handle);
+       OUT_RING  (nvws->channel->gart->handle);
+       BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1);
+       OUT_RING  (nvws->channel->vram->handle);
+       BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 2);
+       OUT_RING  (nvws->channel->vram->handle);
+       OUT_RING  (nvws->channel->vram->handle);
+       BEGIN_RING(rankine, NV34TCL_DMA_VTXBUF0, 2);
+       OUT_RING  (nvws->channel->vram->handle);
+       OUT_RING  (nvws->channel->gart->handle);
+/*     BEGIN_RING(rankine, NV34TCL_DMA_FENCE, 2);
+       OUT_RING  (0);
+       OUT_RING  (nv30->query->handle);*/
+       BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY7, 1);
+       OUT_RING  (nvws->channel->vram->handle);
+       BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY8, 1);
+       OUT_RING  (nvws->channel->vram->handle);
+       /* Zero viewport clip rectangles 1..7; rectangle 0 is programmed further down. */
+       for (i=1; i<8; i++) {
+               BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+               OUT_RING  (0);
+               BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1);
+               OUT_RING  (0);
+       }
+       /* NOTE(review): the raw method offsets below have no symbolic names here; their meaning is not visible in this file. */
+       BEGIN_RING(rankine, 0x220, 1);
+       OUT_RING  (1);
+
+       BEGIN_RING(rankine, 0x03b0, 1);
+       OUT_RING  (0x00100000);
+       BEGIN_RING(rankine, 0x1454, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, 0x1d80, 1);
+       OUT_RING  (3);
+       BEGIN_RING(rankine, 0x1450, 1);
+       OUT_RING  (0x00030004);
+       
+       /* NEW */
+       BEGIN_RING(rankine, 0x1e98, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, 0x17e0, 3);
+       OUT_RING  (0);
+       OUT_RING  (0);
+       OUT_RING  (0x3f800000);
+       BEGIN_RING(rankine, 0x1f80, 16);
+       OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
+       OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
+       OUT_RING  (0x0000ffff);
+       OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
+       OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
+
+       BEGIN_RING(rankine, 0x120, 3);
+       OUT_RING  (0);
+       OUT_RING  (1);
+       OUT_RING  (2);
+
+       BEGIN_RING(rankine, 0x1d88, 1);
+       OUT_RING  (0x00001200);
+
+       BEGIN_RING(rankine, NV34TCL_RC_ENABLE, 1);
+       OUT_RING  (0);
+
+       /* Attempt to setup a known state.. Probably missing a heap of
+        * stuff here..
+        */
+       BEGIN_RING(rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, NV34TCL_DEPTH_WRITE_ENABLE, 2);
+       OUT_RING  (0); /* wr disable */
+       OUT_RING  (0); /* test disable */
+       BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1);
+       OUT_RING  (0x01010101); /* TR,TR,TR,TR */
+       BEGIN_RING(rankine, NV34TCL_CULL_FACE_ENABLE, 1);
+       OUT_RING  (0);
+       BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 5);
+       OUT_RING  (0);                          /* Blend enable */
+       OUT_RING  (0);                          /* Blend src */
+       OUT_RING  (0);                          /* Blend dst */
+       OUT_RING  (0x00000000);                 /* Blend colour */
+       OUT_RING  (0x8006);                     /* FUNC_ADD */
+       BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+       OUT_RING  (0);
+       OUT_RING  (0x1503 /*GL_COPY*/);
+       BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1);
+       OUT_RING  (1);
+       BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1);
+       OUT_RING  (0x1d01 /*GL_SMOOTH*/);
+       BEGIN_RING(rankine, NV34TCL_POLYGON_OFFSET_FACTOR,2);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 2);
+       OUT_RING  (0x1b02 /*GL_FILL*/);
+       OUT_RING  (0x1b02 /*GL_FILL*/);
+       /* - Disable texture units
+        * - Set fragprog to MOVR result.color, fragment.color */
+       for (i=0;i<16;i++) {
+               BEGIN_RING(rankine,
+                               NV34TCL_TX_ENABLE(i), 1);
+               OUT_RING  (0);
+       }
+       /* Polygon stipple */
+       BEGIN_RING(rankine,
+                       NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
+       for (i=0;i<0x20;i++)
+               OUT_RING  (0xFFFFFFFF);
+       /* Program a default 4096x4096 extent (pitch 4096*4) into the viewport, clip and scissor methods. */
+       int w=4096;
+       int h=4096;
+       int pitch=4096*4;
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 5);
+       OUT_RING  (w<<16);
+       OUT_RING  (h<<16);
+       OUT_RING  (0x148); /* format */
+       OUT_RING  (pitch << 16 | pitch);
+       OUT_RING  (0x0);
+        BEGIN_RING(rankine, 0x0a00, 2);
+        OUT_RING  ((w<<16) | 0);
+        OUT_RING  ((h<<16) | 0);
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+       OUT_RING  ((w-1)<<16);
+       OUT_RING  ((h-1)<<16);
+       BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2);
+       OUT_RING  (w<<16);
+       OUT_RING  (h<<16);
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2);
+       OUT_RING  (w<<16);
+       OUT_RING  (h<<16);
+
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); /* four 0.0 words, then 1.0, 1.0, 1.0, 0.0 */
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+
+       BEGIN_RING(rankine, NV34TCL_MODELVIEW_MATRIX(0), 16); /* 4x4 identity */
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+
+       BEGIN_RING(rankine, NV34TCL_PROJECTION_MATRIX(0), 16); /* 4x4 identity */
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (0.0);
+       OUT_RINGf (1.0);
+
+       BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); /* same values as the scissor write above */
+       OUT_RING  (4096<<16);
+       OUT_RING  (4096<<16);
+
+       BEGIN_RING(rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
+       OUT_RING  (0xffff0000);
+
+       FIRE_RING ();
+       return TRUE;
+}
+
+/*
+ * Chipset-to-class masks.  Bit N of a mask is set when the chipset whose id
+ * has low nibble N uses the corresponding 3D object class; nv30_create()
+ * tests (1 << (chipset & 0x0f)) against them in the order listed.
+ */
+#define NV30TCL_CHIPSET_3X_MASK 0x00000003
+#define NV34TCL_CHIPSET_3X_MASK 0x00000010
+#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+
+/*
+ * Create an nv30 pipe context.
+ *
+ * pipe_winsys  stored in pipe.winsys of the new context
+ * nvws         nouveau winsys used for every allocation and ring access
+ * chipset      chipset id; (chipset & 0xf0) must be 0x30
+ *
+ * Returns the pipe_context embedded in the new nv30_context, or NULL if the
+ * chipset is not a known NV3x or any allocation/initialisation step fails.
+ * On failure after the context has been allocated, everything is released
+ * again through nv30_destroy().
+ */
+struct pipe_context *
+nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws,
+           unsigned chipset)
+{
+       struct nv30_context *nv30;
+       struct draw_stage *render_stage;
+       unsigned chipset_bit;
+       int rankine_class = 0, ret;
+
+       if ((chipset & 0xf0) != 0x30) {
+               NOUVEAU_ERR("Not a NV3X chipset\n");
+               return NULL;
+       }
+
+       /* Select the 3D object class from the low nibble of the chipset id. */
+       chipset_bit = 1u << (chipset & 0x0f);
+       if (NV30TCL_CHIPSET_3X_MASK & chipset_bit) {
+               rankine_class = 0x0397;
+       } else if (NV34TCL_CHIPSET_3X_MASK & chipset_bit) {
+               rankine_class = 0x0697;
+       } else if (NV35TCL_CHIPSET_3X_MASK & chipset_bit) {
+               rankine_class = 0x0497;
+       } else {
+               NOUVEAU_ERR("Unknown NV3X chipset: NV%02x\n", chipset);
+               return NULL;
+       }
+
+       nv30 = CALLOC_STRUCT(nv30_context);
+       if (!nv30)
+               return NULL;
+       nv30->chipset = chipset;
+       nv30->nvws = nvws;
+
+       /* Notifier for sync purposes */
+       ret = nvws->notifier_alloc(nvws, 1, &nv30->sync);
+       if (ret) {
+               NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       /* Query objects */
+       ret = nvws->notifier_alloc(nvws, 32, &nv30->query);
+       if (ret) {
+               NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       ret = nvws->res_init(&nv30->query_heap, 0, 32);
+       if (ret) {
+               NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       /* Vtxprog resources */
+       if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 512) ||
+           nvws->res_init(&nv30->vertprog.data_heap, 0, 256)) {
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       /* Static rankine initialisation */
+       if (!nv30_init_hwctx(nv30, rankine_class)) {
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       /* Pipe context setup */
+       nv30->pipe.winsys = pipe_winsys;
+
+       nv30->pipe.destroy = nv30_destroy;
+       nv30->pipe.get_name = nv30_get_name;
+       nv30->pipe.get_vendor = nv30_get_vendor;
+       nv30->pipe.get_param = nv30_get_param;
+       nv30->pipe.get_paramf = nv30_get_paramf;
+
+       nv30->pipe.draw_arrays = nv30_draw_arrays;
+       nv30->pipe.draw_elements = nv30_draw_elements;
+       nv30->pipe.clear = nv30_clear;
+
+       nv30->pipe.flush = nv30_flush;
+
+       nv30_init_query_functions(nv30);
+       nv30_init_surface_functions(nv30);
+       nv30_init_state_functions(nv30);
+       nv30_init_miptree_functions(nv30);
+
+       /*
+        * Draw module setup.  A failed draw_create() is treated like every
+        * other failure in this function instead of relying on assert(),
+        * which compiles to nothing under NDEBUG.
+        */
+       nv30->draw = draw_create();
+       if (!nv30->draw) {
+               NOUVEAU_ERR("Error creating draw context\n");
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+
+       render_stage = nv30_draw_render_stage(nv30);
+       if (!render_stage) {
+               NOUVEAU_ERR("Error creating draw render stage\n");
+               nv30_destroy(&nv30->pipe);
+               return NULL;
+       }
+       draw_set_rasterize_stage(nv30->draw, render_stage);
+
+       return &nv30->pipe;
+}
+       
diff --git a/src/mesa/pipe/nv30/nv30_context.h b/src/mesa/pipe/nv30/nv30_context.h
new file mode 100644 (file)
index 0000000..d2262c5
--- /dev/null
@@ -0,0 +1,136 @@
+#ifndef __NV30_CONTEXT_H__
+#define __NV30_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/draw/draw_vertex.h"
+
+#include "pipe/nouveau/nouveau_winsys.h"
+#include "pipe/nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
+       struct nv30_context *ctx = nv30
+#include "pipe/nouveau/nouveau_push.h"
+
+#include "nv30_state.h"
+/*
+ * Logging helpers, both writing to stderr.  NOUVEAU_ERR prefixes the message
+ * with the calling function name and line number; NOUVEAU_MSG prefixes it
+ * with "nouveau: ".  fmt must be a string literal because it is pasted onto
+ * the prefix literal.
+ *
+ * NOTE(review): both expansions already end in ';', so a call written as
+ * NOUVEAU_ERR(...); produces an extra empty statement and cannot be used as
+ * the sole body of an unbraced if that has an else branch.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+       fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+       fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV30_NEW_VERTPROG      (1 << 1)
+#define NV30_NEW_FRAGPROG      (1 << 2)
+#define NV30_NEW_ARRAYS                (1 << 3)
+/*
+ * Driver-private context.  The generic pipe_context is embedded as the first
+ * member so a pipe_context pointer can be cast back to this struct.
+ */
+struct nv30_context {
+       struct pipe_context pipe; /* must stay first: nv30_context() casts pipe_context* to nv30_context* */
+       struct nouveau_winsys *nvws; /* winsys handed to nv30_create() */
+
+       struct draw_context *draw; /* from draw_create() in nv30_create() */
+
+       int chipset; /* chipset id passed to nv30_create() */
+       struct nouveau_grobj *rankine; /* 3D object allocated in nv30_init_hwctx() */
+       struct nouveau_notifier *sync; /* notifier nv30_flush() waits on for PIPE_FLUSH_WAIT */
+
+       /* query objects */
+       struct nouveau_notifier *query; /* allocated with 32 entries in nv30_create() */
+       struct nouveau_resource *query_heap; /* initialised over 0..32 in nv30_create() */
+
+       uint32_t dirty; /* presumably a mask of NV30_NEW_* bits -- TODO confirm */
+
+       struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+       struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+       unsigned dirty_samplers;
+       unsigned fp_samplers;
+       unsigned vp_samplers;
+
+       uint32_t rt_enable;
+       struct pipe_buffer *rt[4];
+       struct pipe_buffer *zeta;
+
+       struct {
+               struct pipe_buffer *buffer;
+               uint32_t format;
+       } tex[16];
+
+       unsigned vb_enable;
+       struct {
+               struct pipe_buffer *buffer;
+               unsigned delta;
+       } vb[16];
+
+       struct {
+               struct nouveau_resource *exec_heap; /* initialised over 0..512 in nv30_create() */
+               struct nouveau_resource *data_heap; /* initialised over 0..256 in nv30_create() */
+
+               struct nv30_vertex_program *active;
+
+               struct nv30_vertex_program *current;
+               struct pipe_buffer *constant_buf;
+       } vertprog;
+
+       struct {
+               struct nv30_fragment_program *active;
+
+               struct nv30_fragment_program *current;
+               struct pipe_buffer *constant_buf;
+       } fragprog;
+
+       struct pipe_vertex_buffer  vtxbuf[PIPE_ATTRIB_MAX];
+       struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX];
+};
+/*
+ * Convert a pipe_context pointer to the nv30_context that embeds it.  This
+ * is a plain pointer cast, so it is only meaningful for a pipe_context that
+ * is the first member of a struct nv30_context.
+ */
+static inline struct nv30_context *
+nv30_context(struct pipe_context *pipe)
+{
+       return (struct nv30_context *)pipe;
+}
+
+extern void nv30_init_state_functions(struct nv30_context *nv30);
+extern void nv30_init_surface_functions(struct nv30_context *nv30);
+extern void nv30_init_miptree_functions(struct nv30_context *nv30);
+extern void nv30_init_query_functions(struct nv30_context *nv30);
+
+/* nv30_draw.c */
+extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30);
+
+/* nv30_vertprog.c */
+extern void nv30_vertprog_translate(struct nv30_context *,
+                                   struct nv30_vertex_program *);
+extern void nv30_vertprog_bind(struct nv30_context *,
+                              struct nv30_vertex_program *);
+extern void nv30_vertprog_destroy(struct nv30_context *,
+                                 struct nv30_vertex_program *);
+
+/* nv30_fragprog.c */
+extern void nv30_fragprog_translate(struct nv30_context *,
+                                   struct nv30_fragment_program *);
+extern void nv30_fragprog_bind(struct nv30_context *,
+                              struct nv30_fragment_program *);
+extern void nv30_fragprog_destroy(struct nv30_context *,
+                                 struct nv30_fragment_program *);
+
+/* nv30_fragtex.c */
+extern void nv30_fragtex_bind(struct nv30_context *);
+
+/* nv30_state.c and friends */
+extern void nv30_emit_hw_state(struct nv30_context *nv30);
+extern void nv30_state_tex_update(struct nv30_context *nv30);
+
+/* nv30_vbo.c */
+extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+                               unsigned start, unsigned count);
+extern boolean nv30_draw_elements(struct pipe_context *pipe,
+                                 struct pipe_buffer *indexBuffer,
+                                 unsigned indexSize,
+                                 unsigned mode, unsigned start,
+                                 unsigned count);
+
+/* nv30_clear.c */
+extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+                      unsigned clearValue);
+
+#endif
diff --git a/src/mesa/pipe/nv30/nv30_dma.h b/src/mesa/pipe/nv30/nv30_dma.h
new file mode 100644 (file)
index 0000000..6eff6b4
--- /dev/null
@@ -0,0 +1,66 @@
+#ifndef __NV30_DMA_H__
+#define __NV30_DMA_H__
+
+#include "pipe/nouveau/nouveau_winsys.h"
+/*
+ * Push-buffer emission macros.  Every macro expands to code that
+ * dereferences a variable named nv30, so they can only be used where such a
+ * pointer is in scope.
+ *
+ * OUT_RING: store one word at pushbuf->cur and advance cur.
+ */
+#define OUT_RING(data) do {                                                    \
+       (*nv30->nvws->channel->pushbuf->cur++) = (data);                       \
+} while(0)
+/* OUT_RINGp: copy size words (size * 4 bytes) from src to cur, advance cur by size. */
+#define OUT_RINGp(src,size) do {                                               \
+       memcpy(nv30->nvws->channel->pushbuf->cur, (src), (size) * 4);          \
+       nv30->nvws->channel->pushbuf->cur += (size);                           \
+} while(0)
+/* OUT_RINGf: emit the bit pattern of a float, converted through a union. */
+#define OUT_RINGf(data) do {                                                   \
+       union { float v; uint32_t u; } c;                                      \
+       c.v = (data);                                                          \
+       OUT_RING(c.u);                                                         \
+} while(0)
+/*
+ * BEGIN_RING: start a packet of size data words for method mthd on object
+ * nv30->obj.  If fewer than size + 1 words remain, push_flush is called with
+ * size + 1 first.  The header word combines the object's subchannel
+ * (<< 13), the size (<< 18) and the method; remaining is charged for the
+ * header and all data words up front.
+ */
+#define BEGIN_RING(obj,mthd,size) do {                                         \
+       if (nv30->nvws->channel->pushbuf->remaining < ((size) + 1))            \
+               nv30->nvws->push_flush(nv30->nvws->channel, ((size) + 1));     \
+       OUT_RING((nv30->obj->subc << 13) | ((size) << 18) | (mthd));           \
+       nv30->nvws->channel->pushbuf->remaining -= ((size) + 1);               \
+} while(0)
+/* BEGIN_RING_NI: BEGIN_RING with bit 0x40000000 set in the method (presumably "non-incrementing" -- TODO confirm). */
+#define BEGIN_RING_NI(obj,mthd,size) do {                                      \
+       BEGIN_RING(obj, (mthd) | 0x40000000, (size));                          \
+} while(0)
+/* FIRE_RING: call push_flush on the channel with a size argument of 0. */
+#define FIRE_RING() do {                                                       \
+       nv30->nvws->push_flush(nv30->nvws->channel, 0);                        \
+} while(0)
+/* OUT_RELOC: register a relocation for the word at cur via push_reloc, then emit 0 as that word. */
+#define OUT_RELOC(bo,data,flags,vor,tor) do {                                  \
+       nv30->nvws->push_reloc(nv30->nvws->channel,                            \
+                              nv30->nvws->channel->pushbuf->cur,              \
+                              (struct nouveau_bo *)(bo),                      \
+                              (data), (flags), (vor), (tor));                 \
+       OUT_RING(0);                                                           \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do {                                 \
+       OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor));        \
+} while(0)
+
+/* FB/TT object handle */
+#define OUT_RELOCo(bo,flags) do {                                              \
+       OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR,                            \
+                 nv30->nvws->channel->vram->handle,                           \
+                 nv30->nvws->channel->gart->handle);                          \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do {                                        \
+       OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0);              \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do {                                        \
+       OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0);             \
+} while(0)
+
+#endif
diff --git a/src/mesa/pipe/nv30/nv30_draw.c b/src/mesa/pipe/nv30/nv30_draw.c
new file mode 100644 (file)
index 0000000..bdeb975
--- /dev/null
@@ -0,0 +1,62 @@
+#include "pipe/draw/draw_private.h"
+#include "pipe/p_util.h"
+
+#include "nv30_context.h"
+
+struct nv30_draw_stage { /* draw-module stage plus a back-pointer to its nv30 context */
+       struct draw_stage draw; /* first member: nv30_draw_destroy() frees the object through a pointer to this field */
+       struct nv30_context *nv30; /* set by nv30_draw_render_stage() */
+};
+
+/* Point hook of the nv30 draw stage: a stub that only logs through NOUVEAU_ERR. */
+static void
+nv30_draw_point(struct draw_stage *draw, struct prim_header *prim)
+{
+       (void)draw;
+       (void)prim;
+       NOUVEAU_ERR("\n");
+}
+
+/* Line hook of the nv30 draw stage: a stub that only logs through NOUVEAU_ERR. */
+static void
+nv30_draw_line(struct draw_stage *draw, struct prim_header *prim)
+{
+       (void)draw;
+       (void)prim;
+       NOUVEAU_ERR("\n");
+}
+
+/* Triangle hook of the nv30 draw stage: a stub that only logs through NOUVEAU_ERR. */
+static void
+nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+       (void)draw;
+       (void)prim;
+       NOUVEAU_ERR("\n");
+}
+
+/* Flush hook of the nv30 draw stage: intentionally does nothing. */
+static void
+nv30_draw_flush(struct draw_stage *draw, unsigned flags)
+{
+       (void)draw;
+       (void)flags;
+}
+
+/* Stipple-counter reset hook: a stub that only logs through NOUVEAU_ERR. */
+static void
+nv30_draw_reset_stipple_counter(struct draw_stage *draw)
+{
+       (void)draw;
+       NOUVEAU_ERR("\n");
+}
+
+/*
+ * Destroy hook of the nv30 draw stage.  The draw_stage is the first member
+ * of struct nv30_draw_stage, so the pointer passed in is also the address of
+ * the allocation made by nv30_draw_render_stage().
+ */
+static void
+nv30_draw_destroy(struct draw_stage *draw)
+{
+       struct nv30_draw_stage *stage = (struct nv30_draw_stage *)draw;
+
+       free(stage);
+}
+
+/*
+ * Allocate the nv30 draw stage and wire up its hooks.
+ *
+ * The stage records the owning context and that context's draw_context
+ * (nv30->draw, as it is at the time of the call).
+ *
+ * Returns the embedded draw_stage, or NULL if the allocation fails; the
+ * stage is released through its destroy hook (nv30_draw_destroy).
+ */
+struct draw_stage *
+nv30_draw_render_stage(struct nv30_context *nv30)
+{
+       struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage);
+
+       /* Report allocation failure instead of writing through NULL. */
+       if (!nv30draw)
+               return NULL;
+
+       nv30draw->nv30 = nv30;
+       nv30draw->draw.draw = nv30->draw;
+       nv30draw->draw.point = nv30_draw_point;
+       nv30draw->draw.line = nv30_draw_line;
+       nv30draw->draw.tri = nv30_draw_tri;
+       nv30draw->draw.flush = nv30_draw_flush;
+       nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter;
+       nv30draw->draw.destroy = nv30_draw_destroy;
+
+       return &nv30draw->draw;
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_fragprog.c b/src/mesa/pipe/nv30/nv30_fragprog.c
new file mode 100644 (file)
index 0000000..0233873
--- /dev/null
@@ -0,0 +1,834 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+#include "pipe/tgsi/util/tgsi_util.h"
+
+#include "nv30_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV30_FP_OP_COND_TR
+#include "nv30_shader.h"
+/*
+ * Shorthand wrappers around the nv30_sr_* source-register modifiers from
+ * nv30_shader.h; swz() and scale() paste their arguments onto the SWZ_ and
+ * NV30_FP_OP_DST_SCALE_ prefixes.
+ * NOTE(review): from here on in this file, abs() is this macro rather than
+ * the C library function of the same name.
+ */
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nv30_fpc { /* working state while a fragment program is translated */
+       struct nv30_fragment_program *fp; /* program whose insn array is being appended to */
+
+       uint attrib_map[PIPE_MAX_SHADER_INPUTS]; /* indexed by TGSI input register index in tgsi_src() */
+
+       int high_temp; /* highest temp register index seen so far by tgsi_src() */
+       int temp_temp_count; /* scratch temps handed out by temp(); they are numbered above high_temp */
+       int num_regs; /* one more than the highest temp index written, see emit_dst() */
+
+       uint depth_id;
+       uint colour_id;
+
+       unsigned inst_offset; /* index into fp->insn of the instruction being emitted */
+
+       struct {
+               int pipe; /* -1: value is in vals; >= 0: recorded as the index of a constant patched in later */
+               float vals[4]; /* only filled in when pipe == -1 */
+       } consts[MAX_CONSTS];
+       int nr_consts; /* entries of consts[] in use */
+
+       struct nv30_sreg imm[MAX_IMM];
+       unsigned nr_imm; /* upper bound checked for TGSI_FILE_IMMEDIATE indices */
+};
+
+/*
+ * Hand out a fresh scratch temporary.  Scratch temps are numbered directly
+ * above fpc->high_temp, in the order they are requested.
+ */
+static INLINE struct nv30_sreg
+temp(struct nv30_fpc *fpc)
+{
+       const int slot = fpc->temp_temp_count++;
+       const int reg = slot + (fpc->high_temp + 1);
+
+       return nv30_sr(NV30SR_TEMP, reg);
+}
+
+/*
+ * Reserve the next entry of fpc->consts and return a CONST source register
+ * that refers to it.
+ *
+ * pipe  -1 for an inline value taken from vals, otherwise the index stored
+ *       for later patching (see emit_src())
+ * vals  four floats, read only when pipe == -1
+ *
+ * NOTE(review): the table-full condition is only guarded by assert(0).
+ */
+static INLINE struct nv30_sreg
+constant(struct nv30_fpc *fpc, int pipe, float vals[4])
+{
+       int slot;
+
+       if (fpc->nr_consts == MAX_CONSTS) {
+               assert(0);
+       }
+
+       slot = fpc->nr_consts;
+       fpc->nr_consts = slot + 1;
+       fpc->consts[slot].pipe = pipe;
+
+       if (pipe == -1) {
+               memcpy(fpc->consts[slot].vals, vals,
+                      sizeof(fpc->consts[slot].vals));
+       }
+
+       return nv30_sr(NV30SR_CONST, slot);
+}
+/*
+ * Emission shorthands: the opcode argument o is pasted onto
+ * NV30_FP_OP_OPCODE_.  Note that tex() accepts s1 and s2 but does not use
+ * them; it always passes `none` for both.
+ */
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+       nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+                       (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+       nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+                   (d), (m), (s0), none, none)
+
+/*
+ * Extend the program's instruction array by size words.  The array may move,
+ * so pointers into fp->insn obtained earlier must be re-derived afterwards.
+ *
+ * NOTE(review): the realloc() result is stored without a check; on failure
+ * insn becomes NULL and the old array is lost.
+ */
+static void
+grow_insns(struct nv30_fpc *fpc, int size)
+{
+       struct nv30_fragment_program *prog = fpc->fp;
+
+       prog->insn_len += size;
+       prog->insn = realloc(prog->insn,
+                            sizeof(uint32_t) * prog->insn_len);
+}
+
+/*
+ * Encode source operand src into slot pos of the instruction at
+ * fpc->inst_offset.  nv30_fp_arith() calls this with pos 0, 1 and 2.
+ *
+ * The register descriptor is OR-ed into instruction word pos + 1; the "abs"
+ * modifier is a separate bit (29 + pos) in word 1, and an input register's
+ * index goes into word 0.
+ *
+ * A CONST source grows the program by four words following the instruction:
+ * an inline value is copied there directly, while a pipe constant gets those
+ * words zeroed and its location appended to fp->consts.
+ */
+static void
+emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+{
+       struct nv30_fragment_program *fp = fpc->fp;
+       uint32_t *hw = &fp->insn[fpc->inst_offset];
+       uint32_t sr = 0;
+
+       switch (src.type) {
+       case NV30SR_INPUT:
+               sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+               hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+               break;
+       case NV30SR_OUTPUT:
+               sr |= NV30_FP_REG_SRC_HALF;
+               /* fall-through */
+       case NV30SR_TEMP:
+               sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
+               sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+               break;
+       case NV30SR_CONST:
+               grow_insns(fpc, 4);
+               /* the array may have moved: re-derive the instruction pointer */
+               hw = &fp->insn[fpc->inst_offset];
+               if (fpc->consts[src.index].pipe >= 0) {
+                       struct nv30_fragment_program_data *fpd;
+
+                       fp->consts = realloc(fp->consts, ++fp->nr_consts *
+                                            sizeof(*fpd));
+                       fpd = &fp->consts[fp->nr_consts - 1];
+                       fpd->offset = fpc->inst_offset + 4;
+                       fpd->index = fpc->consts[src.index].pipe;
+                       memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+               } else {
+                       memcpy(&fp->insn[fpc->inst_offset + 4],
+                               fpc->consts[src.index].vals,
+                               sizeof(uint32_t) * 4);
+               }
+
+               sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+               break;
+       case NV30SR_NONE:
+               sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+               break;
+       default:
+               assert(0);
+       }
+
+       if (src.negate)
+               sr |= NV30_FP_REG_NEGATE;
+
+       /*
+        * Unsigned constant: for pos == 2 this sets bit 31, and shifting a
+        * signed 1 into the sign bit is undefined behaviour.
+        */
+       if (src.abs)
+               hw[1] |= (1u << (29 + pos));
+
+       sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
+              (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
+              (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
+              (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+
+       hw[pos + 1] |= sr;
+}
+
+/*
+ * Encode destination register dst into word 0 of the instruction at
+ * fpc->inst_offset.
+ *
+ * - TEMP:   raises fpc->num_regs so that it covers dst.index
+ * - OUTPUT: index 1 sets bits 0xe in fp->fp_control, any other index sets
+ *           NV30_FP_OP_OUT_REG_HALF in the instruction
+ * - NONE:   sets bit 30 of the instruction
+ * The register index itself is written for every type.
+ */
+static void
+emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+{
+       struct nv30_fragment_program *prog = fpc->fp;
+       uint32_t *word = &prog->insn[fpc->inst_offset];
+
+       if (dst.type == NV30SR_TEMP) {
+               if (fpc->num_regs < (dst.index + 1))
+                       fpc->num_regs = dst.index + 1;
+       } else if (dst.type == NV30SR_OUTPUT) {
+               if (dst.index == 1)
+                       prog->fp_control |= 0xe;
+               else
+                       word[0] |= NV30_FP_OP_OUT_REG_HALF;
+       } else if (dst.type == NV30SR_NONE) {
+               word[0] |= (1 << 30);
+       } else {
+               assert(0);
+       }
+
+       word[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+}
+
+/*
+ * Append one four-word instruction to the program and encode it.
+ *
+ * sat     non-zero to set the saturate bit
+ * op      hardware opcode; KIL additionally flags the program in fp_control
+ * dst     destination register, also carrying the condition-code and scale
+ *         settings
+ * mask    write mask
+ * s0..s2  source operands, encoded by emit_src() into slots 0..2
+ *
+ * fpc->inst_offset is left pointing at the new instruction.
+ */
+static void
+nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
+             struct nv30_sreg dst, int mask,
+             struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+       struct nv30_fragment_program *prog = fpc->fp;
+       uint32_t *insn;
+
+       /* Reserve the instruction at the current end of the program and zero it. */
+       fpc->inst_offset = prog->insn_len;
+       grow_insns(fpc, 4);
+       insn = &prog->insn[fpc->inst_offset];
+       insn[0] = insn[1] = insn[2] = insn[3] = 0;
+
+       if (op == NV30_FP_OP_OPCODE_KIL)
+               prog->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
+
+       /* word 0: opcode, write mask, saturate and condition-write flags */
+       insn[0] |= (op << NV30_FP_OP_OPCODE_SHIFT) |
+                  (mask << NV30_FP_OP_OUTMASK_SHIFT);
+       if (sat)
+               insn[0] |= NV30_FP_OP_OUT_SAT;
+       if (dst.cc_update)
+               insn[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
+
+       /* word 1: condition test and condition swizzle */
+       insn[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT) |
+                  (dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
+                  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
+                  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
+                  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT);
+
+       /* word 2: destination scale */
+       insn[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+
+       /* emit_src() may grow the program, so insn is not used past here */
+       emit_dst(fpc, dst);
+       emit_src(fpc, 0, s0);
+       emit_src(fpc, 1, s1);
+       emit_src(fpc, 2, s2);
+}
+
+/*
+ * Append a texture instruction: an ordinary instruction emitted through
+ * nv30_fp_arith() with the texture unit number added to its first word.
+ * The unit is also recorded in the program's sampler bitmask.
+ */
+static void
+nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
+           struct nv30_sreg dst, int mask,
+           struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+       struct nv30_fragment_program *prog = fpc->fp;
+       uint32_t *word0;
+
+       prog->samplers |= (1 << unit);
+
+       nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+       /* Take the pointer only now: emitting may have moved prog->insn. */
+       word0 = &prog->insn[fpc->inst_offset];
+       *word0 |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+}
+
+/*
+ * Translate a TGSI source operand into the driver's register description.
+ *
+ * fpc  - compiler state (attribute map, immediates, temp tracking)
+ * fsrc - TGSI source register to translate
+ *
+ * Returns the register with absolute/negate/swizzle modifiers copied from
+ * fsrc. Output-file sources are returned early, without modifiers, exactly
+ * as before. An unknown register file is reported and yields a
+ * NV30SR_NONE register instead of an uninitialised structure.
+ */
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+       struct nv30_sreg src;
+
+       switch (fsrc->SrcRegister.File) {
+       case TGSI_FILE_INPUT:
+               src = nv30_sr(NV30SR_INPUT,
+                             fpc->attrib_map[fsrc->SrcRegister.Index]);
+               break;
+       case TGSI_FILE_CONSTANT:
+               src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+               break;
+       case TGSI_FILE_IMMEDIATE:
+               assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+               src = fpc->imm[fsrc->SrcRegister.Index];
+               break;
+       case TGSI_FILE_TEMPORARY:
+               /* TGSI temp N is kept in hardware temp N + 1. */
+               src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+               if (fpc->high_temp < src.index)
+                       fpc->high_temp = src.index;
+               break;
+       /* This is clearly insane, but gallium hands us shaders like this.
+        * Luckily fragprog results are just temp regs..
+        */
+       case TGSI_FILE_OUTPUT:
+               if (fsrc->SrcRegister.Index == fpc->colour_id)
+                       return nv30_sr(NV30SR_OUTPUT, 0);
+               else
+                       return nv30_sr(NV30SR_OUTPUT, 1);
+       default:
+               NOUVEAU_ERR("bad src file\n");
+               /* Start from a defined register so that the caller never
+                * receives indeterminate type/index fields. */
+               src = nv30_sr(NV30SR_NONE, 0);
+               break;
+       }
+
+       src.abs = fsrc->SrcRegisterExtMod.Absolute;
+       src.negate = fsrc->SrcRegister.Negate;
+       src.swz[0] = fsrc->SrcRegister.SwizzleX;
+       src.swz[1] = fsrc->SrcRegister.SwizzleY;
+       src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+       src.swz[3] = fsrc->SrcRegister.SwizzleW;
+       return src;
+}
+
+/*
+ * Translate a TGSI destination operand into the driver's register
+ * description. The colour output becomes output 0 and any other output
+ * becomes output 1. Temporaries are shifted up by one and tracked in
+ * fpc->high_temp. The NULL file and unknown files (which are also
+ * reported) yield NV30SR_NONE.
+ */
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+       int hw_index;
+
+       if (fdst->DstRegister.File == TGSI_FILE_OUTPUT) {
+               hw_index = (fdst->DstRegister.Index == fpc->colour_id) ? 0 : 1;
+               return nv30_sr(NV30SR_OUTPUT, hw_index);
+       }
+
+       if (fdst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+               hw_index = fdst->DstRegister.Index + 1;
+               if (hw_index > fpc->high_temp)
+                       fpc->high_temp = hw_index;
+               return nv30_sr(NV30SR_TEMP, hw_index);
+       }
+
+       if (fdst->DstRegister.File != TGSI_FILE_NULL)
+               NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+
+       return nv30_sr(NV30SR_NONE, 0);
+}
+
+/*
+ * Convert a TGSI write mask into the hardware MASK_* bit layout, one
+ * component at a time.
+ */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+       int hw_mask = 0;
+
+       hw_mask |= (tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0;
+       hw_mask |= (tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0;
+       hw_mask |= (tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0;
+       hw_mask |= (tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0;
+
+       return hw_mask;
+}
+
+/*
+ * Decide whether the extended swizzle of fsrc can be used directly.
+ *
+ * Returns TRUE when all four components are plain X/Y/Z/W selects and no
+ * per-component negate is pending; *src is left untouched in that case.
+ *
+ * Otherwise a temporary is allocated into *src, instructions are emitted
+ * that build the requested value in it, and FALSE is returned.
+ */
+static boolean
+src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+              struct nv30_sreg *src)
+{
+       const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+       struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+       uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+       uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+                       fsrc->SrcRegisterExtSwz.NegateY,
+                       fsrc->SrcRegisterExtSwz.NegateZ,
+                       fsrc->SrcRegisterExtSwz.NegateW };
+       uint c;
+
+       /* Sort each component into "copied", "forced to zero" or "forced to
+        * one", and note which ones need an individual negate. */
+       for (c = 0; c < 4; c++) {
+               switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+               case TGSI_EXTSWIZZLE_X:
+               case TGSI_EXTSWIZZLE_Y:
+               case TGSI_EXTSWIZZLE_Z:
+               case TGSI_EXTSWIZZLE_W:
+                       mask |= (1 << c);
+                       break;
+               case TGSI_EXTSWIZZLE_ZERO:
+                       zero_mask |= (1 << c);
+                       /* The value is overwritten below; SWZ_X is only a
+                        * valid placeholder for the hardware swizzle. */
+                       tgsi.swz[c] = SWZ_X;
+                       break;
+               case TGSI_EXTSWIZZLE_ONE:
+                       one_mask |= (1 << c);
+                       tgsi.swz[c] = SWZ_X;
+                       break;
+               default:
+                       assert(0);
+               }
+
+               /* A per-component negate only counts when the whole-register
+                * negate is not already set. */
+               if (!tgsi.negate && neg[c])
+                       neg_mask |= (1 << c);
+       }
+
+       if (mask == MASK_ALL && !neg_mask)
+               return TRUE;
+
+       *src = temp(fpc);
+
+       /* Copy the ordinary components. */
+       if (mask)
+               arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+       /* NOTE(review): SFL/STR presumably write constant 0.0 / 1.0 into
+        * the masked components - confirm against the opcode reference. */
+       if (zero_mask)
+               arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+       if (one_mask)
+               arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+       /* Negate selected components by multiplying them with the negation
+        * of a second temporary filled by STR. */
+       if (neg_mask) {
+               struct nv30_sreg one = temp(fpc);
+               arith(fpc, 0, STR, one, neg_mask, one, none, none);
+               arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+       }
+
+       return FALSE;
+}
+
+static boolean
+nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
+                               const struct tgsi_full_instruction *finst)
+{
+       const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+       struct nv30_sreg src[3], dst, tmp;
+       int mask, sat, unit;
+       int ai = -1, ci = -1;
+       int i;
+
+       if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+               return TRUE;
+
+       fpc->temp_temp_count = 0;
+       for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+               const struct tgsi_full_src_register *fsrc;
+
+               fsrc = &finst->FullSrcRegisters[i];
+               if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+                       src[i] = tgsi_src(fpc, fsrc);
+               }
+       }
+
+       for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+               const struct tgsi_full_src_register *fsrc;
+
+               fsrc = &finst->FullSrcRegisters[i];
+
+               switch (fsrc->SrcRegister.File) {
+               case TGSI_FILE_INPUT:
+               case TGSI_FILE_CONSTANT:
+               case TGSI_FILE_TEMPORARY:
+                       if (!src_native_swz(fpc, fsrc, &src[i]))
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+
+               switch (fsrc->SrcRegister.File) {
+               case TGSI_FILE_INPUT:
+                       if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+                               ai = fsrc->SrcRegister.Index;
+                               src[i] = tgsi_src(fpc, fsrc);
+                       } else {
+                               NOUVEAU_MSG("extra src attr %d\n",
+                                        fsrc->SrcRegister.Index);
+                               src[i] = temp(fpc);
+                               arith(fpc, 0, MOV, src[i], MASK_ALL,
+                                     tgsi_src(fpc, fsrc), none, none);
+                       }
+                       break;
+               case TGSI_FILE_CONSTANT:
+               case TGSI_FILE_IMMEDIATE:
+                       if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+                               ci = fsrc->SrcRegister.Index;
+                               src[i] = tgsi_src(fpc, fsrc);
+                       } else {
+                               src[i] = temp(fpc);
+                               arith(fpc, 0, MOV, src[i], MASK_ALL,
+                                     tgsi_src(fpc, fsrc), none, none);
+                       }
+                       break;
+               case TGSI_FILE_TEMPORARY:
+                       /* handled above */
+                       break;
+               case TGSI_FILE_SAMPLER:
+                       unit = fsrc->SrcRegister.Index;
+                       break;
+               case TGSI_FILE_OUTPUT:
+                       break;
+               default:
+                       NOUVEAU_ERR("bad src file\n");
+                       return FALSE;
+               }
+       }
+
+       dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+       mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+       sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+       switch (finst->Instruction.Opcode) {
+       case TGSI_OPCODE_ABS:
+               arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+               break;
+       case TGSI_OPCODE_ADD:
+               arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_CMP:
+               tmp = temp(fpc);
+               arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+               tmp.cc_update = 1;
+               arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+               dst.cc_test = NV30_VP_INST_COND_LT;
+               arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+               break;
+       case TGSI_OPCODE_COS:
+               arith(fpc, sat, COS, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_DP3:
+               arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_DP4:
+               arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_DPH:
+               tmp = temp(fpc);
+               arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+               arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+                     swz(src[1], W, W, W, W), none);
+               break;
+       case TGSI_OPCODE_DST:
+               arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_EX2:
+               arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_FLR:
+               arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_FRC:
+               arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_KIL:
+               arith(fpc, 0, KIL, none, 0, none, none, none);
+               break;
+       case TGSI_OPCODE_KILP:
+               dst = nv30_sr(NV30SR_NONE, 0);
+               dst.cc_update = 1;
+               arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+               dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+               arith(fpc, 0, KIL, dst, 0, none, none, none);
+               break;
+       case TGSI_OPCODE_LG2:
+               arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+               break;
+//     case TGSI_OPCODE_LIT:
+       case TGSI_OPCODE_LRP:
+               tmp = temp(fpc);
+               arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+               arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+               break;
+       case TGSI_OPCODE_MAD:
+               arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+               break;
+       case TGSI_OPCODE_MAX:
+               arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_MIN:
+               arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_MOV:
+               arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_MUL:
+               arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_POW:
+               tmp = temp(fpc);
+               arith(fpc, 0, LG2, tmp, MASK_X,
+                     swz(src[0], X, X, X, X), none, none);
+               arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+                     swz(src[1], X, X, X, X), none);
+               arith(fpc, sat, EX2, dst, mask,
+                     swz(tmp, X, X, X, X), none, none);
+               break;
+       case TGSI_OPCODE_RCP:
+               arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_RET:
+               assert(0);
+               break;
+       case TGSI_OPCODE_RFL:
+               tmp = temp(fpc);
+               arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
+               arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
+               arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
+                     swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+               arith(fpc, sat, MAD, dst, mask,
+                     swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+               break;
+       case TGSI_OPCODE_RSQ:
+               tmp = temp(fpc);
+               arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
+                     abs(swz(src[0], X, X, X, X)), none, none);
+               arith(fpc, sat, EX2, dst, mask,
+                     neg(swz(tmp, X, X, X, X)), none, none);
+               break;
+       case TGSI_OPCODE_SCS:
+               if (mask & MASK_X) {
+                       arith(fpc, sat, COS, dst, MASK_X,
+                             swz(src[0], X, X, X, X), none, none);
+               }
+               if (mask & MASK_Y) {
+                       arith(fpc, sat, SIN, dst, MASK_Y,
+                             swz(src[0], X, X, X, X), none, none);
+               }
+               break;
+       case TGSI_OPCODE_SIN:
+               arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_SGE:
+               arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SLT:
+               arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SUB:
+               arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+               break;
+       case TGSI_OPCODE_TEX:
+               if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide ==
+                               TGSI_EXTSWIZZLE_W) {
+                       tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+               } else
+                       tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_TXB:
+               tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_XPD:
+               tmp = temp(fpc);
+               arith(fpc, 0, MUL, tmp, mask,
+                     swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+               arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+                     swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+                     neg(tmp));
+               break;
+       default:
+               NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Record the hardware input source for a declared fragment input.
+ *
+ * The semantic name/index of the declaration selects the hardware input,
+ * which is stored in fpc->attrib_map under the first register of the
+ * declared range. Returns FALSE (after logging) for a semantic this
+ * function does not map.
+ */
+static boolean
+nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
+                               const struct tgsi_full_declaration *fdec)
+{
+       int hw_input;
+
+       switch (fdec->Semantic.SemanticName) {
+       case TGSI_SEMANTIC_POSITION:
+               hw_input = NV30_FP_OP_INPUT_SRC_POSITION;
+               break;
+       case TGSI_SEMANTIC_COLOR:
+               switch (fdec->Semantic.SemanticIndex) {
+               case 0:
+                       hw_input = NV30_FP_OP_INPUT_SRC_COL0;
+                       break;
+               case 1:
+                       hw_input = NV30_FP_OP_INPUT_SRC_COL1;
+                       break;
+               default:
+                       NOUVEAU_ERR("bad colour semantic index\n");
+                       return FALSE;
+               }
+               break;
+       case TGSI_SEMANTIC_FOG:
+               hw_input = NV30_FP_OP_INPUT_SRC_FOGC;
+               break;
+       case TGSI_SEMANTIC_GENERIC:
+               /* Generic indices 0..7 map onto the texcoord inputs. */
+               if (fdec->Semantic.SemanticIndex > 7) {
+                       NOUVEAU_ERR("bad generic semantic index\n");
+                       return FALSE;
+               }
+               hw_input = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+                                                  SemanticIndex);
+               break;
+       default:
+               NOUVEAU_ERR("bad input semantic\n");
+               return FALSE;
+       }
+
+       fpc->attrib_map[fdec->u.DeclarationRange.First] = hw_input;
+       return TRUE;
+}
+
+/*
+ * Remember which TGSI output register carries depth (POSITION semantic)
+ * and which carries colour. Any other output semantic is logged and
+ * rejected with FALSE.
+ */
+static boolean
+nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
+                               const struct tgsi_full_declaration *fdec)
+{
+       if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+               fpc->depth_id = fdec->u.DeclarationRange.First;
+               return TRUE;
+       }
+
+       if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_COLOR) {
+               fpc->colour_id = fdec->u.DeclarationRange.First;
+               return TRUE;
+       }
+
+       NOUVEAU_ERR("bad output semantic\n");
+       return FALSE;
+}
+
+/*
+ * Translate the TGSI tokens of fp->pipe into hardware instructions.
+ *
+ * nv30 - driver context (not used by the translation itself)
+ * fp   - fragment program; on success fp->insn holds the program followed
+ *        by a trailing end instruction, fp->fp_control and
+ *        fp->fp_reg_control are filled in, fp->translated is set to TRUE
+ *        and fp->on_hw to FALSE.
+ *
+ * On any failure (allocation, unsupported declaration or instruction, too
+ * many immediates) the function returns with fp->translated unchanged.
+ */
+void
+nv30_fragprog_translate(struct nv30_context *nv30,
+                       struct nv30_fragment_program *fp)
+{
+       struct tgsi_parse_context parse;
+       struct nv30_fpc *fpc = NULL;
+
+       fpc = calloc(1, sizeof(struct nv30_fpc));
+       if (!fpc)
+               return;
+       fpc->fp = fp;
+       fpc->high_temp = -1;
+       fpc->num_regs = 2;
+
+       tgsi_parse_init(&parse, fp->pipe->tokens);
+
+       while (!tgsi_parse_end_of_tokens(&parse)) {
+               tgsi_parse_token(&parse);
+
+               switch (parse.FullToken.Token.Type) {
+               case TGSI_TOKEN_TYPE_DECLARATION:
+               {
+                       const struct tgsi_full_declaration *fdec;
+                       fdec = &parse.FullToken.FullDeclaration;
+                       switch (fdec->Declaration.File) {
+                       case TGSI_FILE_INPUT:
+                               if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
+                                       goto out_err;
+                               break;
+                       case TGSI_FILE_OUTPUT:
+                               if (!nv30_fragprog_parse_decl_output(fpc, fdec))
+                                       goto out_err;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+                       break;
+               case TGSI_TOKEN_TYPE_IMMEDIATE:
+               {
+                       struct tgsi_full_immediate *imm;
+                       /* Zeroed so that components the token does not
+                        * supply are never passed on indeterminate. */
+                       float vals[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+                       int i;
+
+                       imm = &parse.FullToken.FullImmediate;
+                       assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+                       assert(fpc->nr_imm < MAX_IMM);
+                       /* Keep the bound in builds without assertions. */
+                       if (fpc->nr_imm >= MAX_IMM) {
+                               NOUVEAU_ERR("too many immediates\n");
+                               goto out_err;
+                       }
+
+                       /* Never write past the four components of vals,
+                        * whatever size the token reports. */
+                       for (i = 0; i < 4 && i < imm->Immediate.Size; i++)
+                               vals[i] = imm->u.ImmediateFloat32[i].Float;
+                       fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+               }
+                       break;
+               case TGSI_TOKEN_TYPE_INSTRUCTION:
+               {
+                       const struct tgsi_full_instruction *finst;
+
+                       finst = &parse.FullToken.FullInstruction;
+                       if (!nv30_fragprog_parse_instruction(fpc, finst))
+                               goto out_err;
+               }
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       fp->fp_control |= (fpc->num_regs-1)/2;
+       fp->fp_reg_control = (1<<16)|0x4;
+
+       /* Terminate final instruction; skipped when nothing was emitted,
+        * because fp->insn then has no element to mark. */
+       if (fp->insn_len)
+               fp->insn[fpc->inst_offset] |= 0x00000001;
+
+       /* Append NOP + END instruction, may or may not be necessary. */
+       fpc->inst_offset = fp->insn_len;
+       grow_insns(fpc, 4);
+       fp->insn[fpc->inst_offset + 0] = 0x00000001;
+       fp->insn[fpc->inst_offset + 1] = 0x00000000;
+       fp->insn[fpc->inst_offset + 2] = 0x00000000;
+       fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+       fp->translated = TRUE;
+       fp->on_hw = FALSE;
+out_err:
+       tgsi_parse_free(&parse);
+       free(fpc);
+}
+
+/*
+ * Make fp the active fragment program.
+ *
+ * The program is translated on first use. The constants embedded in its
+ * instruction stream are refreshed from the constant buffer, and the
+ * program is copied into its buffer again whenever it is not marked as
+ * being on the hardware. Finally the two control methods are emitted and
+ * fp is recorded in nv30->fragprog.active.
+ */
+void
+nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp)
+{
+       struct pipe_winsys *ws = nv30->pipe.winsys;
+       int i;
+
+       /* Lazy translation; a program that still is not translated is
+        * treated as fatal. */
+       if (!fp->translated) {
+               nv30_fragprog_translate(nv30, fp);
+               if (!fp->translated)
+                       assert(0);
+       }
+
+       if (fp->nr_consts) {
+               float *cbuf = ws->buffer_map(ws, nv30->fragprog.constant_buf,
+                                            PIPE_BUFFER_USAGE_CPU_READ);
+
+               for (i = 0; i < fp->nr_consts; i++) {
+                       struct nv30_fragment_program_data *slot = &fp->consts[i];
+                       uint32_t *inlined = &fp->insn[slot->offset];
+                       uint32_t *current = (uint32_t *)&cbuf[slot->index * 4];
+
+                       /* Only a changed vec4 forces a new upload. */
+                       if (memcmp(inlined, current, 4 * sizeof(float))) {
+                               memcpy(inlined, current, 4 * sizeof(float));
+                               fp->on_hw = 0;
+                       }
+               }
+               ws->buffer_unmap(ws, nv30->fragprog.constant_buf);
+       }
+
+       if (!fp->on_hw) {
+               const uint32_t probe = 1;
+               /* Non-zero when the first byte of a 32-bit 1 is set, i.e.
+                * on a little-endian host. */
+               const int little_endian = *(const uint8_t *)&probe;
+               uint32_t *dest;
+
+               if (!fp->buffer)
+                       fp->buffer = ws->buffer_create(ws, 0x100, 0,
+                                                      fp->insn_len * 4);
+               dest = ws->buffer_map(ws, fp->buffer,
+                                     PIPE_BUFFER_USAGE_CPU_WRITE);
+
+               for (i = 0; i < fp->insn_len; i++) {
+                       const uint32_t word = fp->insn[i];
+
+                       if (little_endian) {
+                               dest[i] = word;
+                       } else {
+                               /* Weird swapping for big-endian chips */
+                               dest[i] = ((word & 0xffff) << 16) |
+                                          ((word >> 16) & 0xffff);
+                       }
+               }
+
+               ws->buffer_unmap(ws, fp->buffer);
+               fp->on_hw = TRUE;
+       }
+
+       BEGIN_RING(rankine, NV34TCL_FP_CONTROL, 1);
+       OUT_RING  (fp->fp_control);
+       BEGIN_RING(rankine, NV34TCL_FP_REG_CONTROL, 1);
+       OUT_RING  (fp->fp_reg_control);
+
+       nv30->fragprog.active = fp;
+}
+
+/*
+ * Release the translated instruction array of fp, if one was produced.
+ * Nothing else owned by fp is touched here.
+ */
+void
+nv30_fragprog_destroy(struct nv30_context *nv30,
+                     struct nv30_fragment_program *fp)
+{
+       if (fp->insn_len == 0)
+               return;
+
+       free(fp->insn);
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_fragtex.c b/src/mesa/pipe/nv30/nv30_fragtex.c
new file mode 100644 (file)
index 0000000..e75b1f7
--- /dev/null
@@ -0,0 +1,160 @@
+#include "nv30_context.h"
+
+/*
+ * Integer base-2 logarithm, rounded down: the position of the highest set
+ * bit of i. Returns 0 for an input of 0. Assumes a 32-bit int, as the
+ * binary search starts with the upper 16 bits.
+ */
+static inline int log2i(int i)
+{
+       unsigned int bits = (unsigned int)i;
+       int result = 0;
+       int step;
+
+       /* Binary search: halve the window from 16 bits down to 1. */
+       for (step = 16; step > 0; step >>= 1) {
+               if (bits >> step) {
+                       bits >>= step;
+                       result += step;
+               }
+       }
+
+       return result;
+}
+
+/*
+ * Initializer for one nv30_texture_format entry. The fields are written
+ * in the declaration order of the struct below: defined, pipe, format,
+ * swizzle.
+ *
+ *   m          - suffix of the PIPE_FORMAT_* name
+ *   tf         - suffix of the NV34TCL_TX_FORMAT_FORMAT_* name
+ *   ts0x..ts0w - suffixes of the NV34TCL_TX_SWIZZLE_S0_{X,Y,Z,W}_* values
+ *   ts1x..ts1w - suffixes of the NV34TCL_TX_SWIZZLE_S1_{X,Y,Z,W}_* values
+ */
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
+{                                                                              \
+  TRUE,                                                                        \
+  PIPE_FORMAT_##m,                                                             \
+  NV34TCL_TX_FORMAT_FORMAT_##tf,                                               \
+  (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y |           \
+   NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w |           \
+   NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y |           \
+   NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w),           \
+}
+
+/* One row of the pipe-format to hardware texture format table. */
+struct nv30_texture_format {
+       boolean defined;        /* TRUE for real entries; terminator is 0 */
+       uint    pipe;           /* PIPE_FORMAT_* value this row matches */
+       int     format;         /* NV34TCL_TX_FORMAT_FORMAT_* value */
+       int     swizzle;        /* OR of the NV34TCL_TX_SWIZZLE_* values */
+};
+
+/*
+ * Supported texture formats. The table ends with an empty entry whose
+ * "defined" field is zero, which is what stops the search in
+ * nv30_fragtex_format(). Commented-out rows are formats that are not
+ * enabled.
+ */
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+       _(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
+       _(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
+       _(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W),
+//     _(R5G6B5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+       _(U_L8          , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X),
+       _(U_A8          , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X),
+       _(U_I8          , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
+       _(U_A8_L8       , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y),
+//     _(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X),
+//     _(Z24S8_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X),
+//     _(RGB_DXT1      , 0x86,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0x00, 0x00),
+//     _(RGBA_DXT1     , 0x86,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//     _(RGBA_DXT3     , 0x87,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//     _(RGBA_DXT5     , 0x88,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+       {},
+};
+
+/*
+ * Look up the table entry for a pipe texture format.
+ * Returns NULL when the format is not in nv30_texture_formats.
+ */
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+       struct nv30_texture_format *entry;
+
+       /* The table ends with an entry whose "defined" field is zero. */
+       for (entry = nv30_texture_formats; entry->defined; entry++) {
+               if (entry->pipe == pipe_format)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+
+/*
+ * Emit the hardware state for one texture unit.
+ *
+ * nv30 - driver context; the sampler state and miptree bound to `unit`
+ *        are read from it
+ * unit - texture unit to program
+ *
+ * Nothing is emitted when the texture format or target is unsupported;
+ * an error is logged instead.
+ */
+static void
+nv30_fragtex_build(struct nv30_context *nv30, int unit)
+{
+       struct nv30_sampler_state *ps = nv30->tex_sampler[unit];
+       struct nv30_miptree *nv30mt = nv30->tex_miptree[unit];
+       struct pipe_texture *pt = &nv30mt->base;
+       struct nv30_texture_format *tf;
+       uint32_t txf, txs;
+
+       /* The lookup only ever returns entries marked as defined. */
+       tf = nv30_fragtex_format(pt->format);
+       if (!tf) {
+               NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+               return;
+       }
+
+       /* Format word: format code, mip level count and log2 of each
+        * base dimension. */
+       txf  = tf->format << 8;
+       txf |= (pt->last_level - pt->first_level + 1) << 16;
+       txf |= log2i(pt->width[0]) << 20;
+       txf |= log2i(pt->height[0]) << 24;
+       txf |= log2i(pt->depth[0]) << 28;
+       txf |= 8;
+
+       /* Cube maps are not handled yet; they fall into the default case. */
+       switch (pt->target) {
+       case PIPE_TEXTURE_2D:
+               txf |= (2<<4);
+               break;
+       case PIPE_TEXTURE_3D:
+               txf |= (3<<4);
+               break;
+       case PIPE_TEXTURE_1D:
+               txf |= (1<<4);
+               break;
+       default:
+               NOUVEAU_ERR("Unknown target %d\n", pt->target);
+               return;
+       }
+
+       txs = tf->swizzle;
+
+       BEGIN_RING(rankine, NV34TCL_TX_OFFSET(unit), 8);
+       OUT_RELOCl(nv30mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+       OUT_RELOCd(nv30mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+       OUT_RING  (ps->wrap);
+       OUT_RING  (0x40000000); /* enable */
+       OUT_RING  (txs);
+       OUT_RING  (ps->filt | 0x2000 /* magic */);
+       OUT_RING  ((pt->width[0] << 16) | pt->height[0]);
+       OUT_RING  (ps->bcol);
+}
+
+/*
+ * Bring the texture units in line with the active fragment program.
+ * Units that were enabled but are not sampled by the program are turned
+ * off, and dirty units that the program samples are programmed again.
+ */
+void
+nv30_fragtex_bind(struct nv30_context *nv30)
+{
+       struct nv30_fragment_program *fp = nv30->fragprog.active;
+       unsigned pending, unit;
+
+       /* Enabled before, no longer referenced: disable. */
+       for (pending = nv30->fp_samplers & ~fp->samplers; pending; ) {
+               unit = ffs(pending) - 1;
+               pending &= pending - 1; /* clear the lowest set bit */
+
+               BEGIN_RING(rankine, NV34TCL_TX_ENABLE(unit), 1);
+               OUT_RING  (0);
+       }
+
+       /* Dirty and referenced: emit fresh state. */
+       for (pending = nv30->dirty_samplers & fp->samplers; pending; ) {
+               unit = ffs(pending) - 1;
+               pending &= pending - 1; /* clear the lowest set bit */
+
+               nv30_fragtex_build(nv30, unit);
+       }
+
+       nv30->fp_samplers = fp->samplers;
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_miptree.c b/src/mesa/pipe/nv30/nv30_miptree.c
new file mode 100644 (file)
index 0000000..75e9b99
--- /dev/null
@@ -0,0 +1,105 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+
+#include "nv30_context.h"
+
+/*
+ * Compute the memory layout of a miptree.
+ *
+ * Fills in the per-level dimensions of the texture, the pitch and a newly
+ * allocated image_offset array for every level, and the total size.
+ * Images are placed face by face, with all levels of one face stored
+ * consecutively.
+ */
+static void
+nv30_miptree_layout(struct nv30_miptree *nv30mt)
+{
+       struct pipe_texture *tex = &nv30mt->base;
+       const boolean swizzled = FALSE;
+       uint w = tex->width[0], h = tex->height[0], d = tex->depth[0];
+       uint next_offset = 0;
+       int faces, level, face;
+
+       switch (tex->target) {
+       case PIPE_TEXTURE_CUBE:
+               faces = 6;
+               break;
+       case PIPE_TEXTURE_3D:
+               faces = tex->depth[0];
+               break;
+       default:
+               faces = 1;
+               break;
+       }
+
+       for (level = tex->first_level; level <= tex->last_level; level++) {
+               tex->width[level] = w;
+               tex->height[level] = h;
+               tex->depth[level] = d;
+
+               /* Without swizzling every level uses the pitch of the base
+                * width; the pitch is then rounded up to 64 bytes. */
+               nv30mt->level[level].pitch =
+                       (swizzled ? tex->width[level] : tex->width[0]) *
+                       tex->cpp;
+               nv30mt->level[level].pitch =
+                       (nv30mt->level[level].pitch + 63) & ~63;
+
+               nv30mt->level[level].image_offset =
+                       calloc(faces, sizeof(unsigned));
+
+               /* Next level: halve each dimension, never below 1. */
+               w = MAX2(1, w >> 1);
+               h = MAX2(1, h >> 1);
+               d = MAX2(1, d >> 1);
+       }
+
+       for (face = 0; face < faces; face++) {
+               for (level = tex->first_level; level <= tex->last_level;
+                    level++) {
+                       nv30mt->level[level].image_offset[face] = next_offset;
+                       next_offset += nv30mt->level[level].pitch *
+                                      tex->height[level];
+               }
+       }
+
+       nv30mt->total_size = next_offset;
+}
+
+/*
+ * Turn the texture template in *pt into a full nv30 miptree.
+ *
+ * pipe - context whose winsys allocates the backing buffer
+ * pt   - in: heap-allocated texture template; out: the finished texture,
+ *        or NULL when creation failed
+ *
+ * On every failure path the texture memory is released and *pt is set to
+ * NULL, so the caller can always detect the failure.
+ */
+static void
+nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt)
+{
+       struct pipe_winsys *ws = pipe->winsys;
+       struct nv30_miptree *nv30mt;
+       int l;
+
+       nv30mt = realloc(*pt, sizeof(struct nv30_miptree));
+       if (!nv30mt) {
+               /* realloc() leaves the old block alone on failure; drop it
+                * rather than hand back an object that is too small. */
+               free(*pt);
+               *pt = NULL;
+               return;
+       }
+       *pt = NULL;
+
+       nv30_miptree_layout(nv30mt);
+
+       nv30mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+                                          nv30mt->total_size);
+       if (!nv30mt->buffer) {
+               /* Release the per-level offset arrays made by the layout
+                * pass before freeing the miptree itself. */
+               for (l = nv30mt->base.first_level;
+                    l <= nv30mt->base.last_level; l++)
+                       free(nv30mt->level[l].image_offset);
+               free(nv30mt);
+               return;
+       }
+
+       *pt = &nv30mt->base;
+}
+
+/*
+ * Drop one reference to the texture in *pt and clear the caller's
+ * pointer. When the reference count reaches zero the backing buffer
+ * reference, the per-level offset arrays and the miptree itself are
+ * released.
+ */
+static void
+nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt)
+{
+       struct pipe_winsys *ws = pipe->winsys;
+       struct pipe_texture *tex = *pt;
+       struct nv30_miptree *nv30mt;
+       int level;
+
+       *pt = NULL;
+
+       /* Other references remain: nothing to free. */
+       if (--tex->refcount > 0)
+               return;
+
+       nv30mt = (struct nv30_miptree *)tex;
+
+       pipe_buffer_reference(ws, &nv30mt->buffer, NULL);
+       for (level = tex->first_level; level <= tex->last_level; level++)
+               free(nv30mt->level[level].image_offset);
+       free(nv30mt);
+}
+
+/* Install the texture create/release hooks in the pipe context. */
+void
+nv30_init_miptree_functions(struct nv30_context *nv30)
+{
+       struct pipe_context *pipe = &nv30->pipe;
+
+       pipe->texture_release = nv30_miptree_release;
+       pipe->texture_create = nv30_miptree_create;
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_query.c b/src/mesa/pipe/nv30/nv30_query.c
new file mode 100644 (file)
index 0000000..ea74c0f
--- /dev/null
@@ -0,0 +1,112 @@
+#include "pipe/p_context.h"
+
+#include "nv30_context.h"
+
+/* Driver-side state behind an opaque struct pipe_query handle. */
+struct nv30_query {
+       /* Slot allocated from nv30->query_heap by nv30_query_begin(); its
+        * ->start value identifies the notifier entry.  Released again once
+        * the result has been read or the query is destroyed. */
+       struct nouveau_resource *object;
+       /* Query type given at creation; begin/result assert that it is
+        * PIPE_QUERY_OCCLUSION_COUNTER. */
+       unsigned type;
+       /* TRUE once 'result' holds the value read back from the notifier. */
+       boolean ready;
+       /* Cached value returned by notifier_retval(). */
+       uint64_t result;
+};
+
+/* Downcast an opaque pipe_query handle to the driver's query structure. */
+static inline struct nv30_query *
+nv30_query(struct pipe_query *pipe)
+{
+       struct nv30_query *query = (struct nv30_query *)pipe;
+
+       return query;
+}
+
+/*
+ * create_query hook: allocate a zero-initialised query object of the given
+ * type.  Returns NULL if the allocation fails.
+ */
+static struct pipe_query *
+nv30_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+       struct nv30_query *q;
+
+       q = calloc(1, sizeof(*q));
+       if (!q)
+               return NULL;
+
+       q->type = query_type;
+
+       return (struct pipe_query *)q;
+}
+
+/*
+ * destroy_query hook: give back the notifier slot if the query still holds
+ * one, then free the query object itself.
+ */
+static void
+nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+       struct nv30_query *query = nv30_query(pq);
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       if (query->object != NULL)
+               nv30->nvws->res_free(&query->object);
+
+       free(query);
+}
+
+/*
+ * begin_query hook: allocate a notifier slot for the query, reset it and
+ * tell the hardware to restart its counter.
+ *
+ * Only PIPE_QUERY_OCCLUSION_COUNTER queries are supported.
+ */
+static void
+nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct nv30_query *q = nv30_query(pq);
+
+       assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+       /* A previous begin whose result was never fetched still owns a slot.
+        * Release it first, as nv30_query_destroy() does, so the allocation
+        * below does not overwrite (and leak) it. */
+       if (q->object)
+               nv30->nvws->res_free(&q->object);
+
+       if (nv30->nvws->res_alloc(nv30->query_heap, 1, NULL, &q->object)) {
+               assert(0);
+               /* With NDEBUG the assert vanishes; do not go on to
+                * dereference a slot that was never allocated. */
+               return;
+       }
+       nv30->nvws->notifier_reset(nv30->query, q->object->start);
+
+       BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
+       OUT_RING  (1);
+       BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
+       OUT_RING  (1);
+
+       q->ready = FALSE;
+}
+
+/*
+ * end_query hook: ask the hardware to write the counter into this query's
+ * notifier slot and flush the command stream.
+ */
+static void
+nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+       struct nv30_query *query = nv30_query(pq);
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
+       /* slot index * 32: presumably the byte size of one notifier entry */
+       OUT_RING  (((query->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT) |
+                  (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT));
+       FIRE_RING();
+}
+
+/*
+ * get_query_result hook.
+ *
+ * Returns TRUE and stores the counter value in *result once it is available.
+ * If the hardware has not finished yet, returns FALSE when 'wait' is FALSE
+ * and otherwise blocks on the notifier until it completes.
+ *
+ * The value is cached in the query on first read, so the result may be
+ * fetched any number of times.
+ */
+static boolean
+nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+                 boolean wait, uint64 *result)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct nv30_query *q = nv30_query(pq);
+       struct nouveau_winsys *nvws = nv30->nvws;
+
+       assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+       if (!q->ready) {
+               unsigned status;
+
+               /* The notifier slot is only needed while the result is still
+                * pending.  It is released below after the first read
+                * (res_free() is handed &q->object, and
+                * nv30_query_destroy() tests the pointer, so it is presumably
+                * cleared), which is why this is not asserted for the cached
+                * case. */
+               assert(q->object);
+
+               status = nvws->notifier_status(nv30->query, q->object->start);
+               if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+                       if (wait == FALSE)
+                               return FALSE;
+                       nvws->notifier_wait(nv30->query, q->object->start,
+                                           NV_NOTIFY_STATE_STATUS_COMPLETED,
+                                           0);
+               }
+
+               q->result = nvws->notifier_retval(nv30->query,
+                                                 q->object->start);
+               q->ready = TRUE;
+               nvws->res_free(&q->object);
+       }
+
+       *result = q->result;
+       return TRUE;
+}
+
+/* Install the query hooks into the context's pipe vtable. */
+void
+nv30_init_query_functions(struct nv30_context *nv30)
+{
+       struct pipe_context *pipe = &nv30->pipe;
+
+       pipe->get_query_result = nv30_query_result;
+       pipe->end_query = nv30_query_end;
+       pipe->begin_query = nv30_query_begin;
+       pipe->destroy_query = nv30_query_destroy;
+       pipe->create_query = nv30_query_create;
+}
diff --git a/src/mesa/pipe/nv30/nv30_shader.h b/src/mesa/pipe/nv30/nv30_shader.h
new file mode 100644 (file)
index 0000000..dd3a36f
--- /dev/null
@@ -0,0 +1,490 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ *   ABS - MOV + NV40_VP_INST0_DEST_ABS
+ *   POW - EX2 + MUL + LG2
+ *   SUB - ADD, second source negated
+ *   SWZ - MOV
+ *   XPD -  
+ *
+ * Register access
+ *   - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ *   - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ *   According to the value returned for
+ *   MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ *   there are only two address registers available.  The destination in the
+ *   ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ *   When using vanilla ARB_v_p, the proprietary driver will squish both the
+ *   available ADDRESS regs into the first hardware reg in the X and Y
+ *   components.
+ *
+ *   To use an address reg as an index into consts, the CONST_SRC is set to
+ *   (const_base + offset) and INDEX_CONST is set.
+ *
+ *   To access the second address reg use ADDR_REG_SELECT_1. A particular
+ *   component of the address regs is selected with ADDR_SWZ.
+ *
+ *   Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}.  It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ *   The BRA/CAL instructions seem to follow a slightly different opcode
+ *   layout.  The destination instruction ID (IADDR) overlaps a source field.
+ *   Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ *   command, and is incremented automatically on each UPLOAD_INST FIFO
+ *   command.
+ *
+ *   Conditional branching is achieved by using the condition tests described
+ *   above.  There don't appear to be any dedicated looping instructions, but
+ *   loops can be built from a temp reg + conditional branching.
+ *
+ *   Subroutines may be uploaded before the main program itself, but the first
+ *   executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0
+ *
+ * Fields of the first of the four 32-bit words that make up a vertex program
+ * instruction.  Multi-bit fields come as a _SHIFT/_MASK pair; the indented
+ * defines are the values stored in the field that precedes them.
+ *
+ * NOTE(review): NV30_VP_INST_DEST_TEMP_ID_MASK and
+ * NV30_VP_INST_VEC_DEST_TEMP_MASK both cover bits 19:16 - presumably two
+ * names for the same field; confirm.
+ */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1        (1 << 24)
+#define NV30_VP_INST_SRC2_ABS           (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS           (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS           (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT         (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT        16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK        (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE        (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK      (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE        (1<<14)
+#define NV30_VP_INST_COND_SHIFT          11
+#define NV30_VP_INST_COND_MASK          (0x07 << 11)
+#  define NV30_VP_INST_COND_FL  0 /* guess */  
+#  define NV30_VP_INST_COND_LT  1  
+#  define NV30_VP_INST_COND_EQ  2
+#  define NV30_VP_INST_COND_LE  3
+#  define NV30_VP_INST_COND_GT  4
+#  define NV30_VP_INST_COND_NE  5
+#  define NV30_VP_INST_COND_GE  6
+#  define NV30_VP_INST_COND_TR  7 /* guess */
+/* Two-bit condition-register swizzle selectors, one per component; the _ALL
+ * pair spans all four of them (bits 10:3). */
+#define NV30_VP_INST_COND_SWZ_X_SHIFT        9
+#define NV30_VP_INST_COND_SWZ_X_MASK        (0x03 <<  9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT        7
+#define NV30_VP_INST_COND_SWZ_Y_MASK        (0x03 <<  7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT        5
+#define NV30_VP_INST_COND_SWZ_Z_MASK        (0x03 <<  5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT        3
+#define NV30_VP_INST_COND_SWZ_W_MASK        (0x03 <<  3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT        3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK        (0xFF <<  3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT        1
+#define NV30_VP_INST_ADDR_SWZ_MASK        (0x03 <<  1)
+/* Top bit of the scalar opcode; the low four bits are in DWORD 1. */
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT        0
+#define NV30_VP_INST_SCA_OPCODEH_MASK        (0x01 <<  0)
+
+/* DWORD 1
+ *
+ * Opcode fields.  The scalar opcode is five bits wide: its low four bits are
+ * the SCA_OPCODEL field here and its top bit is SCA_OPCODEH in DWORD 0
+ * (NV30_VP_INST_OP_COS = 0x10 needs that fifth bit).  The vector opcode is a
+ * single five-bit field.  Both opcode lists share the NV30_VP_INST_OP_
+ * prefix, so the same number means different things in the two fields.
+ */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT        28
+#define NV30_VP_INST_SCA_OPCODEL_MASK        (0x0F << 28)
+/* scalar opcodes */
+#  define NV30_VP_INST_OP_NOP  0x00
+#  define NV30_VP_INST_OP_RCP  0x02
+#  define NV30_VP_INST_OP_RCC  0x03
+#  define NV30_VP_INST_OP_RSQ  0x04
+#  define NV30_VP_INST_OP_EXP  0x05
+#  define NV30_VP_INST_OP_LOG  0x06
+#  define NV30_VP_INST_OP_LIT  0x07
+#  define NV30_VP_INST_OP_BRA  0x09
+#  define NV30_VP_INST_OP_CAL  0x0B
+#  define NV30_VP_INST_OP_RET  0x0C
+#  define NV30_VP_INST_OP_LG2  0x0D
+#  define NV30_VP_INST_OP_EX2  0x0E
+#  define NV30_VP_INST_OP_SIN  0x0F
+#  define NV30_VP_INST_OP_COS  0x10
+#define NV30_VP_INST_VEC_OPCODE_SHIFT        23
+#define NV30_VP_INST_VEC_OPCODE_MASK        (0x1F << 23)
+/* vector opcodes (DP4/DPH are listed out of numeric order) */
+#  define NV30_VP_INST_OP_NOPV  0x00
+#  define NV30_VP_INST_OP_MOV  0x01
+#  define NV30_VP_INST_OP_MUL  0x02
+#  define NV30_VP_INST_OP_ADD  0x03
+#  define NV30_VP_INST_OP_MAD  0x04
+#  define NV30_VP_INST_OP_DP3  0x05
+#  define NV30_VP_INST_OP_DP4  0x07
+#  define NV30_VP_INST_OP_DPH  0x06
+#  define NV30_VP_INST_OP_DST  0x08
+#  define NV30_VP_INST_OP_MIN  0x09
+#  define NV30_VP_INST_OP_MAX  0x0A
+#  define NV30_VP_INST_OP_SLT  0x0B
+#  define NV30_VP_INST_OP_SGE  0x0C
+#  define NV30_VP_INST_OP_ARL  0x0D
+#  define NV30_VP_INST_OP_FRC  0x0E
+#  define NV30_VP_INST_OP_FLR  0x0F
+#  define NV30_VP_INST_OP_SEQ  0x10
+#  define NV30_VP_INST_OP_SFL  0x11
+#  define NV30_VP_INST_OP_SGT  0x12
+#  define NV30_VP_INST_OP_SLE  0x13
+#  define NV30_VP_INST_OP_SNE  0x14
+#  define NV30_VP_INST_OP_STR  0x15
+#  define NV30_VP_INST_OP_SSG  0x16
+#  define NV30_VP_INST_OP_ARR  0x17
+#  define NV30_VP_INST_OP_ARA  0x18
+/* DWORD 1, continued: constant and input (vertex attribute) source
+ * selection, plus the high bits of source operand 0. */
+#define NV30_VP_INST_CONST_SRC_SHIFT        14
+#define NV30_VP_INST_CONST_SRC_MASK        (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT        9    /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK        (0x0F <<  9)  /*NV20*/
+#  define NV30_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */
+#  define NV30_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */
+#  define NV30_VP_INST_IN_NORMAL  2
+#  define NV30_VP_INST_IN_COL0  3    /* Should probably confirm them all though */
+#  define NV30_VP_INST_IN_COL1  4
+#  define NV30_VP_INST_IN_FOGC  5
+#  define NV30_VP_INST_IN_TC0  8
+/* The argument is parenthesised so that expressions such as TC(i & 3) or
+ * TC(a ? 1 : 2) expand with the intended precedence. */
+#  define NV30_VP_INST_IN_TC(n)  (8+(n))
+#define NV30_VP_INST_SRC0H_SHIFT        0    /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK          (0x1FF << 0)  /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions.  See Branching: label above
+ *
+ * DWORD 2
+ *
+ * Holds the low six bits of source 0, all fifteen bits of source 1 and the
+ * high eleven bits of source 2.
+ *
+ * NOTE(review): NV30_VP_INST_IADDR_SHIFT (2) does not match the bit position
+ * of NV30_VP_INST_IADDR_MASK (0xF << 28); one of the two looks stale -
+ * confirm against hardware traces before relying on either.
+ */
+#define NV30_VP_INST_SRC0L_SHIFT        26    /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK         (0x3F  <<26)  /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT         11    /*NV20*/
+#define NV30_VP_INST_SRC1_MASK          (0x7FFF<<11)  /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT        0    /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK          (0x7FF << 0)  /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT        2
+#define NV30_VP_INST_IADDR_MASK          (0xF <<  28)   /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3
+ *
+ * Low four bits of source 2, the four write masks (scalar/vector result to
+ * temp, scalar/vector result to output), the output register selector and
+ * the end-of-program marker.
+ */
+#define NV30_VP_INST_SRC2L_SHIFT        28    /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK          (0x0F  <<28)  /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT      24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK      (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT      20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK      (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT      16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK      (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT      12    /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK      (0x0F << 12)  /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT        2
+#define NV30_VP_INST_DEST_MASK        (0x0F <<  2)
+#  define NV30_VP_INST_DEST_POS  0
+#  define NV30_VP_INST_DEST_BFC0  1
+#  define NV30_VP_INST_DEST_BFC1  2
+#  define NV30_VP_INST_DEST_COL0  3
+#  define NV30_VP_INST_DEST_COL1  4
+#  define NV30_VP_INST_DEST_FOGC  5
+#  define NV30_VP_INST_DEST_PSZ   6
+/* The argument is parenthesised so that expressions such as TC(i & 3) or
+ * TC(a ? 1 : 2) expand with the intended precedence. */
+#  define NV30_VP_INST_DEST_TC(n)  (8+(n))
+
+/* Set on the final instruction of a program. */
+#define NV30_VP_INST_LAST                           (1 << 0)
+
+/* Useful to split the source selection regs into their pieces
+ *
+ * Sources 0 and 2 are each stored across two instruction dwords.  These
+ * masks cut a 15-bit source descriptor into the "high" part and the "low"
+ * part (6 low bits for source 0, 4 low bits for source 2), matching the
+ * SRC0H/SRC0L and SRC2H/SRC2L instruction fields.
+ */
+#define NV30_VP_SRC0_HIGH_SHIFT                                                6
+#define NV30_VP_SRC0_HIGH_MASK                                        0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK                                         0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT                                                4
+#define NV30_VP_SRC2_HIGH_MASK                                        0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK                                         0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly
+ *
+ * Layout of one 15-bit vertex program source descriptor: negate flag, a
+ * two-bit swizzle selector per component, the temp register index and the
+ * register type.
+ *
+ * NOTE(review): NV30_VP_SRC_TEMP_SRC_SHIFT is 2 but
+ * NV30_VP_SRC_REG_TEMP_ID_MASK is (0x0F << 0), which overlaps the type field
+ * in bits 1:0; the mask was presumably meant to be shifted by 2 - confirm.
+ */
+#define NV30_VP_SRC_NEGATE          (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT        12
+#define NV30_VP_SRC_REG_SWZ_X_MASK        (0x03  <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT        10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK        (0x03  <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT        8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK        (0x03  << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT        6
+#define NV30_VP_SRC_REG_SWZ_W_MASK        (0x03  << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT        6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK        (0xFF  << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT        2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK        (0x0F  << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT        0
+#define NV30_VP_SRC_REG_TYPE_MASK        (0x03  << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP  1
+#define NV30_VP_SRC_REG_TYPE_INPUT  2
+#define NV30_VP_SRC_REG_TYPE_CONST  3 /* guess */
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ *   0 - Opcode, output reg/mask, ATTRIB source
+ *   1 - Source 0
+ *   2 - Source 1
+ *   3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ *     result.color == R0.xyzw
+ *     result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ * 
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords.  As such instructions such as:
+ * 
+ *     ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ *   TODO
+ * 
+ * Non-native instructions:
+ *   LIT
+ *   LRP - MAD+MAD
+ *   SUB - ADD, negate second source
+ *   RSQ - LG2 + EX2
+ *   POW - LG2 + MUL + EX2
+ *   SCS - COS + SIN
+ *   XPD
+ */
+
+//== Opcode / Destination selection ==
+/* Fields of dword 0 of a fragment program instruction: end-of-program flag,
+ * output register and write mask, input attribute selector, texture unit,
+ * precision, opcode and the saturate flag. */
+#define NV30_FP_OP_PROGRAM_END          (1 << 0)
+#define NV30_FP_OP_OUT_REG_SHIFT        1
+#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV30_FP_OP_OUT_REG_HALF          (1 << 7)
+#define NV30_FP_OP_COND_WRITE_ENABLE        (1 << 8)
+#define NV30_FP_OP_OUTMASK_SHIFT        9
+#define NV30_FP_OP_OUTMASK_MASK          (0xF << 9)
+#  define NV30_FP_OP_OUT_X  (1<<9)
+#  define NV30_FP_OP_OUT_Y  (1<<10)
+#  define NV30_FP_OP_OUT_Z  (1<<11)
+#  define NV30_FP_OP_OUT_W  (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV30_FP_OP_INPUT_SRC_SHIFT        13
+#define NV30_FP_OP_INPUT_SRC_MASK        (15 << 13)
+#  define NV30_FP_OP_INPUT_SRC_POSITION  0x0
+#  define NV30_FP_OP_INPUT_SRC_COL0  0x1
+#  define NV30_FP_OP_INPUT_SRC_COL1  0x2
+#  define NV30_FP_OP_INPUT_SRC_FOGC  0x3
+#  define NV30_FP_OP_INPUT_SRC_TC0    0x4
+/* The argument is parenthesised so that expressions such as TC(i & 3) or
+ * TC(a ? 1 : 2) expand with the intended precedence. */
+#  define NV30_FP_OP_INPUT_SRC_TC(n)  (0x4 + (n))
+#define NV30_FP_OP_TEX_UNIT_SHIFT        17
+#define NV30_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */
+#define NV30_FP_OP_PRECISION_SHIFT        22
+#define NV30_FP_OP_PRECISION_MASK        (3 << 22)
+#   define NV30_FP_PRECISION_FP32  0
+#   define NV30_FP_PRECISION_FP16  1
+#   define NV30_FP_PRECISION_FX12  2
+#define NV30_FP_OP_OPCODE_SHIFT          24
+#define NV30_FP_OP_OPCODE_MASK          (0x3F << 24)
+#  define NV30_FP_OP_OPCODE_NOP  0x00
+#  define NV30_FP_OP_OPCODE_MOV  0x01
+#  define NV30_FP_OP_OPCODE_MUL  0x02
+#  define NV30_FP_OP_OPCODE_ADD  0x03
+#  define NV30_FP_OP_OPCODE_MAD  0x04
+#  define NV30_FP_OP_OPCODE_DP3  0x05
+#  define NV30_FP_OP_OPCODE_DP4  0x06
+#  define NV30_FP_OP_OPCODE_DST  0x07
+#  define NV30_FP_OP_OPCODE_MIN  0x08
+#  define NV30_FP_OP_OPCODE_MAX  0x09
+#  define NV30_FP_OP_OPCODE_SLT  0x0A
+#  define NV30_FP_OP_OPCODE_SGE  0x0B
+#  define NV30_FP_OP_OPCODE_SLE  0x0C
+#  define NV30_FP_OP_OPCODE_SGT  0x0D
+#  define NV30_FP_OP_OPCODE_SNE  0x0E
+#  define NV30_FP_OP_OPCODE_SEQ  0x0F
+#  define NV30_FP_OP_OPCODE_FRC  0x10
+#  define NV30_FP_OP_OPCODE_FLR  0x11
+#  define NV30_FP_OP_OPCODE_KIL  0x12
+#  define NV30_FP_OP_OPCODE_PK4B   0x13
+#  define NV30_FP_OP_OPCODE_UP4B   0x14
+#  define NV30_FP_OP_OPCODE_DDX  0x15 /* can only write XY */
+#  define NV30_FP_OP_OPCODE_DDY  0x16 /* can only write XY */
+#  define NV30_FP_OP_OPCODE_TEX  0x17
+#  define NV30_FP_OP_OPCODE_TXP  0x18
+#  define NV30_FP_OP_OPCODE_TXD  0x19
+#  define NV30_FP_OP_OPCODE_RCP  0x1A
+#  define NV30_FP_OP_OPCODE_RSQ  0x1B
+#  define NV30_FP_OP_OPCODE_EX2  0x1C
+#  define NV30_FP_OP_OPCODE_LG2  0x1D
+#  define NV30_FP_OP_OPCODE_LIT  0x1E
+#  define NV30_FP_OP_OPCODE_LRP  0x1F
+#  define NV30_FP_OP_OPCODE_STR  0x20
+#  define NV30_FP_OP_OPCODE_SFL  0x21
+#  define NV30_FP_OP_OPCODE_COS  0x22
+#  define NV30_FP_OP_OPCODE_SIN  0x23
+#  define NV30_FP_OP_OPCODE_PK2H   0x24
+#  define NV30_FP_OP_OPCODE_UP2H   0x25
+#  define NV30_FP_OP_OPCODE_POW  0x26
+#  define NV30_FP_OP_OPCODE_PK4UB  0x27
+#  define NV30_FP_OP_OPCODE_UP4UB  0x28
+#  define NV30_FP_OP_OPCODE_PK2US  0x29
+#  define NV30_FP_OP_OPCODE_UP2US  0x2A
+#  define NV30_FP_OP_OPCODE_DP2A   0x2E
+#  define NV30_FP_OP_OPCODE_TXB  0x31
+#  define NV30_FP_OP_OPCODE_RFL  0x36
+#  define NV30_FP_OP_OPCODE_DIV  0x3A
+/* Unsigned constant: shifting a signed 1 into bit 31 overflows int. */
+#define NV30_FP_OP_OUT_SAT          (1U << 31)
+
+/* high order bits of SRC0
+ *
+ * Extra instruction controls stored above the register selection bits of
+ * the source 0 dword: the absolute-value flag, a two-bit condition-code
+ * swizzle per component (the _ALL pair spans bits 28:21) and the condition
+ * test to apply.
+ */
+#define NV30_FP_OP_OUT_ABS          (1 << 29)
+#define NV30_FP_OP_COND_SWZ_W_SHIFT        27
+#define NV30_FP_OP_COND_SWZ_W_MASK        (3 << 27)
+#define NV30_FP_OP_COND_SWZ_Z_SHIFT        25
+#define NV30_FP_OP_COND_SWZ_Z_MASK        (3 << 25)
+#define NV30_FP_OP_COND_SWZ_Y_SHIFT        23
+#define NV30_FP_OP_COND_SWZ_Y_MASK        (3 << 23)
+#define NV30_FP_OP_COND_SWZ_X_SHIFT        21
+#define NV30_FP_OP_COND_SWZ_X_MASK        (3 << 21)
+#define NV30_FP_OP_COND_SWZ_ALL_SHIFT        21
+#define NV30_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21)
+#define NV30_FP_OP_COND_SHIFT          18
+#define NV30_FP_OP_COND_MASK          (0x07 << 18)
+#  define NV30_FP_OP_COND_FL  0
+#  define NV30_FP_OP_COND_LT  1
+#  define NV30_FP_OP_COND_EQ  2
+#  define NV30_FP_OP_COND_LE  3
+#  define NV30_FP_OP_COND_GT  4
+#  define NV30_FP_OP_COND_NE  5
+#  define NV30_FP_OP_COND_GE  6
+#  define NV30_FP_OP_COND_TR  7
+
+/* high order bits of SRC1
+ *
+ * Result scale factor applied to the destination.
+ *
+ * NOTE(review): NV30_FP_OP_DST_SCALE_MASK is (3 << 28), i.e. two bits wide,
+ * yet the INV_* values (5..7) need three bits; the mask is presumably meant
+ * to be (7 << 28) - confirm.  No value 4 is defined.
+ */
+#define NV30_FP_OP_DST_SCALE_SHIFT        28
+#define NV30_FP_OP_DST_SCALE_MASK        (3 << 28)
+#define NV30_FP_OP_DST_SCALE_1X                                                0
+#define NV30_FP_OP_DST_SCALE_2X                                                1
+#define NV30_FP_OP_DST_SCALE_4X                                                2
+#define NV30_FP_OP_DST_SCALE_8X                                                3
+#define NV30_FP_OP_DST_SCALE_INV_2X                                            5
+#define NV30_FP_OP_DST_SCALE_INV_4X                                            6
+#define NV30_FP_OP_DST_SCALE_INV_8X                                            7
+
+
+/* high order bits of SRC2 */
+#define NV30_FP_OP_INDEX_INPUT          (1 << 30)
+
+//== Register selection ==
+/* Layout of the register selection bits found in each of the three source
+ * dwords: register type, register index, half-precision flag, a two-bit
+ * swizzle selector per component (the _ALL pair spans bits 16:9) and the
+ * negate flag. */
+#define NV30_FP_REG_TYPE_SHIFT          0
+#define NV30_FP_REG_TYPE_MASK          (3 << 0)
+#  define NV30_FP_REG_TYPE_TEMP  0
+#  define NV30_FP_REG_TYPE_INPUT  1
+#  define NV30_FP_REG_TYPE_CONST  2
+#define NV30_FP_REG_SRC_SHIFT          2 /* uncertain */
+#define NV30_FP_REG_SRC_MASK          (31 << 2)
+#define NV30_FP_REG_SRC_HALF          (1 << 8)
+#define NV30_FP_REG_SWZ_ALL_SHIFT        9
+#define NV30_FP_REG_SWZ_ALL_MASK        (255 << 9)
+#define NV30_FP_REG_SWZ_X_SHIFT          9
+#define NV30_FP_REG_SWZ_X_MASK          (3 << 9)
+#define NV30_FP_REG_SWZ_Y_SHIFT          11
+#define NV30_FP_REG_SWZ_Y_MASK          (3 << 11)
+#define NV30_FP_REG_SWZ_Z_SHIFT          13
+#define NV30_FP_REG_SWZ_Z_MASK          (3 << 13)
+#define NV30_FP_REG_SWZ_W_SHIFT          15
+#define NV30_FP_REG_SWZ_W_MASK          (3 << 15)
+/* values for the per-component swizzle selectors above */
+#  define NV30_FP_SWIZZLE_X  0
+#  define NV30_FP_SWIZZLE_Y  1
+#  define NV30_FP_SWIZZLE_Z  2
+#  define NV30_FP_SWIZZLE_W  3
+#define NV30_FP_REG_NEGATE          (1 << 17)
+
+/* Register file identifiers used by the shader compilers; presumably the
+ * values carried in nv30_sreg.type - confirm against the users. */
+#define NV30SR_NONE    0
+#define NV30SR_OUTPUT  1
+#define NV30SR_INPUT   2
+#define NV30SR_TEMP    3
+#define NV30SR_CONST   4
+
+/* Software description of one shader register operand, passed around by
+ * value.  Build one with nv30_sr() and refine it with the nv30_sr_*()
+ * helpers. */
+struct nv30_sreg {
+       /* register file (presumably one of NV30SR_*) and index within it */
+       int type;
+       int index;
+
+       /* result scale; nv30_sr() initialises it to DEF_SCALE */
+       int dst_scale;
+
+       /* source modifiers; swz[] defaults to the identity { 0, 1, 2, 3 } */
+       int negate;
+       int abs;
+       int swz[4];
+
+       /* condition code update/test controls; cc_test defaults to DEF_CTEST
+        * and cc_swz[] to the identity { 0, 1, 2, 3 } */
+       int cc_update;
+       int cc_update_reg;
+       int cc_test;
+       int cc_test_reg;
+       int cc_swz[4];
+};
+
+/*
+ * Build a register descriptor for (type, index) with default modifiers:
+ * DEF_SCALE result scale, no negate/abs, identity swizzles, no condition
+ * code update and the DEF_CTEST condition test.  DEF_SCALE and DEF_CTEST
+ * must be defined by the including file.
+ */
+static INLINE struct nv30_sreg
+nv30_sr(int type, int index)
+{
+       struct nv30_sreg reg = { 0 };
+       int c;
+
+       reg.type = type;
+       reg.index = index;
+       reg.dst_scale = DEF_SCALE;
+       reg.cc_test = DEF_CTEST;
+
+       /* identity swizzle for both the operand and the condition code */
+       for (c = 0; c < 4; c++) {
+               reg.swz[c] = c;
+               reg.cc_swz[c] = c;
+       }
+
+       return reg;
+}
+
+/*
+ * Return a copy of 'src' with an additional swizzle applied on top of the
+ * one it already carries: each new component selects one of the existing
+ * swz[] entries by index (x, y, z, w).
+ */
+static INLINE struct nv30_sreg
+nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
+{
+       /* read all four selectors before any of them is overwritten */
+       const int sel_x = src.swz[x];
+       const int sel_y = src.swz[y];
+       const int sel_z = src.swz[z];
+       const int sel_w = src.swz[w];
+
+       src.swz[SWZ_X] = sel_x;
+       src.swz[SWZ_Y] = sel_y;
+       src.swz[SWZ_Z] = sel_z;
+       src.swz[SWZ_W] = sel_w;
+
+       return src;
+}
+
+/* Return a copy of 'src' with its negate flag toggled. */
+static INLINE struct nv30_sreg
+nv30_sr_neg(struct nv30_sreg src)
+{
+       struct nv30_sreg flipped = src;
+
+       flipped.negate = src.negate ? 0 : 1;
+       return flipped;
+}
+
+/* Return a copy of 'src' with the absolute-value modifier set. */
+static INLINE struct nv30_sreg
+nv30_sr_abs(struct nv30_sreg src)
+{
+       struct nv30_sreg result = src;
+
+       result.abs = 1;
+       return result;
+}
+
+/* Return a copy of 'src' with its result scale replaced by 'scale'. */
+static INLINE struct nv30_sreg
+nv30_sr_scale(struct nv30_sreg src, int scale)
+{
+       struct nv30_sreg result = src;
+
+       result.dst_scale = scale;
+       return result;
+}
+
+#endif
diff --git a/src/mesa/pipe/nv30/nv30_state.c b/src/mesa/pipe/nv30/nv30_state.c
new file mode 100644 (file)
index 0000000..c29a644
--- /dev/null
@@ -0,0 +1,740 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/*
+ * Translate a gallium blend state into the register values the hardware
+ * expects.  The result is an opaque handle for nv30_blend_state_bind() and
+ * nv30_blend_state_delete().  Returns NULL if the allocation fails.
+ */
+static void *
+nv30_blend_state_create(struct pipe_context *pipe,
+                       const struct pipe_blend_state *cso)
+{
+       struct nv30_blend_state *cb;
+
+       cb = malloc(sizeof(*cb));
+       if (!cb)
+               return NULL;
+
+       /* alpha factor/equation in the high half-word, RGB in the low one */
+       cb->b_enable = cso->blend_enable ? 1 : 0;
+       cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+                        (nvgl_blend_func(cso->rgb_src_factor)));
+       cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+                        (nvgl_blend_func(cso->rgb_dst_factor)));
+       cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) |
+                    (nvgl_blend_eqn(cso->rgb_func)));
+
+       cb->l_enable = cso->logicop_enable ? 1 : 0;
+       cb->l_op = nvgl_logicop_func(cso->logicop_func);
+
+       /* one enable bit per channel, packed as A, R, G, B from the top byte */
+       cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+                     ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+                     ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+                     ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+       cb->d_enable = cso->dither ? 1 : 0;
+
+       return (void *)cb;
+}
+
+/*
+ * Emit a previously created blend state to the command stream: dither,
+ * blend enable/factors, blend equation, colour mask and logic op.
+ */
+static void
+nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_blend_state *blend = hwcso;
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       /* dithering */
+       BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1);
+       OUT_RING  (blend->d_enable);
+
+       /* blend enable followed by source and destination factors */
+       BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
+       OUT_RING  (blend->b_enable);
+       OUT_RING  (blend->b_srcfunc);
+       OUT_RING  (blend->b_dstfunc);
+
+       /* blend equation */
+       BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_EQUATION, 1);
+       OUT_RING  (blend->b_eqn);
+
+       /* colour write mask */
+       BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1);
+       OUT_RING  (blend->c_mask);
+
+       /* logic op enable and function */
+       BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+       OUT_RING  (blend->l_enable);
+       OUT_RING  (blend->l_op);
+}
+
+/* Release a blend state created by nv30_blend_state_create(). */
+static void
+nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_blend_state *blend = hwcso;
+
+       free(blend);
+}
+
+
+/*
+ * Map a PIPE_TEX_WRAP_* mode to the hardware wrap value, returned
+ * unshifted (i.e. already moved down by NV34TCL_TX_WRAP_S_SHIFT) so the
+ * caller can place it in the S, T or R field.
+ *
+ * Unknown modes are reported and treated as REPEAT.  The MIRROR_CLAMP,
+ * MIRROR_CLAMP_TO_EDGE and MIRROR_CLAMP_TO_BORDER modes are not handled
+ * here (their NV34TCL_TX_WRAP_S_MIRROR_CLAMP* mappings are disabled), so
+ * they take the unknown-mode path as well.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+       unsigned hw_wrap;
+
+       if (wrap == PIPE_TEX_WRAP_REPEAT) {
+               hw_wrap = NV34TCL_TX_WRAP_S_REPEAT;
+       } else if (wrap == PIPE_TEX_WRAP_MIRROR_REPEAT) {
+               hw_wrap = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
+       } else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_EDGE) {
+               hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+       } else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER) {
+               hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+       } else if (wrap == PIPE_TEX_WRAP_CLAMP) {
+               hw_wrap = NV34TCL_TX_WRAP_S_CLAMP;
+       } else {
+               NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+               hw_wrap = NV34TCL_TX_WRAP_S_REPEAT;
+       }
+
+       return hw_wrap >> NV34TCL_TX_WRAP_S_SHIFT;
+}
+
+/*
+ * Translate a gallium sampler state into the texture unit register values
+ * (format flags, wrap modes, enable bits, filter and border colour).  The
+ * result is an opaque handle for nv30_sampler_state_bind() and
+ * nv30_sampler_state_delete().  Returns NULL if the allocation fails.
+ *
+ * Anisotropy levels and shadow compare functions are not programmed yet;
+ * the disabled code below is kept as a reference for when they are.
+ */
+static void *
+nv30_sampler_state_create(struct pipe_context *pipe,
+                         const struct pipe_sampler_state *cso)
+{
+       struct nv30_sampler_state *ps;
+       uint32_t filter = 0;
+
+       ps = malloc(sizeof(*ps));
+       if (!ps)
+               return NULL;
+
+       ps->fmt = 0;
+       if (!cso->normalized_coords)
+               ps->fmt |= NV34TCL_TX_FORMAT_RECT;
+
+       ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+                   (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+                   (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT));
+
+       ps->en = 0;
+       if (cso->max_anisotropy >= 2.0) {
+               /* no idea, binary driver sets it, works without it.. meh.. */
+               ps->wrap |= (1 << 5);
+
+/*             if (cso->max_anisotropy >= 16.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_16X;
+               } else
+               if (cso->max_anisotropy >= 12.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_12X;
+               } else
+               if (cso->max_anisotropy >= 10.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_10X;
+               } else
+               if (cso->max_anisotropy >= 8.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+               } else
+               if (cso->max_anisotropy >= 6.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_6X;
+               } else
+               if (cso->max_anisotropy >= 4.0) {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+               } else {
+                       ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+               }*/
+       }
+
+       /* magnification: anything but LINEAR falls back to NEAREST */
+       switch (cso->mag_img_filter) {
+       case PIPE_TEX_FILTER_LINEAR:
+               filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
+               break;
+       case PIPE_TEX_FILTER_NEAREST:
+       default:
+               filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
+               break;
+       }
+
+       /* minification: image filter combined with the mipmap filter */
+       switch (cso->min_img_filter) {
+       case PIPE_TEX_FILTER_LINEAR:
+               switch (cso->min_mip_filter) {
+               case PIPE_TEX_MIPFILTER_NEAREST:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+                       break;
+               case PIPE_TEX_MIPFILTER_LINEAR:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+                       break;
+               case PIPE_TEX_MIPFILTER_NONE:
+               default:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
+                       break;
+               }
+               break;
+       case PIPE_TEX_FILTER_NEAREST:
+       default:
+               switch (cso->min_mip_filter) {
+               case PIPE_TEX_MIPFILTER_NEAREST:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+                       break;
+               case PIPE_TEX_MIPFILTER_LINEAR:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+                       break;
+               case PIPE_TEX_MIPFILTER_NONE:
+               default:
+                       filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
+                       break;
+               }
+               break;
+       }
+
+       ps->filt = filter;
+
+/*     if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+               switch (cso->compare_func) {
+               case PIPE_FUNC_NEVER:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER;
+                       break;
+               case PIPE_FUNC_GREATER:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER;
+                       break;
+               case PIPE_FUNC_EQUAL:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL;
+                       break;
+               case PIPE_FUNC_GEQUAL:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+                       break;
+               case PIPE_FUNC_LESS:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS;
+                       break;
+               case PIPE_FUNC_NOTEQUAL:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+                       break;
+               case PIPE_FUNC_LEQUAL:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+                       break;
+               case PIPE_FUNC_ALWAYS:
+                       ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+                       break;
+               default:
+                       break;
+               }
+       }*/
+
+       /* border colour packed as A, R, G, B from the top byte down */
+       ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+                   (float_to_ubyte(cso->border_color[0]) << 16) |
+                   (float_to_ubyte(cso->border_color[1]) <<  8) |
+                   (float_to_ubyte(cso->border_color[2]) <<  0));
+
+       return (void *)ps;
+}
+
+/*
+ * Remember the sampler state for texture unit 'unit' and mark that unit
+ * dirty; nothing is emitted to the hardware here.
+ */
+static void
+nv30_sampler_state_bind(struct pipe_context *pipe, unsigned unit,
+                       void *hwcso)
+{
+       struct nv30_sampler_state *sampler = hwcso;
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       nv30->dirty_samplers |= (1 << unit);
+       nv30->tex_sampler[unit] = sampler;
+}
+
+/* Release a sampler state created by nv30_sampler_state_create(). */
+static void
+nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_sampler_state *sampler = hwcso;
+
+       free(sampler);
+}
+
+/*
+ * Remember the texture bound to texture unit 'unit' and mark that unit
+ * dirty; nothing is emitted to the hardware here.
+ */
+static void
+nv30_set_sampler_texture(struct pipe_context *pipe, unsigned unit,
+                        struct pipe_texture *miptree)
+{
+       struct nv30_miptree *nv30mt = (struct nv30_miptree *)miptree;
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       nv30->dirty_samplers |= (1 << unit);
+       nv30->tex_miptree[unit] = nv30mt;
+}
+
+/**
+ * Translate a gallium rasterizer CSO into pre-computed hardware values.
+ *
+ * \param pipe  owning context (not used by the translation itself)
+ * \param cso   gallium rasterizer state to translate
+ * \return a heap-allocated struct nv30_rasterizer_state, to be released with
+ *         nv30_rasterizer_state_delete(), or NULL if the allocation fails.
+ */
+static void *
+nv30_rasterizer_state_create(struct pipe_context *pipe,
+                            const struct pipe_rasterizer_state *cso)
+{
+       struct nv30_rasterizer_state *rs;
+       union {
+               float f;
+               uint32_t u;
+       } point_size;
+       int i;
+
+       /*XXX: ignored:
+        *      light_twoside
+        *      offset_cw/ccw -nohw
+        *      scissor
+        *      point_smooth -nohw
+        *      multisample
+        *      offset_units / offset_scale
+        */
+       rs = malloc(sizeof(*rs));
+       if (!rs)
+               return NULL;
+
+       rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01;
+
+       rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+       rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+       rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0;
+       rs->line_stipple = (cso->line_stipple_pattern << 16) |
+                           cso->line_stipple_factor;
+
+       /* The point size is stored as the raw bit pattern of the float.  Go
+        * through a union instead of a pointer cast so the read does not
+        * break the strict-aliasing rules.
+        */
+       point_size.f = cso->point_size;
+       rs->point_size = point_size.u;
+
+       rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+       rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0;
+
+       /* Gallium describes fill modes per winding, the state object stores
+        * them per face, so map them according to the front winding.
+        */
+       if (cso->front_winding == PIPE_WINDING_CCW) {
+               rs->front_face = NV34TCL_FRONT_FACE_CCW;
+               rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+               rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_cw);
+       } else {
+               rs->front_face = NV34TCL_FRONT_FACE_CW;
+               rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+               rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_ccw);
+       }
+
+       /* Same winding -> face conversion for the cull mode. */
+       switch (cso->cull_mode) {
+       case PIPE_WINDING_CCW:
+               rs->cull_face_en = 1;
+               if (cso->front_winding == PIPE_WINDING_CCW)
+                       rs->cull_face    = NV34TCL_CULL_FACE_FRONT;
+               else
+                       rs->cull_face    = NV34TCL_CULL_FACE_BACK;
+               break;
+       case PIPE_WINDING_CW:
+               rs->cull_face_en = 1;
+               if (cso->front_winding == PIPE_WINDING_CW)
+                       rs->cull_face    = NV34TCL_CULL_FACE_FRONT;
+               else
+                       rs->cull_face    = NV34TCL_CULL_FACE_BACK;
+               break;
+       case PIPE_WINDING_BOTH:
+               rs->cull_face_en = 1;
+               rs->cull_face    = NV34TCL_CULL_FACE_FRONT_AND_BACK;
+               break;
+       case PIPE_WINDING_NONE:
+       default:
+               rs->cull_face_en = 0;
+               rs->cull_face    = 0;
+               break;
+       }
+
+       /* Bit 0 enables point sprites; bit (8 + i) is set for every texture
+        * coordinate i whose sprite coord mode is not NONE.
+        */
+       if (cso->point_sprite) {
+               rs->point_sprite = (1 << 0);
+               for (i = 0; i < 8; i++) {
+                       if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+                               rs->point_sprite |= (1 << (8 + i));
+               }
+       } else {
+               rs->point_sprite = 0;
+       }
+
+       return (void *)rs;
+}
+
+/* Emit a previously created rasterizer state object to the channel.
+ *
+ * hwcso is the struct nv30_rasterizer_state returned by
+ * nv30_rasterizer_state_create().  Every BEGIN_RING count below has to match
+ * the number of OUT_RING words that follow it.
+ */
+static void
+nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct nv30_rasterizer_state *rs = hwcso;
+
+       BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1);
+       OUT_RING  (rs->shade_model);
+
+       BEGIN_RING(rankine, NV34TCL_LINE_WIDTH, 2);
+       OUT_RING  (rs->line_width);
+       OUT_RING  (rs->line_smooth_en);
+       BEGIN_RING(rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2);
+       OUT_RING  (rs->line_stipple_en);
+       OUT_RING  (rs->line_stipple);
+
+       BEGIN_RING(rankine, NV34TCL_POINT_SIZE, 1);
+       OUT_RING  (rs->point_size);
+
+       /* Six words in one burst.  NOTE(review): this assumes the methods
+        * following POLYGON_MODE_FRONT are, in order, mode back, cull face,
+        * front face, polygon smooth enable and cull face enable - confirm
+        * against the NV34TCL method definitions.
+        */
+       BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 6);
+       OUT_RING  (rs->poly_mode_front);
+       OUT_RING  (rs->poly_mode_back);
+       OUT_RING  (rs->cull_face);
+       OUT_RING  (rs->front_face);
+       OUT_RING  (rs->poly_smooth_en);
+       OUT_RING  (rs->cull_face_en);
+
+       BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+       OUT_RING  (rs->poly_stipple_en);
+
+       BEGIN_RING(rankine, NV34TCL_POINT_SPRITE, 1);
+       OUT_RING  (rs->point_sprite);
+}
+
+/* Release a rasterizer state object; the context is unused. */
+static void
+nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       (void)pipe;
+
+       free(hwcso);
+}
+
+/* Fill one nv30_stencil_push block from stencil entry `idx` of the CSO.
+ * Comparison and stencil ops go through the nvgl_* translation helpers,
+ * everything else is copied as-is.
+ */
+static void
+nv30_translate_stencil(const struct pipe_depth_stencil_alpha_state *cso,
+                      unsigned idx, struct nv30_stencil_push *hw)
+{
+       /* Values that need no translation. */
+       if (cso->stencil[idx].enabled)
+               hw->enable = 1;
+       else
+               hw->enable = 0;
+       hw->ref   = cso->stencil[idx].ref_value;
+       hw->vmask = cso->stencil[idx].value_mask;
+       hw->wmask = cso->stencil[idx].write_mask;
+
+       /* Enumerants that have to be converted. */
+       hw->func  = nvgl_comparison_op(cso->stencil[idx].func);
+       hw->fail  = nvgl_stencil_op(cso->stencil[idx].fail_op);
+       hw->zfail = nvgl_stencil_op(cso->stencil[idx].zfail_op);
+       hw->zpass = nvgl_stencil_op(cso->stencil[idx].zpass_op);
+}
+
+/**
+ * Translate a gallium depth/stencil/alpha CSO into hardware values.
+ *
+ * \param pipe  owning context (not used by the translation itself)
+ * \param cso   gallium state to translate
+ * \return a heap-allocated struct nv30_depth_stencil_alpha_state, to be
+ *         released with nv30_depth_stencil_alpha_state_delete(), or NULL if
+ *         the allocation fails.
+ */
+static void *
+nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+                       const struct pipe_depth_stencil_alpha_state *cso)
+{
+       struct nv30_depth_stencil_alpha_state *hw;
+
+       hw = malloc(sizeof(*hw));
+       if (!hw)
+               return NULL;
+
+       hw->depth.func          = nvgl_comparison_op(cso->depth.func);
+       hw->depth.write_enable  = cso->depth.writemask ? 1 : 0;
+       hw->depth.test_enable   = cso->depth.enabled ? 1 : 0;
+
+       /* Stencil entry 0 feeds the front block, entry 1 the back block. */
+       nv30_translate_stencil(cso, 0, &hw->stencil.front);
+       nv30_translate_stencil(cso, 1, &hw->stencil.back);
+
+       hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+       hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+       hw->alpha.ref  = float_to_ubyte(cso->alpha.ref);
+
+       return (void *)hw;
+}
+
+/* Emit a depth/stencil/alpha state object to the channel.
+ *
+ * The sub-structures of struct nv30_depth_stencil_alpha_state are handed to
+ * OUT_RINGp as arrays of uint32_t, so their field order and count must match
+ * the BEGIN_RING counts used here (3 depth words, 8 + 8 stencil words,
+ * 3 alpha words).
+ */
+static void
+nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct nv30_depth_stencil_alpha_state *hw = hwcso;
+
+       BEGIN_RING(rankine, NV34TCL_DEPTH_FUNC, 3);
+       OUT_RINGp ((uint32_t *)&hw->depth, 3);
+       /* One 16-word burst: the front block first, then the back block. */
+       BEGIN_RING(rankine, NV34TCL_STENCIL_FRONT_ENABLE, 16);
+       OUT_RINGp ((uint32_t *)&hw->stencil.front, 8);
+       OUT_RINGp ((uint32_t *)&hw->stencil.back, 8);
+       BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3);
+       OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3);
+}
+
+/* Release a depth/stencil/alpha state object; the context is unused. */
+static void
+nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       (void)pipe;
+
+       free(hwcso);
+}
+
+/**
+ * Create the driver-side wrapper for a vertex shader.
+ *
+ * The wrapper is zero-initialised and only records a pointer to the gallium
+ * shader state; nothing else is set up here.
+ *
+ * \return the new struct nv30_vertex_program, or NULL if the allocation
+ *         fails.
+ */
+static void *
+nv30_vp_state_create(struct pipe_context *pipe,
+                    const struct pipe_shader_state *cso)
+{
+       struct nv30_vertex_program *vp;
+
+       vp = calloc(1, sizeof(*vp));
+       if (!vp)
+               return NULL;
+
+       /* Only the pointer is kept, so cso must outlive the wrapper. */
+       vp->pipe = cso;
+
+       return (void *)vp;
+}
+
+/* Select the current vertex program and flag it for (re)validation. */
+static void
+nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+
+       ctx->dirty |= NV30_NEW_VERTPROG;
+       ctx->vertprog.current = (struct nv30_vertex_program *)hwcso;
+}
+
+/* Tear down a vertex program via nv30_vertprog_destroy(), then free the
+ * wrapper itself.
+ */
+static void
+nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_vertex_program *prog = hwcso;
+
+       nv30_vertprog_destroy(nv30_context(pipe), prog);
+       free(prog);
+}
+
+/**
+ * Create the driver-side wrapper for a fragment shader.
+ *
+ * The wrapper is zero-initialised and only records a pointer to the gallium
+ * shader state; nothing else is set up here.
+ *
+ * \return the new struct nv30_fragment_program, or NULL if the allocation
+ *         fails.
+ */
+static void *
+nv30_fp_state_create(struct pipe_context *pipe,
+                    const struct pipe_shader_state *cso)
+{
+       struct nv30_fragment_program *fp;
+
+       fp = calloc(1, sizeof(*fp));
+       if (!fp)
+               return NULL;
+
+       /* Only the pointer is kept, so cso must outlive the wrapper. */
+       fp->pipe = cso;
+
+       return (void *)fp;
+}
+
+/* Select the current fragment program and flag it for (re)validation. */
+static void
+nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+
+       ctx->dirty |= NV30_NEW_FRAGPROG;
+       ctx->fragprog.current = (struct nv30_fragment_program *)hwcso;
+}
+
+/* Tear down a fragment program via nv30_fragprog_destroy(), then free the
+ * wrapper itself.
+ */
+static void
+nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_fragment_program *prog = hwcso;
+
+       nv30_fragprog_destroy(nv30_context(pipe), prog);
+       free(prog);
+}
+
+/* Emit the constant blend colour.  The four float channels are converted to
+ * bytes and packed as A (bits 31-24), R, G, B (bits 7-0).
+ */
+static void
+nv30_set_blend_color(struct pipe_context *pipe,
+                    const struct pipe_blend_color *bcol)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       uint32_t argb;
+
+       argb = ((float_to_ubyte(bcol->color[3]) << 24) |
+               (float_to_ubyte(bcol->color[0]) << 16) |
+               (float_to_ubyte(bcol->color[1]) <<  8) |
+               (float_to_ubyte(bcol->color[2]) <<  0));
+
+       BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_COLOR, 1);
+       OUT_RING  (argb);
+}
+
+/* Clip state hook: intentionally a no-op, the clip state is ignored. */
+static void
+nv30_set_clip_state(struct pipe_context *pipe,
+                   const struct pipe_clip_state *clip)
+{
+       (void)pipe;
+       (void)clip;
+}
+
+/* Remember the constant buffer for the vertex or fragment stage and mark the
+ * matching program dirty.  `index` is not consulted and any other shader
+ * stage is silently ignored.
+ */
+static void
+nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+                        const struct pipe_constant_buffer *buf )
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+
+       switch (shader) {
+       case PIPE_SHADER_VERTEX:
+               ctx->vertprog.constant_buf = buf->buffer;
+               ctx->dirty |= NV30_NEW_VERTPROG;
+               break;
+       case PIPE_SHADER_FRAGMENT:
+               ctx->fragprog.constant_buf = buf->buffer;
+               ctx->dirty |= NV30_NEW_FRAGPROG;
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * Bind a new set of render targets.
+ *
+ * Works out the common width/height and the colour/zeta formats of the bound
+ * surfaces, emits the surface pitches, the render target enable mask and
+ * format, and resets the viewport rectangle and clip to the full surface.
+ * The buffers themselves are only recorded in the context (nv30->rt[],
+ * nv30->zeta); their addresses are not emitted here.
+ *
+ * \param pipe  context to update
+ * \param fb    framebuffer description; cbufs[] entries and zsbuf may be NULL
+ */
+static void
+nv30_set_framebuffer_state(struct pipe_context *pipe,
+                          const struct pipe_framebuffer_state *fb)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct pipe_surface *rt[4] = { NULL, NULL, NULL, NULL }, *zeta = NULL;
+       /* w/h start at 0 so that a framebuffer without any surface does not
+        * push indeterminate values to the hardware.
+        */
+       uint32_t rt_enable, rt_format, w = 0, h = 0;
+       int i, colour_format = 0, zeta_format = 0;
+
+       /* NOTE(review): only the first non-NULL colour buffer is enabled and
+        * recorded; later ones are merely checked (via assert) to match its
+        * size and format.  Confirm whether MRT is meant to work here.
+        */
+       rt_enable = 0;
+       for (i = 0; i < 4; i++) {
+               if (!fb->cbufs[i])
+                       continue;
+
+               if (colour_format) {
+                       assert(w == fb->cbufs[i]->width);
+                       assert(h == fb->cbufs[i]->height);
+                       assert(colour_format == fb->cbufs[i]->format);
+               } else {
+                       w = fb->cbufs[i]->width;
+                       h = fb->cbufs[i]->height;
+                       colour_format = fb->cbufs[i]->format;
+                       rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
+                       rt[i] = fb->cbufs[i];
+               }
+       }
+
+       if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 |
+                        NV34TCL_RT_ENABLE_COLOR3))
+               rt_enable |= NV34TCL_RT_ENABLE_MRT;
+
+       if (fb->zsbuf) {
+               if (colour_format) {
+                       assert(w == fb->zsbuf->width);
+                       assert(h == fb->zsbuf->height);
+               } else {
+                       w = fb->zsbuf->width;
+                       h = fb->zsbuf->height;
+               }
+
+               zeta_format = fb->zsbuf->format;
+               zeta = fb->zsbuf;
+       }
+
+       rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+       /* A format of 0 means "nothing bound"; a default is still programmed. */
+       switch (colour_format) {
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case 0:
+               rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+               break;
+       case PIPE_FORMAT_R5G6B5_UNORM:
+               rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+               break;
+       default:
+               assert(0);
+       }
+
+       switch (zeta_format) {
+       case PIPE_FORMAT_Z16_UNORM:
+               rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+               break;
+       case PIPE_FORMAT_Z24S8_UNORM:
+       case 0:
+               rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+               break;
+       default:
+               assert(0);
+       }
+
+       /* The surface pitch is multiplied by cpp before being emitted. */
+       if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
+               BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1);
+               OUT_RING  (rt[0]->pitch * rt[0]->cpp);
+               nv30->rt[0] = rt[0]->buffer;
+       }
+
+       if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+               /* Exactly one data word follows, so the count must be 1. */
+               BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 1);
+               OUT_RING  (rt[1]->pitch * rt[1]->cpp);
+               nv30->rt[1] = rt[1]->buffer;
+       }
+
+       if (zeta_format) {
+               BEGIN_RING(rankine, NV34TCL_ZETA_PITCH, 1);
+               OUT_RING  (zeta->pitch * zeta->cpp);
+               nv30->zeta = zeta->buffer;
+       }
+
+       nv30->rt_enable = rt_enable;
+       BEGIN_RING(rankine, NV34TCL_RT_ENABLE, 1);
+       OUT_RING  (rt_enable);
+       BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 3);
+       OUT_RING  ((w << 16) | 0);
+       OUT_RING  ((h << 16) | 0);
+       OUT_RING  (rt_format);
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2);
+       OUT_RING  ((w << 16) | 0);
+       OUT_RING  ((h << 16) | 0);
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+       OUT_RING  (((w - 1) << 16) | 0);
+       OUT_RING  (((h - 1) << 16) | 0);
+}
+
+/* Emit the polygon stipple pattern: stipple->stipple is handed to the ring
+ * as 32 consecutive 32-bit words starting at POLYGON_STIPPLE_PATTERN(0).
+ */
+static void
+nv30_set_polygon_stipple(struct pipe_context *pipe,
+                        const struct pipe_poly_stipple *stipple)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+
+       BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+       OUT_RINGp ((uint32_t *)stipple->stipple, 32);
+}
+
+/* Emit the scissor rectangle.  Each word packs the extent (max - min) in the
+ * upper 16 bits and the origin (min) in the lower 16 bits.
+ */
+static void
+nv30_set_scissor_state(struct pipe_context *pipe,
+                      const struct pipe_scissor_state *s)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       uint32_t horiz, vert;
+
+       horiz = ((s->maxx - s->minx) << 16) | s->minx;
+       vert  = ((s->maxy - s->miny) << 16) | s->miny;
+
+       BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2);
+       OUT_RING  (horiz);
+       OUT_RING  (vert);
+}
+
+/* Emit the viewport transform as eight floats: the four translate
+ * components followed by the four scale components.
+ */
+static void
+nv30_set_viewport_state(struct pipe_context *pipe,
+                       const struct pipe_viewport_state *vpt)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       unsigned c;
+
+       BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+       for (c = 0; c < 4; c++) {
+               OUT_RINGf (vpt->translate[c]);
+       }
+       for (c = 0; c < 4; c++) {
+               OUT_RINGf (vpt->scale[c]);
+       }
+}
+
+/* Copy the vertex buffer description into slot `index` of the context and
+ * mark the vertex arrays dirty.
+ */
+static void
+nv30_set_vertex_buffer(struct pipe_context *pipe, unsigned index,
+                      const struct pipe_vertex_buffer *vb)
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+
+       ctx->dirty |= NV30_NEW_ARRAYS;
+       ctx->vtxbuf[index] = *vb;
+}
+
+/* Copy the vertex element description into slot `index` of the context and
+ * mark the vertex arrays dirty.
+ */
+static void
+nv30_set_vertex_element(struct pipe_context *pipe, unsigned index,
+                       const struct pipe_vertex_element *ve)
+{
+       struct nv30_context *ctx = nv30_context(pipe);
+
+       ctx->dirty |= NV30_NEW_ARRAYS;
+       ctx->vtxelt[index] = *ve;
+}
+
+/* Install every state-related callback of this file into the context's
+ * pipe_context function table.
+ */
+void
+nv30_init_state_functions(struct nv30_context *nv30)
+{
+       struct pipe_context *pipe = &nv30->pipe;
+
+       /* Constant state objects: create / bind / delete triples. */
+       pipe->create_blend_state = nv30_blend_state_create;
+       pipe->bind_blend_state = nv30_blend_state_bind;
+       pipe->delete_blend_state = nv30_blend_state_delete;
+
+       pipe->create_sampler_state = nv30_sampler_state_create;
+       pipe->bind_sampler_state = nv30_sampler_state_bind;
+       pipe->delete_sampler_state = nv30_sampler_state_delete;
+       pipe->set_sampler_texture = nv30_set_sampler_texture;
+
+       pipe->create_rasterizer_state = nv30_rasterizer_state_create;
+       pipe->bind_rasterizer_state = nv30_rasterizer_state_bind;
+       pipe->delete_rasterizer_state = nv30_rasterizer_state_delete;
+
+       pipe->create_depth_stencil_alpha_state =
+               nv30_depth_stencil_alpha_state_create;
+       pipe->bind_depth_stencil_alpha_state =
+               nv30_depth_stencil_alpha_state_bind;
+       pipe->delete_depth_stencil_alpha_state =
+               nv30_depth_stencil_alpha_state_delete;
+
+       /* Shaders. */
+       pipe->create_vs_state = nv30_vp_state_create;
+       pipe->bind_vs_state = nv30_vp_state_bind;
+       pipe->delete_vs_state = nv30_vp_state_delete;
+
+       pipe->create_fs_state = nv30_fp_state_create;
+       pipe->bind_fs_state = nv30_fp_state_bind;
+       pipe->delete_fs_state = nv30_fp_state_delete;
+
+       /* Directly set (non-CSO) state. */
+       pipe->set_blend_color = nv30_set_blend_color;
+       pipe->set_clip_state = nv30_set_clip_state;
+       pipe->set_constant_buffer = nv30_set_constant_buffer;
+       pipe->set_framebuffer_state = nv30_set_framebuffer_state;
+       pipe->set_polygon_stipple = nv30_set_polygon_stipple;
+       pipe->set_scissor_state = nv30_set_scissor_state;
+       pipe->set_viewport_state = nv30_set_viewport_state;
+
+       /* Vertex inputs. */
+       pipe->set_vertex_buffer = nv30_set_vertex_buffer;
+       pipe->set_vertex_element = nv30_set_vertex_element;
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_state.h b/src/mesa/pipe/nv30/nv30_state.h
new file mode 100644 (file)
index 0000000..233600f
--- /dev/null
@@ -0,0 +1,147 @@
+#ifndef __NV30_STATE_H__
+#define __NV30_STATE_H__
+
+#include "pipe/p_state.h"
+
+/* Pre-translated blend state, stored as 32-bit words.
+ * NOTE(review): the prefixes presumably stand for b_ = blending,
+ * l_ = logic op, c_ = colour mask, d_ = dither - confirm against
+ * nv30_blend_state_create/bind.
+ */
+struct nv30_blend_state {
+       uint32_t b_enable;
+       uint32_t b_srcfunc;
+       uint32_t b_dstfunc;
+       uint32_t b_eqn;
+
+       uint32_t l_enable;
+       uint32_t l_op;
+
+       uint32_t c_mask;
+
+       uint32_t d_enable;
+};
+
+/* Pre-translated sampler state, filled in by nv30_sampler_state_create(). */
+struct nv30_sampler_state {
+       uint32_t fmt;
+       uint32_t wrap;
+       uint32_t en;
+       uint32_t filt;
+       uint32_t bcol;  /* border colour packed as A8R8G8B8 */
+};
+
+/* Pre-translated rasterizer state: filled in by
+ * nv30_rasterizer_state_create() and emitted field by field by
+ * nv30_rasterizer_state_bind().
+ */
+struct nv30_rasterizer_state {
+       uint32_t shade_model;   /* 0x1d00 when flat shading, else 0x1d01 */
+
+       uint32_t line_width;    /* line width * 8, truncated to 8 bits */
+       uint32_t line_smooth_en;
+       uint32_t line_stipple_en;
+       uint32_t line_stipple;  /* (pattern << 16) | factor */
+
+       uint32_t point_size;    /* raw bits of the float point size */
+
+       uint32_t poly_smooth_en;
+       uint32_t poly_stipple_en;
+       
+       uint32_t poly_mode_front;
+       uint32_t poly_mode_back;
+
+       uint32_t front_face;
+       uint32_t cull_face;
+       uint32_t cull_face_en;
+
+       uint32_t point_sprite;  /* bit 0 = enable, bit (8 + i) = coord i */
+};
+
+/* One entry of a vertex program's instruction array.
+ * NOTE(review): data[] presumably holds the four 32-bit words of one
+ * hardware instruction - confirm in nv30_vertprog.c.
+ */
+struct nv30_vertex_program_exec {
+       uint32_t data[4];
+       boolean has_branch_offset;
+       int const_index;
+};
+
+/* One four-component constant used by a vertex program. */
+struct nv30_vertex_program_data {
+       int index; /* immediates == -1 */
+       float value[4];
+};
+
+/* Driver-side vertex program object.  nv30_vp_state_create() allocates it
+ * zeroed and only sets `pipe`; the remaining fields are managed elsewhere
+ * (presumably nv30_vertprog.c - not visible here).
+ */
+struct nv30_vertex_program {
+       /* gallium shader state this program was created from (not owned) */
+       const struct pipe_shader_state *pipe;
+
+       boolean translated;
+       struct nv30_vertex_program_exec *insns;
+       unsigned nr_insns;
+       struct nv30_vertex_program_data *consts;
+       unsigned nr_consts;
+
+       struct nouveau_resource *exec;
+       unsigned exec_start;
+       struct nouveau_resource *data;
+       unsigned data_start;
+       unsigned data_start_min;
+
+       uint32_t ir;
+       uint32_t or;
+};
+
+/* One constant reference of a fragment program.
+ * NOTE(review): presumably `offset` locates the constant inside the program
+ * and `index` selects the constant buffer entry - confirm in
+ * nv30_fragprog.c.
+ */
+struct nv30_fragment_program_data {
+       unsigned offset;
+       unsigned index;
+};
+
+/* Driver-side fragment program object.  nv30_fp_state_create() allocates it
+ * zeroed and only sets `pipe`; the remaining fields are managed elsewhere
+ * (presumably nv30_fragprog.c - not visible here).
+ */
+struct nv30_fragment_program {
+       /* gallium shader state this program was created from (not owned) */
+       const struct pipe_shader_state *pipe;
+
+       boolean translated;
+       boolean on_hw;
+       unsigned samplers;
+
+       uint32_t *insn;
+       int       insn_len;
+
+       struct nv30_fragment_program_data *consts;
+       unsigned nr_consts;
+
+       struct pipe_buffer *buffer;
+
+       uint32_t fp_control;
+       uint32_t fp_reg_control;
+};
+
+/* Stencil state for one face, filled in by nv30_translate_stencil().
+ * The state bind code hands this struct to OUT_RINGp as an array of eight
+ * uint32_t, so the field order and count must not change.
+ */
+struct nv30_stencil_push {
+       uint32_t enable;
+       uint32_t wmask;
+       uint32_t func;
+       uint32_t ref;
+       uint32_t vmask;
+       uint32_t fail;
+       uint32_t zfail;
+       uint32_t zpass;
+};
+
+/* Pre-translated depth/stencil/alpha state.  The bind code passes each
+ * sub-structure to OUT_RINGp as an array of uint32_t, so the fields inside
+ * each group must stay in this order.
+ */
+struct nv30_depth_stencil_alpha_state {
+       /* passed as 3 words with the NV34TCL_DEPTH_FUNC burst */
+       struct {
+               uint32_t func;
+               uint32_t write_enable;
+               uint32_t test_enable;
+       } depth;
+
+       /* front and back are passed by two separate 8-word calls, front
+        * first, so their relative order in this struct is not significant
+        */
+       struct {
+               struct nv30_stencil_push back;
+               struct nv30_stencil_push front;
+       } stencil;
+
+       /* passed as 3 words with the NV34TCL_ALPHA_FUNC_ENABLE burst */
+       struct {
+               uint32_t enabled;
+               uint32_t func;
+               uint32_t ref;
+       } alpha;
+};
+
+/* Driver texture object.  `base` must stay the first member: the driver
+ * casts struct pipe_texture pointers to struct nv30_miptree pointers.
+ */
+struct nv30_miptree {
+       struct pipe_texture base;
+
+       struct pipe_buffer *buffer;
+       uint total_size;
+
+       struct {
+               /* divided by cpp to obtain a surface pitch, so presumably
+                * in bytes - TODO confirm in nv30_miptree.c
+                */
+               uint pitch;
+               /* indexed by cube face, 3D slice, or 0 for other targets */
+               uint *image_offset;
+       } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/mesa/pipe/nv30/nv30_state_emit.c b/src/mesa/pipe/nv30/nv30_state_emit.c
new file mode 100644 (file)
index 0000000..70b9883
--- /dev/null
@@ -0,0 +1,83 @@
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* Bring the hardware in sync with the context before drawing.
+ *
+ * Re-binds the fragment program, fragment textures and vertex program when
+ * their dirty flags are set, then unconditionally re-emits relocations for
+ * the enabled render targets, the textures selected by nv30->fp_samplers and
+ * the active fragment program.
+ */
+void
+nv30_emit_hw_state(struct nv30_context *nv30)
+{
+       int i;
+
+       if (nv30->dirty & NV30_NEW_FRAGPROG) {
+               nv30_fragprog_bind(nv30, nv30->fragprog.current);
+               /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */
+       }
+
+       /* Textures are re-bound when a sampler changed or the fragment
+        * program did; NV30_NEW_FRAGPROG is only cleared here, after both
+        * checks have seen it.
+        */
+       if (nv30->dirty_samplers || (nv30->dirty & NV30_NEW_FRAGPROG)) {
+               nv30_fragtex_bind(nv30);
+/*
+               BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1);
+               OUT_RING  (2);
+               BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1);
+               OUT_RING  (1);*/
+               nv30->dirty &= ~NV30_NEW_FRAGPROG;
+       }
+
+       if (nv30->dirty & NV30_NEW_VERTPROG) {
+               nv30_vertprog_bind(nv30, nv30->vertprog.current);
+               nv30->dirty &= ~NV30_NEW_VERTPROG;
+       }
+
+       nv30->dirty_samplers = 0;
+
+       /* Emit relocs for every referenced buffer.
+        * This is to ensure the bufmgr has an accurate idea of how
+        * the buffer is used.  This isn't very efficient, but we don't
+        * seem to take a significant performance hit.  Will be improved
+        * at some point.  Vertex arrays are emitted by nv30_vbo.c
+        */
+
+       /* Render targets */
+       if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
+               BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 1);
+               OUT_RELOCo(nv30->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(rankine, NV34TCL_COLOR0_OFFSET, 1);
+               OUT_RELOCl(nv30->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+               BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1);
+               OUT_RELOCo(nv30->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(rankine, NV34TCL_COLOR1_OFFSET, 1);
+               OUT_RELOCl(nv30->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       }
+
+       if (nv30->zeta) {
+               BEGIN_RING(rankine, NV34TCL_DMA_ZETA, 1);
+               OUT_RELOCo(nv30->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               BEGIN_RING(rankine, NV34TCL_ZETA_OFFSET, 1);
+               OUT_RELOCl(nv30->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+               /* XXX allocate LMA */
+/*             BEGIN_RING(rankine, NV34TCL_LMA_DEPTH_OFFSET, 1);
+               OUT_RING(0);*/
+       }
+
+       /* Texture images */
+       for (i = 0; i < 16; i++) {
+               /* only units flagged in the fp_samplers bitmask are emitted */
+               if (!(nv30->fp_samplers & (1 << i)))
+                       continue;
+               BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2);
+               OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+                          NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+               OUT_RELOCd(nv30->tex[i].buffer, nv30->tex[i].format,
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+                          NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0,
+                          NV34TCL_TX_FORMAT_DMA1);
+       }
+
+       /* Fragment program */
+       /* NOTE(review): fragprog.active is dereferenced without a NULL
+        * check - presumably nv30_fragprog_bind() always sets it before
+        * this point; confirm.
+        */
+       BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+       OUT_RELOC (nv30->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
+                  NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
+                  NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
+                  NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+}
+
diff --git a/src/mesa/pipe/nv30/nv30_surface.c b/src/mesa/pipe/nv30/nv30_surface.c
new file mode 100644 (file)
index 0000000..31745e3
--- /dev/null
@@ -0,0 +1,136 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "nv30_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "pipe/util/p_tile.h"
+
+/* Report whether `format` can be used for the given kind of resource
+ * (PIPE_SURFACE or PIPE_TEXTURE).  Any other `type` trips an assertion and
+ * is reported as unsupported.
+ */
+static boolean
+nv30_surface_format_supported(struct pipe_context *pipe,
+                             enum pipe_format format, uint type)
+{
+       if (type == PIPE_SURFACE) {
+               /* formats usable as render target / depth buffer */
+               switch (format) {
+               case PIPE_FORMAT_A8R8G8B8_UNORM:
+               case PIPE_FORMAT_R5G6B5_UNORM:
+               case PIPE_FORMAT_Z24S8_UNORM:
+               case PIPE_FORMAT_Z16_UNORM:
+                       return TRUE;
+               default:
+                       return FALSE;
+               }
+       }
+
+       if (type == PIPE_TEXTURE) {
+               /* formats that can be sampled from */
+               switch (format) {
+               case PIPE_FORMAT_A8R8G8B8_UNORM:
+               case PIPE_FORMAT_A1R5G5B5_UNORM:
+               case PIPE_FORMAT_A4R4G4B4_UNORM:
+               case PIPE_FORMAT_R5G6B5_UNORM:
+               case PIPE_FORMAT_U_L8:
+               case PIPE_FORMAT_U_A8:
+               case PIPE_FORMAT_U_I8:
+               case PIPE_FORMAT_U_A8_L8:
+               case PIPE_FORMAT_Z16_UNORM:
+               case PIPE_FORMAT_Z24S8_UNORM:
+                       return TRUE;
+               default:
+                       return FALSE;
+               }
+       }
+
+       assert(0);
+       return FALSE;
+}
+
+/* Create a surface that views one image of a texture.
+ *
+ * The image is chosen by `level` plus `face` for cube maps, `zslice` for 3D
+ * textures, and image 0 for every other target.  Returns NULL if the winsys
+ * cannot allocate a surface.
+ */
+static struct pipe_surface *
+nv30_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt,
+                     unsigned face, unsigned level, unsigned zslice)
+{
+       struct nv30_miptree *mt = (struct nv30_miptree *)pt;
+       struct pipe_winsys *winsys = pipe->winsys;
+       struct pipe_surface *surf;
+       unsigned image = 0;
+
+       surf = winsys->surface_alloc(winsys);
+       if (surf == NULL)
+               return NULL;
+
+       /* The surface takes its own reference on the miptree's buffer. */
+       pipe_buffer_reference(winsys, &surf->buffer, mt->buffer);
+
+       surf->format = pt->format;
+       surf->cpp = pt->cpp;
+       surf->width = pt->width[level];
+       surf->height = pt->height[level];
+       surf->pitch = mt->level[level].pitch / surf->cpp;
+
+       if (pt->target == PIPE_TEXTURE_CUBE)
+               image = face;
+       else if (pt->target == PIPE_TEXTURE_3D)
+               image = zslice;
+
+       surf->offset = mt->level[level].image_offset[image];
+
+       return surf;
+}
+
+/* Copy a rectangle between two surfaces by forwarding to the nouveau
+ * winsys' surface_copy hook.
+ */
+static void
+nv30_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest,
+                 unsigned destx, unsigned desty, struct pipe_surface *src,
+                 unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+       struct nouveau_winsys *winsys = nv30_context(pipe)->nvws;
+
+       winsys->surface_copy(winsys, dest, destx, desty,
+                            src, srcx, srcy, width, height);
+}
+
+/* Fill a rectangle of a surface with `value` by forwarding to the nouveau
+ * winsys' surface_fill hook.
+ */
+static void
+nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+                 unsigned destx, unsigned desty, unsigned width,
+                 unsigned height, unsigned value)
+{
+       struct nouveau_winsys *winsys = nv30_context(pipe)->nvws;
+
+       winsys->surface_fill(winsys, dest, destx, desty,
+                            width, height, value);
+}
+
+/* Install the surface-related callbacks of this file into the context's
+ * pipe_context function table.
+ */
+void
+nv30_init_surface_functions(struct nv30_context *nv30)
+{
+       struct pipe_context *pipe = &nv30->pipe;
+
+       pipe->is_format_supported = nv30_surface_format_supported;
+       pipe->get_tex_surface = nv30_get_tex_surface;
+       pipe->surface_copy = nv30_surface_copy;
+       pipe->surface_fill = nv30_surface_fill;
+}
diff --git a/src/mesa/pipe/nv30/nv30_vbo.c b/src/mesa/pipe/nv30/nv30_vbo.c
new file mode 100644 (file)
index 0000000..e6c50d3
--- /dev/null
@@ -0,0 +1,406 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+#include "pipe/nouveau/nouveau_channel.h"
+#include "pipe/nouveau/nouveau_pushbuf.h"
+
+/* Count how many of the x/y/z/w channels of a pipe format have a non-zero
+ * size, i.e. the number of components the format carries (0..4).
+ */
+static INLINE int
+nv30_vbo_ncomp(uint format)
+{
+       return (pf_size_x(format) ? 1 : 0) +
+              (pf_size_y(format) ? 1 : 0) +
+              (pf_size_z(format) ? 1 : 0) +
+              (pf_size_w(format) ? 1 : 0);
+}
+
+/* Translate the type of a pipe vertex format into the hardware
+ * VERTEX_ARRAY_FORMAT type field.
+ *
+ * Only FLOAT and UNORM are handled.  Any other type trips the assert in
+ * debug builds; in NDEBUG builds the function falls back to FLOAT so that
+ * it always returns a defined value.
+ */
+static INLINE int
+nv30_vbo_type(uint format)
+{
+       switch (pf_type(format)) {
+       case PIPE_FORMAT_TYPE_FLOAT:
+               return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT;
+       case PIPE_FORMAT_TYPE_UNORM:
+               return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE;
+       default:
+               assert(0);
+               /* assert() disappears under NDEBUG; never fall off the end
+                * of a non-void function.
+                */
+               return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT;
+       }
+}
+
+/* Try to emit a constant (pitch == 0) vertex attribute as an immediate
+ * 4-float value instead of binding it as a vertex array.
+ *
+ * Only float formats with 1..4 components are handled; missing components
+ * are filled in as (0, 0, 1) for y, z and w respectively.
+ *
+ * Returns TRUE when the attribute was emitted, FALSE when the caller has
+ * to fall back to a regular vertex array (unsupported format, or the
+ * buffer could not be mapped).  Nothing is written to the command stream
+ * on the FALSE path.
+ */
+static boolean
+nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib,
+                      struct pipe_vertex_element *ve,
+                      struct pipe_vertex_buffer *vb)
+{
+       struct pipe_winsys *ws = nv30->pipe.winsys;
+       const float *v;
+       char *map;
+       int ncomp;
+
+       if (nv30_vbo_type(ve->src_format) !=
+           NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT)
+               return FALSE;
+
+       /* Reject bad component counts before opening the packet, so that a
+        * 4-dword header is never left without its payload.
+        */
+       ncomp = nv30_vbo_ncomp(ve->src_format);
+       if (ncomp < 1 || ncomp > 4)
+               return FALSE;
+
+       map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+       if (!map)
+               return FALSE;
+
+       /* byte offsets: do the arithmetic on a char pointer, not void * */
+       v = (const float *)(map + vb->buffer_offset + ve->src_offset);
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_ATTR_4F_X(attrib), 4);
+       OUT_RINGf(v[0]);
+       OUT_RINGf(ncomp > 1 ? v[1] : 0.0f);
+       OUT_RINGf(ncomp > 2 ? v[2] : 0.0f);
+       OUT_RINGf(ncomp > 3 ? v[3] : 1.0f);
+
+       ws->buffer_unmap(ws, vb->buffer);
+
+       return TRUE;
+}
+
+/* Rebuild the vertex array setup for the active vertex program.
+ *
+ * For every hardware input slot up to the highest one the program reads
+ * (vp->ir), this computes the VERTEX_ARRAY_FORMAT word, records in
+ * nv30->vb[] / nv30->vb_enable which slots need a buffer address, and
+ * finally emits the format words.  Constant (pitch == 0) attributes are
+ * emitted as immediates when nv30_vbo_static_attrib() can handle them.
+ *
+ * If the program reads none of the 16 slots, vb_enable is cleared and
+ * nothing is emitted.
+ */
+static void
+nv30_vbo_arrays_update(struct nv30_context *nv30)
+{
+       struct nv30_vertex_program *vp = nv30->vertprog.active;
+       uint32_t inputs, vtxfmt[16];
+       int hw, num_hw = 0;
+
+       nv30->vb_enable = 0;
+
+       /* num_hw = index of the highest input slot in use, plus one */
+       inputs = vp->ir;
+       for (hw = 0; hw < 16 && inputs; hw++) {
+               if (inputs & (1 << hw)) {
+                       num_hw = hw + 1;
+                       inputs &= ~(1 << hw);
+               }
+       }
+
+       if (num_hw == 0)
+               return;
+
+       inputs = vp->ir;
+       for (hw = 0; hw < num_hw; hw++) {
+               struct pipe_vertex_element *ve;
+               struct pipe_vertex_buffer *vb;
+
+               /* slot not read by the program: placeholder format only */
+               if (!(inputs & (1 << hw))) {
+                       vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT;
+                       continue;
+               }
+
+               ve = &nv30->vtxelt[hw];
+               vb = &nv30->vtxbuf[ve->vertex_buffer_index];
+
+               if (vb->pitch == 0) {
+                       vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT;
+                       if (nv30_vbo_static_attrib(nv30, hw, ve, vb) == TRUE)
+                               continue;
+               }
+
+               nv30->vb_enable |= (1 << hw);
+               nv30->vb[hw].delta = vb->buffer_offset + ve->src_offset;
+               nv30->vb[hw].buffer = vb->buffer;
+
+               vtxfmt[hw] = ((vb->pitch << NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT) |
+                             (nv30_vbo_ncomp(ve->src_format) <<
+                              NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT) |
+                             nv30_vbo_type(ve->src_format));
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_ARRAY_FORMAT(0), num_hw);
+       OUT_RINGp (vtxfmt, num_hw);
+}
+
+/* Emit everything the hardware needs before a draw call.
+ *
+ * In order: general hardware state (nv30_emit_hw_state), the vertex array
+ * formats if NV30_NEW_ARRAYS is dirty, one relocated buffer address per
+ * slot set in nv30->vb_enable, the index buffer address/format when `ib`
+ * is non-NULL, and finally a write of 0 to method 0x1710.
+ *
+ * ib:        index buffer to bind, or NULL for none.
+ * ib_format: value OR'ed into the index buffer format word; ignored when
+ *            ib is NULL.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+nv30_vbo_validate_state(struct nv30_context *nv30,
+                       struct pipe_buffer *ib, unsigned ib_format)
+{
+       unsigned inputs;
+
+       nv30_emit_hw_state(nv30);
+
+       if (nv30->dirty & NV30_NEW_ARRAYS) {
+               nv30_vbo_arrays_update(nv30);
+               nv30->dirty &= ~NV30_NEW_ARRAYS;
+       }
+
+       /* walk the set bits of vb_enable, lowest slot first */
+       inputs = nv30->vb_enable;
+       while (inputs) {
+               unsigned a = ffs(inputs) - 1;
+
+               inputs &= ~(1 << a);
+
+               BEGIN_RING(rankine, NV34TCL_VERTEX_BUFFER_ADDRESS(a), 1);
+               OUT_RELOC (nv30->vb[a].buffer, nv30->vb[a].delta,
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW |
+                          NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
+                          NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1);
+       }
+
+       if (ib) {
+               /* two dwords: buffer address, then format */
+               BEGIN_RING(rankine, NV40TCL_IDXBUF_ADDRESS, 2);
+               OUT_RELOCl(ib, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+                          NOUVEAU_BO_RD);
+               OUT_RELOCd(ib, ib_format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+                          NOUVEAU_BO_RD | NOUVEAU_BO_OR,
+                          0, NV40TCL_IDXBUF_FORMAT_DMA1);
+       }
+
+       BEGIN_RING(rankine, 0x1710, 1);
+       OUT_RING  (0); /* vtx cache flush */
+
+       return TRUE;
+}
+
+/* Draw `count` consecutive vertices starting at `start` from the bound
+ * vertex arrays, using primitive type `mode`.
+ *
+ * The range is submitted as vertex batches of at most 256 vertices: first
+ * the remainder (count & 0xff), then full 256-vertex batches in groups of
+ * up to 2047 batch words per packet.  The context is flushed afterwards.
+ *
+ * Always returns TRUE.
+ */
+boolean
+nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
+                unsigned count)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       unsigned nr;
+       boolean ok;
+
+       /* Validation emits state, so it must run even when assert() is
+        * compiled out by NDEBUG; only the result check belongs in assert.
+        */
+       ok = nv30_vbo_validate_state(nv30, NULL, 0);
+       assert(ok);
+       (void)ok;
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (nvgl_primitive(mode));
+
+       /* partial batch first: (vertex count - 1) in the top byte */
+       nr = (count & 0xff);
+       if (nr) {
+               BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+               OUT_RING  (((nr - 1) << 24) | start);
+               start += nr;
+       }
+
+       /* then the full 256-vertex batches */
+       nr = count >> 8;
+       while (nr) {
+               unsigned push = nr > 2047 ? 2047 : nr;
+
+               nr -= push;
+
+               BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+               while (push--) {
+                       OUT_RING(((0x100 - 1) << 24) | start);
+                       start += 0x100;
+               }
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (0);
+
+       pipe->flush(pipe, 0);
+       return TRUE;
+}
+
+/* Push `count` 8-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline into the command stream.
+ *
+ * An odd leading index goes out alone through the U32 element method; the
+ * rest are packed two per dword (low half = first index) through the U16
+ * element method, at most 2046 indices per packet.
+ */
+static INLINE void
+nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
+                      unsigned start, unsigned count)
+{
+       uint8_t *elts = (uint8_t *)ib + start;
+       int push, i;
+
+       /* method names match the u16/u32 variants below (rankine object) */
+       if (count & 1) {
+               BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+               OUT_RING  (elts[0]);
+               elts++; count--;
+       }
+
+       while (count) {
+               push = MIN2(count, 2046);
+
+               /* two indices per dword: the packet carries push / 2 dwords */
+               BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+               for (i = 0; i < push; i+=2)
+                       OUT_RING((elts[i+1] << 16) | elts[i]);
+
+               count -= push;
+               elts  += push;
+       }
+}
+
+/* Push `count` 16-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline into the command stream.
+ *
+ * An odd leading index goes out alone through the U32 element method; the
+ * rest are packed two per dword (low half = first index) through the U16
+ * element method, at most 2046 indices per packet.
+ */
+static INLINE void
+nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
+                      unsigned start, unsigned count)
+{
+       uint16_t *elts = (uint16_t *)ib + start;
+       int push, i;
+
+       if (count & 1) {
+               BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+               OUT_RING  (elts[0]);
+               elts++; count--;
+       }
+
+       while (count) {
+               push = MIN2(count, 2046);
+
+               /* two indices per dword: the packet carries push / 2 dwords */
+               BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+               for (i = 0; i < push; i+=2)
+                       OUT_RING((elts[i+1] << 16) | elts[i]);
+
+               count -= push;
+               elts  += push;
+       }
+}
+
+/* Push `count` 32-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline into the command stream, one index per dword
+ * and at most 2047 indices per packet.
+ */
+static INLINE void
+nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
+                      unsigned start, unsigned count)
+{
+       uint32_t *cur = (uint32_t *)ib + start;
+       int chunk;
+
+       for (; count; count -= chunk, cur += chunk) {
+               chunk = MIN2(count, 2047);
+
+               BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, chunk);
+               OUT_RINGp    (cur, chunk);
+       }
+}
+
+/* Indexed draw that copies the indices from `ib` into the command stream
+ * rather than having the hardware fetch them.
+ *
+ * ib_size is the index size in bytes (1, 2 or 4); start/count select the
+ * index range; mode is the primitive type.
+ *
+ * Returns TRUE on success, FALSE if the index buffer could not be mapped
+ * (nothing is drawn in that case).
+ */
+static boolean
+nv30_draw_elements_inline(struct pipe_context *pipe,
+                         struct pipe_buffer *ib, unsigned ib_size,
+                         unsigned mode, unsigned start, unsigned count)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       struct pipe_winsys *ws = pipe->winsys;
+       boolean ok;
+       void *map;
+
+       /* Validation emits state, so it must run even when assert() is
+        * compiled out by NDEBUG; only the result check belongs in assert.
+        */
+       ok = nv30_vbo_validate_state(nv30, NULL, 0);
+       assert(ok);
+       (void)ok;
+
+       /* it is the mapping that can fail here, not the buffer handle */
+       map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+       if (!map) {
+               assert(0);
+               return FALSE;
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (nvgl_primitive(mode));
+
+       switch (ib_size) {
+       case 1:
+               nv30_draw_elements_u08(nv30, map, start, count);
+               break;
+       case 2:
+               nv30_draw_elements_u16(nv30, map, start, count);
+               break;
+       case 4:
+               nv30_draw_elements_u32(nv30, map, start, count);
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (0);
+
+       ws->buffer_unmap(ws, ib);
+
+       return TRUE;
+}
+
+/* Indexed draw where the hardware fetches the indices from `ib` itself.
+ *
+ * ib_size is the index size in bytes and must be 2 or 4; start/count
+ * select the index range, which is submitted as index batches of at most
+ * 256 indices (remainder first, then full batches); mode is the primitive
+ * type.
+ *
+ * Returns TRUE on success, FALSE for an unsupported index size (nothing
+ * is emitted in that case).
+ */
+static boolean
+nv30_draw_elements_vbo(struct pipe_context *pipe,
+                      struct pipe_buffer *ib, unsigned ib_size,
+                      unsigned mode, unsigned start, unsigned count)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
+       unsigned nr, type;
+       boolean ok;
+
+       switch (ib_size) {
+       case 2:
+               type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
+               break;
+       case 4:
+               type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
+               break;
+       default:
+               assert(0);
+               /* never continue with `type` uninitialised under NDEBUG */
+               return FALSE;
+       }
+
+       /* Validation emits state, so it must run even when assert() is
+        * compiled out by NDEBUG; only the result check belongs in assert.
+        */
+       ok = nv30_vbo_validate_state(nv30, ib, type);
+       assert(ok);
+       (void)ok;
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (nvgl_primitive(mode));
+
+       /* partial batch first: (index count - 1) in the top byte */
+       nr = (count & 0xff);
+       if (nr) {
+               BEGIN_RING(rankine, NV40TCL_VB_INDEX_BATCH, 1);
+               OUT_RING  (((nr - 1) << 24) | start);
+               start += nr;
+       }
+
+       /* then the full 256-index batches */
+       nr = count >> 8;
+       while (nr) {
+               unsigned push = nr > 2047 ? 2047 : nr;
+
+               nr -= push;
+
+               BEGIN_RING_NI(rankine, NV40TCL_VB_INDEX_BATCH, push);
+               while (push--) {
+                       OUT_RING(((0x100 - 1) << 24) | start);
+                       start += 0x100;
+               }
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+       OUT_RING  (0);
+
+       return TRUE;
+}
+
+/* Indexed draw entry point.  Byte-sized indices are pushed inline through
+ * the command stream; every other index size goes through the
+ * hardware-fetched index buffer path.  The context is flushed afterwards
+ * and TRUE is returned regardless of what the helper reported.
+ */
+boolean
+nv30_draw_elements(struct pipe_context *pipe,
+                  struct pipe_buffer *indexBuffer, unsigned indexSize,
+                  unsigned mode, unsigned start, unsigned count)
+{
+       if (indexSize == 1)
+               nv30_draw_elements_inline(pipe, indexBuffer, indexSize,
+                                         mode, start, count);
+       else
+               nv30_draw_elements_vbo(pipe, indexBuffer, indexSize,
+                                      mode, start, count);
+
+       pipe->flush(pipe, 0);
+       return TRUE;
+}
+
+
diff --git a/src/mesa/pipe/nv30/nv30_vertprog.c b/src/mesa/pipe/nv30/nv30_vertprog.c
new file mode 100644 (file)
index 0000000..b712049
--- /dev/null
@@ -0,0 +1,778 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* TODO (at least...):
+ *  1. Indexed consts  + ARL
+ *  2. Arb. swz/negation
+ *  3. NV_vp11, NV_vp2, NV_vp3 features
+ *       - extra arith opcodes
+ *       - branching
+ *       - texture sampling
+ *       - indexed attribs
+ *       - indexed results
+ *  4. bugs
+ */
+
+/* Swizzle component selectors and write-mask bits.  They are defined
+ * before nv30_shader.h is included; presumably that header consumes them
+ * together with DEF_SCALE / DEF_CTEST -- TODO confirm against the header.
+ */
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv30_shader.h"
+
+/* Source-register modifier shorthands: swz(s,X,Y,Z,W) takes bare
+ * component letters.  Note that abs() reuses the name of the C library
+ * function.
+ */
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+
+/* Per-translation scratch state for turning one TGSI vertex shader into
+ * nv30 vertex program instructions.
+ */
+struct nv30_vpc {
+       /* program being filled in */
+       struct nv30_vertex_program *vp;
+
+       /* instruction most recently appended by nv30_vp_arith() */
+       struct nv30_vertex_program_exec *vpi;
+
+       /* TGSI output register index -> hardware destination id */
+       unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+       /* highest TGSI temporary index seen so far (-1 before any) */
+       int high_temp;
+       /* scratch temporaries handed out by temp() for the current
+        * TGSI instruction; they are numbered above high_temp
+        */
+       int temp_temp_count;
+
+       /* one constant register per TGSI immediate, in declaration order */
+       struct nv30_sreg *imm;
+       unsigned nr_imm;
+};
+
+/* Hand out the next scratch temporary for the current instruction.
+ * Scratch registers are numbered directly above the highest TGSI
+ * temporary seen so far.
+ */
+static struct nv30_sreg
+temp(struct nv30_vpc *vpc)
+{
+       int scratch = vpc->temp_temp_count++;
+
+       return nv30_sr(NV30SR_TEMP, vpc->high_temp + 1 + scratch);
+}
+
+/* Return a constant register for the program's constant table.
+ *
+ * pipe >= 0 names a gallium constant buffer slot: an existing table entry
+ * for that slot is reused if there is one.  pipe < 0 (immediates) always
+ * appends a new entry.  x/y/z/w are stored as the value of a newly
+ * appended entry.
+ *
+ * If the table cannot be grown, the existing table is left untouched, an
+ * error is logged and a NV30SR_NONE register is returned.
+ */
+static struct nv30_sreg
+constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+       struct nv30_vertex_program *vp = vpc->vp;
+       struct nv30_vertex_program_data *vpd;
+       int idx;
+
+       if (pipe >= 0) {
+               for (idx = 0; idx < vp->nr_consts; idx++) {
+                       if (vp->consts[idx].index == pipe)
+                               return nv30_sr(NV30SR_CONST, idx);
+               }
+       }
+
+       /* grow through a temporary so a failed realloc() neither leaks the
+        * old table nor leaves nr_consts out of step with it
+        */
+       vpd = realloc(vp->consts, sizeof(*vpd) * (vp->nr_consts + 1));
+       if (!vpd) {
+               NOUVEAU_ERR("out of memory growing constant table\n");
+               return nv30_sr(NV30SR_NONE, 0);
+       }
+       vp->consts = vpd;
+
+       idx = vp->nr_consts++;
+       vpd = &vp->consts[idx];
+
+       vpd->index = pipe;
+       vpd->value[0] = x;
+       vpd->value[1] = y;
+       vpd->value[2] = z;
+       vpd->value[3] = w;
+       return nv30_sr(NV30SR_CONST, idx);
+}
+
+/* Shorthand for nv30_vp_arith(): `o` is the opcode name without its
+ * NV30_VP_INST_ prefix (e.g. OP_MOV).
+ */
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+       nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+/* Encode source operand `src` into position `pos` (0, 1 or 2) of the
+ * four-dword instruction `hw`.
+ *
+ * Besides packing the operand bits this records side information:
+ * an INPUT source sets its bit in vp->ir and the instruction's input
+ * index; a CONST source sets the instruction's const_index (asserting
+ * that an instruction only ever refers to one constant).
+ */
+static void
+emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
+{
+       struct nv30_vertex_program *vp = vpc->vp;
+       uint32_t sr = 0;
+
+       switch (src.type) {
+       case NV30SR_TEMP:
+               sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+               sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+               break;
+       case NV30SR_INPUT:
+               sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+                      NV30_VP_SRC_REG_TYPE_SHIFT);
+               vp->ir |= (1 << src.index);
+               hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+               break;
+       case NV30SR_CONST:
+               sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+                      NV30_VP_SRC_REG_TYPE_SHIFT);
+               assert(vpc->vpi->const_index == -1 ||
+                      vpc->vpi->const_index == src.index);
+               vpc->vpi->const_index = src.index;
+               break;
+       case NV30SR_NONE:
+               /* unused operand: encoded as an INPUT-type source */
+               sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+                      NV30_VP_SRC_REG_TYPE_SHIFT);
+               break;
+       default:
+               assert(0);
+       }
+
+       if (src.negate)
+               sr |= NV30_VP_SRC_NEGATE;
+
+       /* the absolute-value flag is one bit per operand in hw[0] */
+       if (src.abs)
+               hw[0] |= (1 << (21 + pos));
+
+       sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+              (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+              (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+              (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d°.°b
+ *  \u/
+ *
+ */
+
+       /* operands 0 and 2 are split across two instruction dwords */
+       switch (pos) {
+       case 0:
+               hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+                         NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+               hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+                         NV30_VP_INST_SRC0L_SHIFT;
+               break;
+       case 1:
+               hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+               break;
+       case 2:
+               hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+                         NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+               hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+                         NV30_VP_INST_SRC2L_SHIFT;
+               break;
+       default:
+               assert(0);
+       }
+}
+
+/* Encode destination `dst` into the four-dword instruction `hw`.
+ *
+ * A TEMP destination only sets the temp id in hw[0].  An OUTPUT
+ * destination writes its index into hw[3], sets the "no temp
+ * destination" mask plus bit 20 in hw[0], and records the output in the
+ * vp->or bitmask for the colour, fog, point size and texcoord outputs.
+ *
+ * `slot` is accepted but not used here.
+ */
+static void
+emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
+{
+       struct nv30_vertex_program *vp = vpc->vp;
+
+       switch (dst.type) {
+       case NV30SR_TEMP:
+               hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+               break;
+       case NV30SR_OUTPUT:
+               switch (dst.index) {
+               case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+               case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+               case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+               case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+               case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+               case NV30_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break;
+               case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+               case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+               case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+               case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+               case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+               case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+               case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+               case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+               default:
+                       /* other outputs get no bit in vp->or */
+                       break;
+               }
+
+               hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+               hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+               break;
+       default:
+               assert(0);
+       }
+}
+
+/* Append one instruction to the program and encode it.
+ *
+ * op is the hardware vector opcode, dst the destination, s0..s2 the
+ * three source operands (NV30SR_NONE for unused ones).  The condition
+ * field is set to "always true" with an identity condition swizzle.
+ *
+ * NOTE(review): `mask` is currently not encoded -- the write-mask lines
+ * below are commented out in the original -- and `slot` is only passed
+ * through to emit_dst().
+ *
+ * If the instruction list cannot be grown, it is left untouched, an
+ * error is logged and no instruction is appended.
+ */
+static void
+nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
+             struct nv30_sreg dst, int mask,
+             struct nv30_sreg s0, struct nv30_sreg s1,
+             struct nv30_sreg s2)
+{
+       struct nv30_vertex_program *vp = vpc->vp;
+       struct nv30_vertex_program_exec *insns;
+       uint32_t *hw;
+
+       /* grow through a temporary so a failed realloc() neither leaks the
+        * old list nor leaves nr_insns out of step with it
+        */
+       insns = realloc(vp->insns, (vp->nr_insns + 1) * sizeof(*insns));
+       if (!insns) {
+               NOUVEAU_ERR("out of memory growing instruction list\n");
+               return;
+       }
+       vp->insns = insns;
+
+       vpc->vpi = &vp->insns[vp->nr_insns++];
+       memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+       vpc->vpi->const_index = -1;
+
+       hw = vpc->vpi->data;
+
+       hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+       hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+                 (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+                 (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+                 (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+       hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+//     hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+//     hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+       emit_dst(vpc, hw, slot, dst);
+       emit_src(vpc, hw, 0, s0);
+       emit_src(vpc, hw, 1, s1);
+       emit_src(vpc, hw, 2, s2);
+}
+
+/* Convert a TGSI source operand into an nv30_sreg.
+ *
+ * INPUT and TEMPORARY map straight onto their register index (a
+ * TEMPORARY also raises vpc->high_temp), CONSTANT goes through the
+ * program's constant table, and IMMEDIATE looks up the register
+ * allocated when the immediate was declared.  The TGSI absolute, negate
+ * and swizzle modifiers are copied onto the result.
+ *
+ * An unsupported register file is logged and yields a NV30SR_NONE
+ * register carrying the modifiers.
+ */
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+       struct nv30_sreg src;
+
+       switch (fsrc->SrcRegister.File) {
+       case TGSI_FILE_INPUT:
+               src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+               break;
+       case TGSI_FILE_CONSTANT:
+               src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+               break;
+       case TGSI_FILE_IMMEDIATE:
+               src = vpc->imm[fsrc->SrcRegister.Index];
+               break;
+       case TGSI_FILE_TEMPORARY:
+               if (vpc->high_temp < fsrc->SrcRegister.Index)
+                       vpc->high_temp = fsrc->SrcRegister.Index;
+               src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+               break;
+       default:
+               NOUVEAU_ERR("bad src file\n");
+               /* never hand back an uninitialised register */
+               src = nv30_sr(NV30SR_NONE, 0);
+               break;
+       }
+
+       src.abs = fsrc->SrcRegisterExtMod.Absolute;
+       src.negate = fsrc->SrcRegister.Negate;
+       src.swz[0] = fsrc->SrcRegister.SwizzleX;
+       src.swz[1] = fsrc->SrcRegister.SwizzleY;
+       src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+       src.swz[3] = fsrc->SrcRegister.SwizzleW;
+       return src;
+}
+
+/* Convert a TGSI destination operand into an nv30_sreg.
+ *
+ * OUTPUT registers are translated through vpc->output_map; TEMPORARY
+ * registers keep their index and raise vpc->high_temp.
+ *
+ * An unsupported register file is logged and yields a NV30SR_NONE
+ * register.
+ */
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+       struct nv30_sreg dst;
+
+       switch (fdst->DstRegister.File) {
+       case TGSI_FILE_OUTPUT:
+               dst = nv30_sr(NV30SR_OUTPUT,
+                             vpc->output_map[fdst->DstRegister.Index]);
+
+               break;
+       case TGSI_FILE_TEMPORARY:
+               dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+               if (vpc->high_temp < dst.index)
+                       vpc->high_temp = dst.index;
+               break;
+       default:
+               NOUVEAU_ERR("bad dst file\n");
+               /* never hand back an uninitialised register */
+               dst = nv30_sr(NV30SR_NONE, 0);
+               break;
+       }
+
+       return dst;
+}
+
+/* Translate a TGSI write mask into this file's MASK_* bit layout. */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+       return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+              ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+              ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+              ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/* Translate one TGSI instruction into one or more hardware instructions.
+ *
+ * Sources are resolved in two passes.  Temporaries go first, so that
+ * vpc->high_temp is up to date before any scratch temporary is handed
+ * out.  Inputs and constants/immediates follow: the first input index
+ * (ai) and the first constant index (ci) seen are used directly, and any
+ * further operand with a different index is copied into a scratch
+ * temporary with a MOV first.
+ *
+ * Returns TRUE on success (including for END, which emits nothing) and
+ * FALSE for an unsupported source file or opcode.
+ */
+static boolean
+nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
+                               const struct tgsi_full_instruction *finst)
+{
+       struct nv30_sreg src[3], dst, tmp;
+       struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+       int mask;
+       int ai = -1, ci = -1;
+       int i;
+
+       if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+               return TRUE;
+
+       /* scratch temporaries are per TGSI instruction */
+       vpc->temp_temp_count = 0;
+       for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+               const struct tgsi_full_src_register *fsrc;
+
+               fsrc = &finst->FullSrcRegisters[i];
+               if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+                       src[i] = tgsi_src(vpc, fsrc);
+               }
+       }
+
+       for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+               const struct tgsi_full_src_register *fsrc;
+
+               fsrc = &finst->FullSrcRegisters[i];
+               switch (fsrc->SrcRegister.File) {
+               case TGSI_FILE_INPUT:
+                       if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+                               ai = fsrc->SrcRegister.Index;
+                               src[i] = tgsi_src(vpc, fsrc);
+                       } else {
+                               /* second distinct input: go through a temp */
+                               src[i] = temp(vpc);
+                               arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+                                     tgsi_src(vpc, fsrc), none, none);
+                       }
+                       break;
+               /*XXX: index comparison is broken now that consts come from
+                *     two different register files.
+                */
+               case TGSI_FILE_CONSTANT:
+               case TGSI_FILE_IMMEDIATE:
+                       if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+                               ci = fsrc->SrcRegister.Index;
+                               src[i] = tgsi_src(vpc, fsrc);
+                       } else {
+                               /* second distinct constant: go through a temp */
+                               src[i] = temp(vpc);
+                               arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+                                     tgsi_src(vpc, fsrc), none, none);
+                       }
+                       break;
+               case TGSI_FILE_TEMPORARY:
+                       /* handled above */
+                       break;
+               default:
+                       NOUVEAU_ERR("bad src file\n");
+                       return FALSE;
+               }
+       }
+
+       dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+       mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+       /* Calls made with slot 0 pass their operands in s0/s1 (ADD and SUB
+        * use s0/s2); calls made with slot 1 pass their single operand in s2.
+        */
+       switch (finst->Instruction.Opcode) {
+       case TGSI_OPCODE_ABS:
+               arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+               break;
+       case TGSI_OPCODE_ADD:
+               arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+               break;
+       case TGSI_OPCODE_ARL:
+               arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_DP3:
+               arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_DP4:
+               arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_DPH:
+               arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_DST:
+               arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_EX2:
+               arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_EXP:
+               arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_FLR:
+               arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_FRC:
+               arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_LG2:
+               arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_LIT:
+               arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_LOG:
+               arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_MAD:
+               arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+               break;
+       case TGSI_OPCODE_MAX:
+               arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_MIN:
+               arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_MOV:
+               arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+               break;
+       case TGSI_OPCODE_MUL:
+               arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_POW:
+               /* pow(a, b) = ex2(b * lg2(a)), computed on the x channel */
+               tmp = temp(vpc);
+               arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+                     swz(src[0], X, X, X, X));
+               arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+                     swz(src[1], X, X, X, X), none);
+               arith(vpc, 1, OP_EX2, dst, mask, none, none,
+                     swz(tmp, X, X, X, X));
+               break;
+       case TGSI_OPCODE_RCP:
+               arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_RET:
+               /* nothing is emitted for RET */
+               break;
+       case TGSI_OPCODE_RSQ:
+               arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_SGE:
+               arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SLT:
+               arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SUB:
+               /* a - b is emitted as a + (-b) */
+               arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+               break;
+       case TGSI_OPCODE_XPD:
+               /* cross product: one MUL into a scratch temp, then a MAD
+                * that subtracts it; w is never written
+                */
+               tmp = temp(vpc);
+               arith(vpc, 0, OP_MUL, tmp, mask,
+                     swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+               arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+                     swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+                     neg(tmp));
+               break;
+       default:
+               NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/* Record which hardware destination a declared TGSI output maps to.
+ *
+ * The destination is chosen from the declaration's semantic name and
+ * index and stored in vpc->output_map at the first register of the
+ * declared range.  Returns FALSE, after logging, for a semantic or
+ * semantic index that has no hardware destination.
+ */
+static boolean
+nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
+                               const struct tgsi_full_declaration *fdec)
+{
+       int hw;
+
+       switch (fdec->Semantic.SemanticName) {
+       case TGSI_SEMANTIC_POSITION:
+               hw = NV30_VP_INST_DEST_POS;
+               break;
+       case TGSI_SEMANTIC_COLOR:
+               switch (fdec->Semantic.SemanticIndex) {
+               case 0:
+                       hw = NV30_VP_INST_DEST_COL0;
+                       break;
+               case 1:
+                       hw = NV30_VP_INST_DEST_COL1;
+                       break;
+               default:
+                       NOUVEAU_ERR("bad colour semantic index\n");
+                       return FALSE;
+               }
+               break;
+       case TGSI_SEMANTIC_BCOLOR:
+               switch (fdec->Semantic.SemanticIndex) {
+               case 0:
+                       hw = NV30_VP_INST_DEST_BFC0;
+                       break;
+               case 1:
+                       hw = NV30_VP_INST_DEST_BFC1;
+                       break;
+               default:
+                       NOUVEAU_ERR("bad bcolour semantic index\n");
+                       return FALSE;
+               }
+               break;
+       case TGSI_SEMANTIC_FOG:
+               hw = NV30_VP_INST_DEST_FOGC;
+               break;
+       case TGSI_SEMANTIC_PSIZE:
+               hw = NV30_VP_INST_DEST_PSZ;
+               break;
+       case TGSI_SEMANTIC_GENERIC:
+               /* generics map onto the eight texture coordinate outputs */
+               if (fdec->Semantic.SemanticIndex > 7) {
+                       NOUVEAU_ERR("bad generic semantic index\n");
+                       return FALSE;
+               }
+               hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+               break;
+       default:
+               NOUVEAU_ERR("bad output semantic\n");
+               return FALSE;
+       }
+
+       vpc->output_map[fdec->u.DeclarationRange.First] = hw;
+       return TRUE;
+}
+
+/* Pre-pass over the shader tokens: count the immediates and allocate
+ * vpc->imm with one zeroed slot for each.
+ *
+ * Returns TRUE on success (vpc->imm is left untouched when the shader
+ * has no immediates) and FALSE if the allocation fails.
+ */
+static boolean
+nv30_vertprog_prepare(struct nv30_vpc *vpc)
+{
+       struct tgsi_parse_context p;
+       int nr_imm = 0;
+
+       tgsi_parse_init(&p, vpc->vp->pipe->tokens);
+       while (!tgsi_parse_end_of_tokens(&p)) {
+               const union tgsi_full_token *tok = &p.FullToken;
+
+               tgsi_parse_token(&p);
+               switch(tok->Token.Type) {
+               case TGSI_TOKEN_TYPE_IMMEDIATE:
+                       nr_imm++;
+                       break;
+               default:
+                       break;
+               }
+       }
+       tgsi_parse_free(&p);
+
+       if (nr_imm) {
+               vpc->imm = calloc(nr_imm, sizeof(struct nv30_sreg));
+               /* report failure to the caller instead of relying on an
+                * assert that disappears under NDEBUG
+                */
+               if (!vpc->imm)
+                       return FALSE;
+       }
+
+       return TRUE;
+}
+
+/* Translate the TGSI tokens of `vp` into hardware instructions.
+ *
+ * Output declarations fill the output map, immediates are added to the
+ * constant table, and every instruction is translated in order.  On
+ * success the final instruction (if any was emitted) is tagged with
+ * NV30_VP_INST_LAST and vp->translated is set to TRUE.  On any failure
+ * vp->translated is left as it was.
+ *
+ * All translation scratch state, including the immediate table, is
+ * released before returning.
+ */
+void
+nv30_vertprog_translate(struct nv30_context *nv30,
+                       struct nv30_vertex_program *vp)
+{
+       struct tgsi_parse_context parse;
+       struct nv30_vpc *vpc = NULL;
+
+       vpc = calloc(1, sizeof(struct nv30_vpc));
+       if (!vpc)
+               return;
+       vpc->vp = vp;
+       vpc->high_temp = -1;
+
+       if (!nv30_vertprog_prepare(vpc)) {
+               free(vpc);
+               return;
+       }
+
+       tgsi_parse_init(&parse, vp->pipe->tokens);
+
+       while (!tgsi_parse_end_of_tokens(&parse)) {
+               tgsi_parse_token(&parse);
+
+               switch (parse.FullToken.Token.Type) {
+               case TGSI_TOKEN_TYPE_DECLARATION:
+               {
+                       const struct tgsi_full_declaration *fdec;
+                       fdec = &parse.FullToken.FullDeclaration;
+                       switch (fdec->Declaration.File) {
+                       case TGSI_FILE_OUTPUT:
+                               if (!nv30_vertprog_parse_decl_output(vpc, fdec))
+                                       goto out_err;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+                       break;
+               case TGSI_TOKEN_TYPE_IMMEDIATE:
+               {
+                       const struct tgsi_full_immediate *imm;
+
+                       imm = &parse.FullToken.FullImmediate;
+                       assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+//                     assert(imm->Immediate.Size == 4);
+                       vpc->imm[vpc->nr_imm++] =
+                               constant(vpc, -1,
+                                        imm->u.ImmediateFloat32[0].Float,
+                                        imm->u.ImmediateFloat32[1].Float,
+                                        imm->u.ImmediateFloat32[2].Float,
+                                        imm->u.ImmediateFloat32[3].Float);
+               }
+                       break;
+               case TGSI_TOKEN_TYPE_INSTRUCTION:
+               {
+                       const struct tgsi_full_instruction *finst;
+                       finst = &parse.FullToken.FullInstruction;
+                       if (!nv30_vertprog_parse_instruction(vpc, finst))
+                               goto out_err;
+               }
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       /* a shader that emitted nothing has no last instruction to tag */
+       if (vp->nr_insns > 0)
+               vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+       vp->translated = TRUE;
+out_err:
+       tgsi_parse_free(&parse);
+       /* the immediate table is owned by vpc; free(NULL) is a no-op */
+       free(vpc->imm);
+       free(vpc);
+}
+
+void
+nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp)
+{ 
+       struct nouveau_winsys *nvws = nv30->nvws;
+       struct pipe_winsys *ws = nv30->pipe.winsys;
+       boolean upload_code = FALSE, upload_data = FALSE;
+       int i;
+
+       /* Translate TGSI shader into hw bytecode */
+       if (!vp->translated) {
+               nv30_vertprog_translate(nv30, vp);
+               if (!vp->translated)
+                       assert(0);
+       }
+
+       /* Allocate hw vtxprog exec slots */
+       if (!vp->exec) {
+               struct nouveau_resource *heap = nv30->vertprog.exec_heap;
+               uint vplen = vp->nr_insns;
+
+               if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+                       while (heap->next && heap->size < vplen) {
+                               struct nv30_vertex_program *evict;
+                               
+                               evict = heap->next->priv;
+                               nvws->res_free(&evict->exec);
+                       }
+
+                       if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+                               assert(0);
+               }
+
+               upload_code = TRUE;
+       }
+
+       /* Allocate hw vtxprog const slots */
+       if (vp->nr_consts && !vp->data) {
+               struct nouveau_resource *heap = nv30->vertprog.data_heap;
+
+               if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+                       while (heap->next && heap->size < vp->nr_consts) {
+                               struct nv30_vertex_program *evict;
+                               
+                               evict = heap->next->priv;
+                               nvws->res_free(&evict->data);
+                       }
+
+                       if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+                               assert(0);
+               }
+
+               /*XXX: handle this some day */
+               assert(vp->data->start >= vp->data_start_min);
+
+               upload_data = TRUE;
+               if (vp->data_start != vp->data->start)
+                       upload_code = TRUE;
+       }
+
+       /* If exec or data segments moved we need to patch the program to
+        * fixup offsets and register IDs.
+        */
+       if (vp->exec_start != vp->exec->start) {
+               for (i = 0; i < vp->nr_insns; i++) {
+                       struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+                       if (vpi->has_branch_offset) {
+                               assert(0);
+                       }
+               }
+
+               vp->exec_start = vp->exec->start;
+       }
+
+       if (vp->nr_consts && vp->data_start != vp->data->start) {
+               for (i = 0; i < vp->nr_insns; i++) {
+                       struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+                       if (vpi->const_index >= 0) {
+                               vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+                               vpi->data[1] |=
+                                       (vpi->const_index + vp->data->start) <<
+                                       NV30_VP_INST_CONST_SRC_SHIFT;
+
+                       }
+               }
+
+               vp->data_start = vp->data->start;
+       }
+
+       /* Update + Upload constant values */
+       if (vp->nr_consts) {
+               float *map = NULL;
+
+               if (nv30->vertprog.constant_buf) {
+                       map = ws->buffer_map(ws, nv30->vertprog.constant_buf,
+                                            PIPE_BUFFER_USAGE_CPU_READ);
+               }
+
+               for (i = 0; i < vp->nr_consts; i++) {
+                       struct nv30_vertex_program_data *vpd = &vp->consts[i];
+
+                       if (vpd->index >= 0) {
+                               if (!upload_data &&
+                                   !memcmp(vpd->value, &map[vpd->index * 4],
+                                           4 * sizeof(float)))
+                                       continue;
+                               memcpy(vpd->value, &map[vpd->index * 4],
+                                      4 * sizeof(float));
+                       }
+
+                       BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+                       OUT_RING  (i + vp->data->start);
+                       OUT_RINGp ((uint32_t *)vpd->value, 4);
+               }
+
+               if (map) {
+                       ws->buffer_unmap(ws, nv30->vertprog.constant_buf);
+               }
+       }
+
+       /* Upload vtxprog */
+       if (upload_code) {
+#if 0
+               for (i = 0; i < vp->nr_insns; i++) {
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+                       NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+               }
+#endif
+               BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+               OUT_RING  (vp->exec->start);
+               for (i = 0; i < vp->nr_insns; i++) {
+                       BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+                       OUT_RINGp (vp->insns[i].data, 4);
+               }
+       }
+
+       BEGIN_RING(rankine, NV34TCL_VP_START_FROM_ID, 1);
+       OUT_RING  (vp->exec->start);
+       BEGIN_RING(rankine, NV34TCL_VP_ATTRIB_EN, 2);
+       OUT_RING  (vp->ir);
+       OUT_RING  (vp->or);
+
+       nv30->vertprog.active = vp;
+}
+
+/*
+ * Release what a vertex program holds: the translated instruction and
+ * constant arrays, and the hardware exec/data slots obtained in
+ * nv30_vertprog_bind().
+ *
+ * The slots must be released here because each heap entry stores a
+ * pointer to its owning program (the priv argument of res_alloc) and the
+ * eviction loops in nv30_vertprog_bind() dereference that pointer; an
+ * entry left behind would refer to a program that no longer exists.
+ *
+ * The vp structure itself is not freed.
+ */
+void
+nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
+{
+       struct nouveau_winsys *nvws = nv30->nvws;
+
+       if (vp->nr_consts) {
+               free(vp->consts);
+               vp->consts = NULL;
+               vp->nr_consts = 0;
+       }
+       if (vp->nr_insns) {
+               free(vp->insns);
+               vp->insns = NULL;
+               vp->nr_insns = 0;
+       }
+
+       /* Hand the hw slots back to their heaps. */
+       if (vp->exec)
+               nvws->res_free(&vp->exec);
+       if (vp->data)
+               nvws->res_free(&vp->data);
+
+       /* The translated code is gone, so the flag must not claim otherwise. */
+       vp->translated = FALSE;
+}
+