Add structs describing vertex and pixel shader state to r300_state.
author Vladimir Dergachev <volodya@freedesktop.org>
Mon, 3 Jan 2005 15:45:10 +0000 (15:45 +0000)
committer Vladimir Dergachev <volodya@freedesktop.org>
Mon, 3 Jan 2005 15:45:10 +0000 (15:45 +0000)
Take apart program_pipeline() and move it into appropriate pieces within the driver.
Test the framework on run_flat_render()

src/mesa/drivers/dri/r300/r300_cmdbuf.c
src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_render.c
src/mesa/drivers/dri/r300/r300_state.c

index e2bb270..003cb44 100644 (file)
@@ -49,6 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_reg.h"
 #include "r300_reg.h"
 #include "r300_cmdbuf.h"
+#include "r300_emit.h"
 
 
 // Set this to 1 for extremely verbose debugging of command buffers
@@ -213,6 +214,7 @@ void r300EmitState(r300ContextPtr r300)
        r300->hw.all_dirty = GL_FALSE;
 }
 
+#if 0
 
 static __inline__ uint32_t cmducs(int reg, int count)
 {
@@ -237,6 +239,7 @@ static __inline__ uint32_t cmdvpu(int addr, int count)
 
        return cmd.u;
 }
+#endif
 
 #define CHECK( NM, COUNT )                             \
 static int check_##NM( r300ContextPtr r300,            \
@@ -608,3 +611,144 @@ void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
                cmd[0].wait.flags = flags;
        }
 }
+
+void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start)
+{
+int i, a, count;
+GLuint dw;
+LOCAL_VARS
+
+count=rmesa->state.aos_count;
+
+a=1+(count>>1)*3+(count & 1)*2;
+start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, a-1);
+e32(count);
+for(i=0;i+1<count;i+=2){
+       e32(  (rmesa->state.aos[i].element_size << 0) 
+            |(rmesa->state.aos[i].stride << 8)
+            |(rmesa->state.aos[i+1].element_size << 16)
+            |(rmesa->state.aos[i+1].stride << 24)
+           );
+       e32(rmesa->state.aos[i].offset+start*4*rmesa->state.aos[i].stride);
+       e32(rmesa->state.aos[i+1].offset+start*4*rmesa->state.aos[i+1].stride);
+       }
+if(count & 1){
+       e32(  (rmesa->state.aos[count-1].element_size << 0) 
+            |(rmesa->state.aos[count-1].stride << 8)
+           );
+       e32(rmesa->state.aos[count-1].offset+start*4*rmesa->state.aos[count-1].stride); 
+       }
+
+/* delay ? */
+#if 0
+e32(RADEON_CP_PACKET2);
+e32(RADEON_CP_PACKET2);
+#endif
+}
+
+void static inline upload_vertex_shader_fragment(PREFIX int dest, struct r300_vertex_shader_fragment *vsf)
+{
+int i;
+LOCAL_VARS
+
+if(vsf->length==0)return;
+
+if(vsf->length & 0x3){
+       fprintf(stderr,"VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
+       exit(-1);
+       }
+
+vsf_start_fragment(dest, vsf->length);
+for(i=0;i<vsf->length;i++)
+       e32(vsf->body.d[i]);
+
+}
+
+void r300EmitVertexShader(r300ContextPtr rmesa)
+{
+LOCAL_VARS
+
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_PROGRAM, &(rmesa->state.vertex_shader.program));
+
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX0, &(rmesa->state.vertex_shader.matrix[0]));
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX1, &(rmesa->state.vertex_shader.matrix[0]));
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX2, &(rmesa->state.vertex_shader.matrix[0]));
+
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_VECTOR0, &(rmesa->state.vertex_shader.vector[0]));
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_VECTOR1, &(rmesa->state.vertex_shader.vector[1]));
+
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_UNKNOWN1, &(rmesa->state.vertex_shader.unknown1));
+upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_UNKNOWN2, &(rmesa->state.vertex_shader.unknown2));
+
+reg_start(R300_VAP_PVS_CNTL_1, 2);
+e32(      (rmesa->state.vertex_shader.program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
+       | (rmesa->state.vertex_shader.unknown_ptr1 << R300_PVS_CNTL_1_UNKNOWN_SHIFT)
+       | (rmesa->state.vertex_shader.program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)
+    );
+e32(      (rmesa->state.vertex_shader.param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
+       | (rmesa->state.vertex_shader.param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT)
+    );
+e32( (rmesa->state.vertex_shader.unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
+   | (rmesa->state.vertex_shader.unknown_ptr3 << 0));
+
+reg_start(R300_VAP_PVS_WAITIDLE,0);
+       e32(0x00000000);
+}
+
+void r300EmitPixelShader(r300ContextPtr rmesa)
+{
+int i,k;
+LOCAL_VARS
+
+if(rmesa->state.pixel_shader.program.tex.length>0){
+       reg_start(R300_PFS_TEXI_0, rmesa->state.pixel_shader.program.tex.length-1);
+       for(i=0;i<rmesa->state.pixel_shader.program.tex.length;i++)
+               e32(rmesa->state.pixel_shader.program.tex.inst[i]);
+       }
+
+if(rmesa->state.pixel_shader.program.alu.length>0){
+       #define OUTPUT_FIELD(reg, field)  \
+               reg_start(reg,rmesa->state.pixel_shader.program.alu.length-1); \
+               for(i=0;i<rmesa->state.pixel_shader.program.alu.length;i++) \
+                       e32(rmesa->state.pixel_shader.program.alu.inst[i].field);
+       
+       OUTPUT_FIELD(R300_PFS_INSTR0_0, inst0);
+       OUTPUT_FIELD(R300_PFS_INSTR1_0, inst1);
+       OUTPUT_FIELD(R300_PFS_INSTR2_0, inst2);
+       OUTPUT_FIELD(R300_PFS_INSTR3_0, inst3);
+       #undef OUTPUT_FIELD
+       }
+
+reg_start(R300_PFS_NODE_0, 3);
+for(i=0;i<4;i++){
+       e32(  (rmesa->state.pixel_shader.program.node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
+           | (rmesa->state.pixel_shader.program.node[i].alu_end  << R300_PFS_NODE_ALU_END_SHIFT)
+           | (rmesa->state.pixel_shader.program.node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
+           | (rmesa->state.pixel_shader.program.node[i].tex_end  << R300_PFS_NODE_TEX_END_SHIFT)
+           | ( (i==3) ? R300_PFS_NODE_LAST_NODE : 0)
+           );
+       }
+
+reg_start(R300_PFS_CNTL_0, 2);
+       /*  PFS_CNTL_0 */
+e32((rmesa->state.pixel_shader.program.active_nodes-1) | (rmesa->state.pixel_shader.program.first_node_has_tex<<3));
+       /* PFS_CNTL_1 */
+e32(rmesa->state.pixel_shader.program.temp_register_count);
+       /* PFS_CNTL_2 */
+e32(     (rmesa->state.pixel_shader.program.alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+       | (rmesa->state.pixel_shader.program.alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
+       | (rmesa->state.pixel_shader.program.tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+       | (rmesa->state.pixel_shader.program.tex_end << R300_PFS_CNTL_TEX_END_SHIFT) 
+   );
+       
+if(rmesa->state.pixel_shader.param_length>0){
+       reg_start(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length*4-1);
+       for(i=0;i<rmesa->state.pixel_shader.param_length;i++){
+               efloat(rmesa->state.pixel_shader.param[i].x);
+               efloat(rmesa->state.pixel_shader.param[i].y);
+               efloat(rmesa->state.pixel_shader.param[i].z);
+               efloat(rmesa->state.pixel_shader.param[i].w);
+               }
+       }
+       
+}
index af27979..2c3f484 100644 (file)
@@ -54,9 +54,16 @@ typedef struct r300_context *r300ContextPtr;
 #include "radeon_lock.h"
 #include "mm.h"
 
+
 typedef GLuint uint32_t;
 typedef GLubyte uint8_t;
 
+  /* We should probably change types within vertex_shader 
+      and pixel_shader structure later on */
+#define CARD32 GLuint
+#include "vertex_shader.h"
+#include "pixel_shader.h"
+#undef CARD32
 
 static __inline__ uint32_t r300PackFloat32(float fl)
 {
@@ -373,7 +380,6 @@ struct r300_hw_state {
        struct r300_state_atom vof;     /* VAP output format register 0x4000 */
        struct r300_state_atom gb_enable; /* (4008) */
        struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
-       struct r300_state_atom txe;     /* tex enable (4104) */
        struct r300_state_atom unk4200; /* (4200) */
        struct r300_state_atom unk4214; /* (4214) */
        struct r300_state_atom ps;      /* pointsize (421C) */
@@ -432,6 +438,7 @@ struct r300_hw_state {
                struct r300_state_atom unknown4;
                struct r300_state_atom unknown5;                
                } tex;
+       struct r300_state_atom txe;     /* tex enable (4104) */
 };
 
 
@@ -458,9 +465,151 @@ struct r300_depthbuffer_state {
        GLfloat scale;
 };
 
+struct r300_vap_reg_state {
+          /* input register assignments */
+          int i_coords;
+          int i_color[2];
+          int i_tex[R300_MAX_TEXTURE_UNITS];
+       };
+
+/* Vertex shader state */
+
+/* 64 appears to be the maximum */
+#define VSF_MAX_FRAGMENT_LENGTH 64
+
+
+struct r300_vertex_shader_fragment {
+       int length;
+       union {
+               GLuint d[VSF_MAX_FRAGMENT_LENGTH];  
+               float f[VSF_MAX_FRAGMENT_LENGTH];
+               VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH/4];
+               } body;
+       };
+
+#define VSF_DEST_PROGRAM       0x0
+#define VSF_DEST_MATRIX0       0x200
+#define VSF_DEST_MATRIX1       0x204
+#define VSF_DEST_MATRIX2       0x208
+#define VSF_DEST_VECTOR0       0x20c
+#define VSF_DEST_VECTOR1       0x20d
+#define VSF_DEST_UNKNOWN1      0x400
+#define VSF_DEST_UNKNOWN2      0x406
+
+struct r300_vertex_shader_state {
+       struct r300_vertex_shader_fragment program;
+
+       /* a bit of a waste - each uses only a subset of allocated space..
+           but easier to program */
+       struct r300_vertex_shader_fragment matrix[3];
+       struct r300_vertex_shader_fragment vector[2];
+               
+       struct r300_vertex_shader_fragment unknown1;
+       struct r300_vertex_shader_fragment unknown2;
+               
+       int program_start;
+       int unknown_ptr1;  /* pointer within program space */
+       int program_end;
+               
+       int param_offset;
+       int param_count;
+               
+       int unknown_ptr2;  /* pointer within program space */
+       int unknown_ptr3;  /* pointer within program space */
+       };
+
+/* 64 appears to be the maximum */
+#define PSF_MAX_PROGRAM_LENGTH 64
+
+struct r300_pixel_shader_program {
+       struct {
+               int length;
+               GLuint inst[PSF_MAX_PROGRAM_LENGTH];
+               } tex;
+       
+       /* ALU instructions (logic and integer) */
+       struct {
+               int length;
+               struct {
+                       GLuint inst0;
+                       GLuint inst1;
+                       GLuint inst2;
+                       GLuint inst3;
+                       } inst[PSF_MAX_PROGRAM_LENGTH];
+               } alu;
+       
+       /* node information */
+       /* nodes are used to synchronize ALU and TEX streams */
+       /* There could be up to 4 nodes each consisting of
+          a number of TEX instructions followed by some ALU
+          instructions */
+       /* the last node of a program should always be node3 */
+       struct {
+               int tex_offset;
+               int tex_end;            
+               int alu_offset;
+               int alu_end;
+               } node[4];
+               
+       int active_nodes;       /* must be between 1 and 4, inclusive */
+       int first_node_has_tex;  /* other nodes always have it */
+       
+       int temp_register_count;  /* magic value goes into PFS_CNTL_1 */
+       
+       /* entire program */
+       int tex_offset;
+       int tex_end;
+       int alu_offset;
+       int alu_end;
+       
+       };
+       
+       #define MAX_PIXEL_SHADER_PARAMS 32
+struct r300_pixel_shader_state {
+       struct r300_pixel_shader_program program;
+       
+       /* parameters */                
+       int param_length;  /* to limit the number of unnecessary writes */
+       struct {
+               float x;
+               float y;
+               float z;
+               float w;
+               } param[MAX_PIXEL_SHADER_PARAMS];
+       };
+       
+/* 8 is somewhat bogus... it is probably something like 24 */
+#define R300_MAX_AOS_ARRAYS            8
+
+struct r300_aos_rec {
+       GLuint offset;
+       int element_size; /* in dwords */
+       int stride;       /* distance between elements, in dwords */
+       
+       #define AOS_FORMAT_FLOAT        1
+       #define AOS_FORMAT_UBYTE        2
+       #define AOS_FORMAT_FLOAT_COLOR  3
+       int format;     
+       
+       int ncomponents; /* number of components - between 1 and 4, inclusive */
+
+        /* just guesses */
+       #define REG_COORDS      0
+       #define REG_COLOR0      1
+       #define REG_TEX0        2
+       int reg; /* which register they are assigned to. */
+                       
+       };
+
 struct r300_state {
        struct r300_depthbuffer_state depth;
        struct r300_texture_state texture;
+       struct r300_vap_reg_state vap_reg;
+       struct r300_vertex_shader_state vertex_shader;
+       struct r300_pixel_shader_state pixel_shader;
+       struct r300_aos_rec aos[R300_MAX_AOS_ARRAYS];
+       int aos_count;
+       
 };
 
 
index ce924eb..b7e6ff2 100644 (file)
@@ -138,7 +138,191 @@ static int r300_get_primitive_type(r300ContextPtr rmesa,
    return type;
 }
 
+/* This function compiles GL context into state registers that 
+   describe data routing inside of R300 pipeline.
+   
+   In particular, it programs input_route, output_vtx_fmt, texture
+   unit configuration and gb_output_vtx_fmt
+   
+   This function encompasses setup_AOS() from r300_lib.c
+*/
+
+
+
+static void inline r300_setup_routing(r300ContextPtr r300, GLcontext *ctx, GLboolean immediate)
+{
+int i, count=0,reg=0;
+GLuint dw, mask;
+TNLcontext *tnl = TNL_CONTEXT(ctx);
+struct vertex_buffer *VB = &tnl->vb;
+
+
+/* Stage 1 - input to VAP */
+
+/* Assign register number automatically, retaining it in rmesa->state.reg */
+
+   /* Note: immediate vertex data includes all coordinates.
+     To save bandwidth use either VBUF or state-based vertex generation */
+   
+#define CONFIGURE_AOS(v, o, r, f) \
+       {\
+       if(immediate){ \
+               r300->state.aos[count].element_size=4; \
+               r300->state.aos[count].stride=4; \
+               r300->state.aos[count].ncomponents=4; \
+               } else { \
+               r300->state.aos[count].element_size=v->size; \
+               r300->state.aos[count].stride=v->size; \
+               r300->state.aos[count].ncomponents=v->size; \
+               } \
+       r300->state.aos[count].offset=o; \
+       r300->state.aos[count].reg=reg; \
+       r300->state.aos[count].format=(f); \
+       r300->state.vap_reg.r=reg; \
+       count++; \
+       reg++; \
+       }
+
+       /* All offsets are 0 - for use by immediate mode. 
+          Should change later to handle vertex buffers */
+CONFIGURE_AOS(VB->ObjPtr, 0, i_coords, AOS_FORMAT_FLOAT);
+CONFIGURE_AOS(VB->ColorPtr[0], 0, i_color[0], AOS_FORMAT_FLOAT_COLOR);
+for(i=0;i < ctx->Const.MaxTextureUnits;i++)
+       if(ctx->Texture.Unit[i].Enabled)
+               CONFIGURE_AOS(VB->TexCoordPtr[i], 0, i_tex[i], AOS_FORMAT_FLOAT);
+               
+r300->state.aos_count=count;
 
+if(count>R300_MAX_AOS_ARRAYS){
+       fprintf(stderr, "Aieee ! AOS array count exceeded !\n");
+       exit(-1);
+       }
+               
+/* Implement AOS */
+
+
+/* setup INPUT_ROUTE */
+
+R300_STATECHANGE(r300, vir[0]);
+for(i=0;i+1<count;i+=2){
+       dw=(r300->state.aos[i].ncomponents-1) 
+          | ((r300->state.aos[i].reg)<<8)
+          | (r300->state.aos[i].format<<14)
+          | (((r300->state.aos[i+1].ncomponents-1) 
+          | ((r300->state.aos[i+1].reg)<<8)
+          | (r300->state.aos[i+1].format<<14))<<16);
+          
+       if(i+2==count){
+               dw|=(1<<(13+16));
+               }
+       r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
+       }
+if(count & 1){
+       dw=(r300->state.aos[count-1].ncomponents-1)
+          | (r300->state.aos[count-1].format<<14)
+          | ((r300->state.aos[count-1].reg)<<8)
+          | (1<<13);
+       r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(count>>1)]=dw;
+       }
+/* Set the rest of INPUT_ROUTE_0 to 0 */
+for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
+
+/* Mesa assumes that all missing components are from (0, 0, 0, 1) */
+#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
+
+#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
+       | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
+
+R300_STATECHANGE(r300, vir[1]);
+       
+for(i=0;i+1<count;i+=2){
+       /* do i first.. */
+       mask=(1<<(r300->state.aos[i].ncomponents*3))-1;
+       dw=(ALL_COMPONENTS & mask)
+        | (ALL_DEFAULT & ~mask)
+        | R300_INPUT_ROUTE_ENABLE;
+        
+       /* i+1 */
+       mask=(1<<(r300->state.aos[i+1].ncomponents*3))-1;
+       dw|=( 
+          (ALL_COMPONENTS & mask)
+        | (ALL_DEFAULT & ~mask)
+        | R300_INPUT_ROUTE_ENABLE
+           )<<16;
+
+       r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
+       }
+if(count & 1){
+       mask=(1<<(r300->state.aos[count-1].ncomponents*3))-1;
+       dw=(ALL_COMPONENTS & mask)
+        | (ALL_DEFAULT & ~mask)
+        | R300_INPUT_ROUTE_ENABLE;
+       r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(count>>1)]=dw;
+       }
+/* Set the rest of INPUT_ROUTE_1 to 0 */
+for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=(0x0);
+
+/* Set up input_cntl */
+
+R300_STATECHANGE(r300, vic);
+r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555;  /* Hard coded value, no idea what it means */
+
+r300->hw.vic.cmd[R300_VIC_CNTL_1]=R300_INPUT_CNTL_POS
+                               | R300_INPUT_CNTL_COLOR;
+
+for(i=0;i < ctx->Const.MaxTextureUnits;i++)
+       if(ctx->Texture.Unit[i].Enabled)
+               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
+
+/* Stage 3: VAP output */
+R300_STATECHANGE(r300, vof);
+r300->hw.vof.cmd[R300_VOF_CNTL_0]=R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
+                               | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
+
+r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
+for(i=0;i < ctx->Const.MaxTextureUnits;i++)
+       if(ctx->Texture.Unit[i].Enabled)
+               r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
+}
+
+static inline void r300_setup_textures(r300ContextPtr r300, GLcontext *ctx)
+{
+int i;
+struct r300_tex_obj *t;
+
+R300_STATECHANGE(r300, txe);
+R300_STATECHANGE(r300, tex.filter);
+R300_STATECHANGE(r300, tex.unknown1);
+R300_STATECHANGE(r300, tex.size);
+R300_STATECHANGE(r300, tex.format);
+R300_STATECHANGE(r300, tex.offset);
+R300_STATECHANGE(r300, tex.unknown4);
+R300_STATECHANGE(r300, tex.unknown5);
+
+r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0;
+
+for(i=0;i<R300_MAX_TEXTURE_UNITS;i++){
+       if((t=r300->state.texture.unit[i].texobj)!=NULL){
+               r300->hw.txe.cmd[R300_TXE_ENABLE]|=(1<<i);
+               
+               r300->hw.tex.filter.cmd[R300_TEX_CMD_0+i]=t->filter;
+               r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0+i]=t->pitch;
+               r300->hw.tex.size.cmd[R300_TEX_CMD_0+i]=t->size;
+               r300->hw.tex.format.cmd[R300_TEX_CMD_0+i]=t->format;
+               r300->hw.tex.offset.cmd[R300_TEX_CMD_0+i]=r300->radeon.radeonScreen->fbLocation+t->offset;
+               r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0+i]=0x0;
+               r300->hw.tex.unknown5.cmd[R300_TEX_CMD_0+i]=0x0;
+               
+               /* We don't know how to set this yet */
+               r300->hw.tex.format.cmd[R300_TEX_CMD_0+i]=0x88a0c;
+               }
+       }
+}
 
 /* Immediate implementation - vertex data is sent via command stream */
 
@@ -199,6 +383,25 @@ static void r300_render_flat_primitive(r300ContextPtr rmesa,
 
 }
 
+static void assign_pipeline(r300ContextPtr rmesa, R300_PIPELINE *p)
+{
+   /* Watch out ! This is buggy .. but will do for now */
+   
+   /* At least one sanity check is in order */
+   if(sizeof(rmesa->state.vertex_shader) != sizeof(p->vertex_shader)){
+       fprintf(stderr, "Aieee ! vertex_shader sizes don't match.\n");
+       exit(-1);
+       }
+   if(sizeof(rmesa->state.pixel_shader) != sizeof(p->pixel_shader)){
+       fprintf(stderr, "Aieee ! vertex_shader sizes don't match.\n");
+       exit(-1);
+       }
+   
+   memcpy(&rmesa->state.vertex_shader, &(p->vertex_shader), sizeof(rmesa->state.vertex_shader));
+   memcpy(&rmesa->state.pixel_shader, &(p->pixel_shader), sizeof(rmesa->state.pixel_shader));
+
+}
+
 static GLboolean r300_run_flat_render(GLcontext *ctx,
                                 struct tnl_pipeline_stage *stage)
 {
@@ -206,7 +409,6 @@ static GLboolean r300_run_flat_render(GLcontext *ctx,
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint i;
-   AOS_DATA vb_arrays[2];
    LOCAL_VARS
        
    /* Flush state - make sure command buffer is nice and large */
@@ -215,27 +417,6 @@ static GLboolean r300_run_flat_render(GLcontext *ctx,
        if (RADEON_DEBUG == DEBUG_PRIMS)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
-   /* setup array of structures data */
-
-   /* Note: immediate vertex data includes all coordinates.
-     To save bandwidth use either VBUF or state-based vertex generation */
-    /* xyz */
-   vb_arrays[0].element_size=4;
-   vb_arrays[0].stride=4;
-   vb_arrays[0].offset=0; /* Not used */
-   vb_arrays[0].format=AOS_FORMAT_FLOAT;
-   vb_arrays[0].ncomponents=4;
-   vb_arrays[0].reg=REG_COORDS;
-
-    /* color */
-   vb_arrays[1].element_size=4;
-   vb_arrays[1].stride=4;
-   vb_arrays[1].offset=0; /* Not used */
-   vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR;
-   vb_arrays[1].ncomponents=4;
-   vb_arrays[1].reg=REG_COLOR0;
-
-   
    /* needed before starting 3d operation .. */
    reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
        e32(0x0000000a);
@@ -249,22 +430,28 @@ static GLboolean r300_run_flat_render(GLcontext *ctx,
                                | R300_VPORT_Y_OFFSET_ENA
                                | R300_VTX_W0_FMT;
    R300_STATECHANGE(rmesa, vte);
-   
+
+   r300_setup_routing(rmesa, ctx, GL_TRUE);
+   r300_setup_textures(rmesa, ctx);
+      
    r300EmitState(rmesa);
    
-   FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16;
-   memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
-
-   FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4;
-   FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0;
-   FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0;
-   FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0;
-   FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0;
+   assign_pipeline(rmesa, &FLAT_COLOR_PIPELINE);
    
-   program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE);
+   rmesa->state.vertex_shader.matrix[0].length=16;
+   memcpy(rmesa->state.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
+
+   rmesa->state.vertex_shader.unknown2.length=4;
+   rmesa->state.vertex_shader.unknown2.body.f[0]=0.0;
+   rmesa->state.vertex_shader.unknown2.body.f[1]=0.0;
+   rmesa->state.vertex_shader.unknown2.body.f[2]=1.0;
+   rmesa->state.vertex_shader.unknown2.body.f[3]=0.0;
    
-   /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
-   setup_AOS(PASS_PREFIX vb_arrays, 2);
+   r300EmitVertexShader(rmesa);
+   r300EmitPixelShader(rmesa);
+         
+   /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */   
+   r300EmitLOAD_VBPNTR(rmesa, 0);
    
    for(i=0; i < VB->PrimitiveCount; i++){
        GLuint prim = VB->Primitive[i].mode;
@@ -288,8 +475,6 @@ static GLboolean r300_run_flat_render(GLcontext *ctx,
 
 /* We use the start part of GART texture buffer for vertices */
 
-/* 8 is somewhat bogus... it is probably something like 24 */
-#define R300_MAX_AOS_ARRAYS            8
 
 static void upload_vertex_buffer(r300ContextPtr rmesa, 
        GLcontext *ctx, AOS_DATA *array, int *n_arrays)
@@ -391,6 +576,8 @@ static GLboolean r300_run_vb_flat_render(GLcontext *ctx,
    reg_start(0x4f18,0);
        e32(0x00000003);
    
+   r300_setup_routing(rmesa, ctx, GL_FALSE);
+   
    r300EmitState(rmesa);
    
    FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16;
@@ -507,7 +694,6 @@ static GLboolean r300_run_tex_render(GLcontext *ctx,
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint i;
-   AOS_DATA vb_arrays[3];
    /* Only do 2d textures */
    struct gl_texture_object *to=ctx->Texture.Unit[0].Current2D;
    r300TexObjPtr t=to->DriverData;
@@ -518,6 +704,10 @@ static GLboolean r300_run_tex_render(GLcontext *ctx,
       All the time for now.. */
    r300UpdateTextureState(ctx);
    
+   r300_setup_routing(rmesa, ctx, GL_TRUE);
+   r300_setup_textures(rmesa, ctx);
+   exit(-1);
+      
    /* Flush state - make sure command buffer is nice and large */
    r300Flush(ctx);
    
@@ -528,34 +718,6 @@ static GLboolean r300_run_tex_render(GLcontext *ctx,
        if (RADEON_DEBUG == DEBUG_PRIMS)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
-   /* setup array of structures data */
-
-   /* Note: immediate vertex data includes all coordinates.
-     To save bandwidth use either VBUF or state-based vertex generation */
-    /* xyzw */
-   vb_arrays[0].element_size=4;
-   vb_arrays[0].stride=4;
-   vb_arrays[0].offset=0; /* Not used */
-   vb_arrays[0].format=AOS_FORMAT_FLOAT;
-   vb_arrays[0].ncomponents=4;
-   vb_arrays[0].reg=REG_COORDS;
-
-    /* color */
-   vb_arrays[1].element_size=4;
-   vb_arrays[1].stride=4;
-   vb_arrays[1].offset=0; /* Not used */
-   vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR;
-   vb_arrays[1].ncomponents=4;
-   vb_arrays[1].reg=REG_COLOR0;
-
-    /* texture coordinates */
-   vb_arrays[2].element_size=4;
-   vb_arrays[2].stride=4;
-   vb_arrays[2].offset=0; /* Not used */
-   vb_arrays[2].format=AOS_FORMAT_FLOAT;
-   vb_arrays[2].ncomponents=4;
-   vb_arrays[2].reg=REG_TEX0;
-
      
    /* needed before starting 3d operation .. */
    reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
@@ -598,11 +760,6 @@ static GLboolean r300_run_tex_render(GLcontext *ctx,
    SINGLE_TEXTURE_PIPELINE.texture_unit[0].filter=t->filter;
    
    
-   /* Upload texture, a hack, really  we can do a lot better */
-   #if 0
-   memcpy(rsp->gartTextures.map, to->Image[0][0]->Data, to->Image[0][0]->RowStride*to->Image[0][0]->Height*4);
-   #endif
-
    /* Program RS unit. This needs to be moved into R300 pipeline */   
 reg_start(R300_RS_CNTL_0,1);
        /* R300_RS_CNTL_0(4300) */
@@ -642,7 +799,7 @@ reg_start(R300_RS_INTERP_0,7);
    program_pipeline(PASS_PREFIX &SINGLE_TEXTURE_PIPELINE);
          
    /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
-   setup_AOS(PASS_PREFIX vb_arrays, 3);
+   r300EmitLOAD_VBPNTR(rmesa, 0);
    
    for(i=0; i < VB->PrimitiveCount; i++){
        GLuint prim = VB->Primitive[i].mode;
@@ -683,6 +840,7 @@ static GLboolean r300_run_render(GLcontext *ctx,
        if (RADEON_DEBUG == DEBUG_PRIMS)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
+               
    #if 1
        /* Just switch between pipelines.. We could possibly do better.. (?) */
         if(ctx->Texture.Unit[0].Enabled)
@@ -765,7 +923,9 @@ static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage)
        //FALLBACK_IF(ctx->Color.DitherFlag);
 
        /* I'm almost certain I forgot something here */
+       #if 0 /* This should work now.. */
        FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST
+       #endif
        FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND
        FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG
        FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH
index a233022..87a4111 100644 (file)
@@ -57,6 +57,48 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_reg.h"
 #include "r300_program.h"
 
+static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       int pp_misc = rmesa->hw.at.cmd[R300_AT_ALPHA_TEST];
+       GLubyte refByte;
+
+       CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+       R300_STATECHANGE(rmesa, at);
+
+       pp_misc &= ~(R300_ALPHA_TEST_OP_MASK | R300_REF_ALPHA_MASK);
+       pp_misc |= (refByte & R300_REF_ALPHA_MASK);
+
+       switch (func) {
+       case GL_NEVER:
+               pp_misc |= R300_ALPHA_TEST_FAIL;
+               break;
+       case GL_LESS:
+               pp_misc |= R300_ALPHA_TEST_LESS;
+               break;
+       case GL_EQUAL:
+               pp_misc |= R300_ALPHA_TEST_EQUAL;
+               break;
+       case GL_LEQUAL:
+               pp_misc |= R300_ALPHA_TEST_LEQUAL;
+               break;
+       case GL_GREATER:
+               pp_misc |= R300_ALPHA_TEST_GREATER;
+               break;
+       case GL_NOTEQUAL:
+               pp_misc |= R300_ALPHA_TEST_NEQUAL;
+               break;
+       case GL_GEQUAL:
+               pp_misc |= R300_ALPHA_TEST_GEQUAL;
+               break;
+       case GL_ALWAYS:
+               pp_misc |= R300_ALPHA_TEST_PASS;
+               break;
+       }
+
+       rmesa->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
+}
 
 /**
  * Update our tracked culling state based on Mesa's state.
@@ -226,6 +268,18 @@ static void r300ColorMask(GLcontext* ctx,
 }
 
 /* =============================================================
+ * Point state
+ */
+static void r300PointSize(GLcontext * ctx, GLfloat size)
+{
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       
+       /* This might need fixing later */
+       R300_STATECHANGE(r300, vps);
+       r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
+}
+
+/* =============================================================
  * Window position and viewport transformation
  */
 
@@ -589,6 +643,7 @@ void r300InitStateFuncs(struct dd_function_table* functions)
        radeonInitStateFuncs(functions);
 
        functions->UpdateState = r300InvalidateState;
+       functions->AlphaFunc = r300AlphaFunc;
        functions->Enable = r300Enable;
        functions->ColorMask = r300ColorMask;
        functions->DepthFunc = r300DepthFunc;
@@ -599,5 +654,6 @@ void r300InitStateFuncs(struct dd_function_table* functions)
        /* Viewport related */
        functions->Viewport = r300Viewport;
        functions->DepthRange = r300DepthRange;
+       functions->PointSize = r300PointSize;
 }