Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r600 / r700_vertprog.c
1 /*
2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21
22 /*
23  * Authors:
24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25  */
26
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "tnl/t_context.h"
38 #include "program/program.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_statevars.h"
41
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "r600_emit.h"
46 #include "program/programopt.h"
47
48 #include "r700_debug.h"
49 #include "r700_vertprog.h"
50
51 unsigned int Map_Vertex_Output(r700_AssemblerBase       *pAsm, 
52                                                    struct gl_vertex_program *mesa_vp,
53                                                    unsigned int unStart)
54 {
55     unsigned int i;
56         unsigned int unBit;
57         unsigned int unTotal = unStart;
58
59     //!!!!!!! THE ORDER MATCH FS INPUT
60
61         unBit = 1 << VERT_RESULT_HPOS;
62         if(mesa_vp->Base.OutputsWritten & unBit)
63         {
64                 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
65         }
66
67         unBit = 1 << VERT_RESULT_COL0;
68         if(mesa_vp->Base.OutputsWritten & unBit)
69         {
70                 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
71         }
72
73         unBit = 1 << VERT_RESULT_COL1;
74         if(mesa_vp->Base.OutputsWritten & unBit)
75         {
76                 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
77         }
78
79         //TODO : dealing back face.
80         unBit = 1 << VERT_RESULT_BFC0;
81         if(mesa_vp->Base.OutputsWritten & unBit)
82         {
83                 pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
84         }
85
86         unBit = 1 << VERT_RESULT_BFC1;
87         if(mesa_vp->Base.OutputsWritten & unBit)
88         {
89                 pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
90         }
91
92         //TODO : dealing fog.
93         unBit = 1 << VERT_RESULT_FOGC;
94         if(mesa_vp->Base.OutputsWritten & unBit)
95         {
96                 pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
97         }
98
99         //TODO : dealing point size.
100         unBit = 1 << VERT_RESULT_PSIZ;
101         if(mesa_vp->Base.OutputsWritten & unBit)
102         {
103                 pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
104         }
105
106         for(i=0; i<8; i++)
107         {
108                 unBit = 1 << (VERT_RESULT_TEX0 + i);
109                 if(mesa_vp->Base.OutputsWritten & unBit)
110                 {
111                         pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
112                 }
113         }
114
115     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
116         {
117                 unBit = 1 << i;
118                 if(mesa_vp->Base.OutputsWritten & unBit)
119                 {
120                         pAsm->ucVP_OutputMap[i] = unTotal++;
121                 }
122         }
123
124         return (unTotal - unStart);
125 }
126
127 unsigned int Map_Vertex_Input(r700_AssemblerBase       *pAsm, 
128                                           struct gl_vertex_program *mesa_vp,
129                                           unsigned int unStart)
130 {
131         int i;
132         unsigned int unBit;
133         unsigned int unTotal = unStart;
134         for(i=0; i<VERT_ATTRIB_MAX; i++)
135         {
136                 unBit = 1 << i;
137                 if(mesa_vp->Base.InputsRead & unBit)
138                 {
139                         pAsm->ucVP_AttributeMap[i] = unTotal++;
140                 }
141         }
142         return (unTotal - unStart);
143 }
144
145 GLboolean Process_Vertex_Program_Vfetch_Instructions(
146                                                 struct r700_vertex_program *vp,
147                                                 struct gl_vertex_program   *mesa_vp)
148 {
149         int i;
150     unsigned int unBit;
151         VTX_FETCH_METHOD vtxFetchMethod;
152         vtxFetchMethod.bEnableMini          = GL_FALSE;
153         vtxFetchMethod.mega_fetch_remainder = 0;
154
155         for(i=0; i<VERT_ATTRIB_MAX; i++)
156         {
157                 unBit = 1 << i;
158                 if(mesa_vp->Base.InputsRead & unBit)
159                 {
160                         assemble_vfetch_instruction(&vp->r700AsmCode,
161                                                     i,
162                                                     vp->r700AsmCode.ucVP_AttributeMap[i],
163                                                     vp->aos_desc[i].size,
164                                                     vp->aos_desc[i].type,
165                                                     &vtxFetchMethod);
166                 }
167         }
168         
169         return GL_TRUE;
170 }
171
172 GLboolean Process_Vertex_Program_Vfetch_Instructions2(
173     struct gl_context *ctx,
174         struct r700_vertex_program *vp,
175         struct gl_vertex_program   *mesa_vp)
176 {
177     int i;
178     context_t *context = R700_CONTEXT(ctx);
179
180     VTX_FETCH_METHOD vtxFetchMethod;
181         vtxFetchMethod.bEnableMini          = GL_FALSE;
182         vtxFetchMethod.mega_fetch_remainder = 0;
183
184     for(i=0; i<context->nNumActiveAos; i++)
185     {
186         assemble_vfetch_instruction2(&vp->r700AsmCode,
187                                       vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
188                                       context->stream_desc[i].type,
189                                       context->stream_desc[i].size,
190                                       context->stream_desc[i].element,
191                                       context->stream_desc[i]._signed,
192                                       context->stream_desc[i].normalize,
193                                       context->stream_desc[i].format,
194                                      &vtxFetchMethod);
195     }
196
197     return GL_TRUE;
198 }
199
200 void Map_Vertex_Program(struct gl_context *ctx,
201                         struct r700_vertex_program *vp,
202                                                 struct gl_vertex_program   *mesa_vp)
203 {
204     r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
205         unsigned int num_inputs;
206
207         // R0 will always be used for index into vertex buffer
208         pAsm->number_used_registers = 1;
209         pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
210
211     // Map Inputs: Add 1 to mapping since R0 is used for index
212         num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
213         pAsm->number_used_registers += num_inputs;
214
215         // Create VFETCH instructions for inputs
216         if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
217         {
218                 radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
219                 return;
220         }
221
222         // Map Outputs
223         pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
224
225         pAsm->starting_export_register_number = pAsm->number_used_registers;
226
227         pAsm->number_used_registers += pAsm->number_of_exports;
228
229     /* Map temporary registers (GPRs) */
230     pAsm->starting_temp_register_number = pAsm->number_used_registers;
231
232     if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
233     {   /* arb uses NumNativeTemporaries */
234         pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
235     }
236     else
237     {   /* fix func t_vp uses NumTemporaries */
238         pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
239     }
240
241     pAsm->flag_reg_index = pAsm->number_used_registers++;
242
243     pAsm->uFirstHelpReg = pAsm->number_used_registers;
244 }
245
246 GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
247                                                                 struct gl_vertex_program   *mesa_vp)
248 {
249     GLuint i, j;
250     GLint * puiTEMPwrites;
251     struct prog_instruction *pILInst;
252     InstDeps         *pInstDeps;
253
254     puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
255     for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
256     {
257         puiTEMPwrites[i] = -1;
258     }
259
260     pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
261
262     for(i=0; i<mesa_vp->Base.NumInstructions; i++)
263     {
264         pInstDeps[i].nDstDep = -1;
265         pILInst = &(mesa_vp->Base.Instructions[i]);
266
267         //Dst
268         if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
269         {
270             //Set lastwrite for the temp
271             puiTEMPwrites[pILInst->DstReg.Index] = i;
272         }
273
274         //Src
275         for(j=0; j<3; j++)
276         {
277             if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
278             {
279                 //Set dep.
280                 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
281             }
282             else
283             {
284                 pInstDeps[i].nSrcDeps[j] = -1;
285             }
286         }
287     }
288
289     vp->r700AsmCode.pInstDeps = pInstDeps;
290
291     FREE(puiTEMPwrites);
292
293     return GL_TRUE;
294 }
295
296 struct r700_vertex_program* r700TranslateVertexShader(struct gl_context *ctx,
297                                                       struct gl_vertex_program *mesa_vp)
298 {
299         context_t *context = R700_CONTEXT(ctx);
300
301     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
302
303         struct r700_vertex_program *vp;
304         unsigned int i;
305
306         vp = calloc(1, sizeof(*vp));
307         vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
308
309     vp->constbo0 = NULL;
310
311         if (mesa_vp->IsPositionInvariant)
312         {
313                 _mesa_insert_mvp_code(ctx, vp->mesa_program);
314         }
315
316         for(i=0; i<context->nNumActiveAos; i++)
317         {
318                 vp->aos_desc[i].size   = context->stream_desc[i].size;
319                 vp->aos_desc[i].stride = context->stream_desc[i].stride;
320                 vp->aos_desc[i].type   = context->stream_desc[i].type;
321                 vp->aos_desc[i].format = context->stream_desc[i].format;
322         }
323
324         if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
325         {
326                 vp->r700AsmCode.bR6xx = 1;
327         }
328
329         //Init_Program
330         Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
331
332     if(GL_TRUE == r700->bShaderUseMemConstant)
333     {
334         vp->r700AsmCode.bUseMemConstant = GL_TRUE;
335     }
336     else
337     {
338         vp->r700AsmCode.bUseMemConstant = GL_FALSE;
339     }
340
341     vp->r700AsmCode.unAsic = 7;
342
343         Map_Vertex_Program(ctx, vp, vp->mesa_program );
344
345         if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
346         {
347                 return NULL;
348         }
349
350     InitShaderProgram(&(vp->r700AsmCode));
351
352     for(i=0; i < MAX_SAMPLERS; i++)
353     {
354         vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
355     }
356
357     vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
358
359         if(GL_FALSE == AssembleInstr(0,
360                                  0,
361                                  vp->mesa_program->Base.NumInstructions,
362                                  &(vp->mesa_program->Base.Instructions[0]),
363                                  &(vp->r700AsmCode)) )
364         {
365                 return NULL;
366         }
367
368     if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
369     {
370         return NULL;
371     }
372
373     if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
374     {
375         return GL_FALSE;
376     }
377
378     vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 
379                          : (vp->r700AsmCode.number_used_registers - 1);
380
381         vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
382
383     vp->translated = GL_TRUE;
384
385         return vp;
386 }
387
388 void r700SelectVertexShader(struct gl_context *ctx)
389 {
390     context_t *context = R700_CONTEXT(ctx);
391     struct r700_vertex_program_cont *vpc;
392     struct r700_vertex_program *vp;
393     unsigned int i;
394     GLboolean match;
395     GLbitfield InputsRead;
396
397     vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
398
399     InputsRead = vpc->mesa_program.Base.InputsRead;
400     if (vpc->mesa_program.IsPositionInvariant)
401     {
402         InputsRead |= VERT_BIT_POS;
403     }
404
405     for (vp = vpc->progs; vp; vp = vp->next)
406     {
407         match = GL_TRUE;
408         for(i=0; i<context->nNumActiveAos; i++)
409         {
410                 if (vp->aos_desc[i].size != context->stream_desc[i].size ||
411                     vp->aos_desc[i].format != context->stream_desc[i].format)
412                 {
413                         match = GL_FALSE;
414                         break;
415                 }
416         }
417         if (match)
418         {
419                 context->selected_vp = vp;
420                 return;
421         }
422     }
423
424     vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
425     if(!vp)
426     {
427         radeon_error("Failed to translate vertex shader. \n");
428         return;
429     }
430     vp->next = vpc->progs;
431     vpc->progs = vp;
432     context->selected_vp = vp;
433     return;
434 }
435
436 int getTypeSize(GLenum type)
437 {
438     switch (type) 
439     {
440     case GL_DOUBLE:
441         return sizeof(GLdouble);
442     case GL_FLOAT:
443         return sizeof(GLfloat);
444     case GL_INT:
445         return sizeof(GLint);
446     case GL_UNSIGNED_INT:
447         return sizeof(GLuint);
448     case GL_SHORT:
449         return sizeof(GLshort);
450     case GL_UNSIGNED_SHORT:
451         return sizeof(GLushort);
452     case GL_BYTE:
453         return sizeof(GLbyte);
454     case GL_UNSIGNED_BYTE:
455         return sizeof(GLubyte);
456     default:
457         assert(0);
458         return 0;
459     }
460 }
461
462 static void r700TranslateAttrib(struct gl_context *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
463 {
464     context_t *context = R700_CONTEXT(ctx);
465     
466     StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
467
468         GLuint stride;
469
470         stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size 
471                                    : input->StrideB;
472
473     if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
474 #if MESA_BIG_ENDIAN
475         || getTypeSize(input->Type) != 4
476 #endif
477        ) 
478     {
479         pStreamDesc->type = GL_FLOAT;
480
481         if (input->StrideB == 0) 
482         {
483                 pStreamDesc->stride = 0;
484         } 
485         else 
486         {
487                 pStreamDesc->stride = sizeof(GLfloat) * input->Size;
488         }
489         pStreamDesc->dwords = input->Size;
490         pStreamDesc->is_named_bo = GL_FALSE;
491     } 
492     else 
493     {
494         pStreamDesc->type = input->Type;
495         pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
496         if (!input->BufferObj->Name) 
497         {
498             if (input->StrideB == 0) 
499             {
500                 pStreamDesc->stride = 0;
501             } 
502             else 
503             {
504                 pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
505             }
506
507             pStreamDesc->is_named_bo = GL_FALSE;
508         }
509     }
510
511         pStreamDesc->size = input->Size;
512         pStreamDesc->dst_loc = context->nNumActiveAos;
513         pStreamDesc->element = unLoc;
514         pStreamDesc->format = input->Format;
515
516         switch (pStreamDesc->type) 
517         { //GetSurfaceFormat
518         case GL_FLOAT:
519                 pStreamDesc->_signed = 0;
520                 pStreamDesc->normalize = GL_FALSE;
521                 break;
522         case GL_SHORT:
523                 pStreamDesc->_signed = 1;
524                 pStreamDesc->normalize = input->Normalized;
525                 break;
526         case GL_BYTE:
527                 pStreamDesc->_signed = 1;
528                 pStreamDesc->normalize = input->Normalized;
529                 break;
530         case GL_UNSIGNED_SHORT:
531                 pStreamDesc->_signed = 0;
532                 pStreamDesc->normalize = input->Normalized;
533                 break;
534         case GL_UNSIGNED_BYTE:
535                 pStreamDesc->_signed = 0;
536                 pStreamDesc->normalize = input->Normalized;
537                 break;
538         default:
539         case GL_INT:
540         case GL_UNSIGNED_INT:
541         case GL_DOUBLE: 
542                 assert(0);
543                 break;
544         }
545         context->nNumActiveAos++;
546 }
547
548 void r700SetVertexFormat(struct gl_context *ctx, const struct gl_client_array *arrays[], int count)
549 {
550     context_t *context = R700_CONTEXT(ctx);
551     struct r700_vertex_program *vpc
552            = (struct r700_vertex_program *)ctx->VertexProgram._Current;
553
554     struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
555     unsigned int unLoc = 0;
556     unsigned int unBit = mesa_vp->Base.InputsRead;
557     context->nNumActiveAos = 0;
558
559     if (mesa_vp->IsPositionInvariant)
560     {
561         unBit |= VERT_BIT_POS;
562     }
563
564     while(unBit) 
565     {
566         if(unBit & 1)
567         {
568             r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
569         }
570
571         unBit >>= 1;
572         ++unLoc;
573     }
574     context->radeon.tcl.aos_count = context->nNumActiveAos;
575 }
576
577 void * r700GetActiveVpShaderBo(struct gl_context * ctx)
578 {
579     context_t *context = R700_CONTEXT(ctx);
580     struct r700_vertex_program *vp = context->selected_vp;;
581
582     if (vp)
583         return vp->shaderbo;
584     else
585         return NULL;
586 }
587
588 void * r700GetActiveVpShaderConstBo(struct gl_context * ctx)
589 {
590     context_t *context = R700_CONTEXT(ctx);
591     struct r700_vertex_program *vp = context->selected_vp;;
592
593     if (vp)
594         return vp->constbo0;
595     else
596         return NULL;
597 }
598
599 GLboolean r700SetupVertexProgram(struct gl_context * ctx)
600 {
601     context_t *context = R700_CONTEXT(ctx);
602     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
603     struct r700_vertex_program *vp = context->selected_vp;
604
605     struct gl_program_parameter_list *paramList;
606     unsigned int unNumParamData;
607     unsigned int ui;
608     unsigned int num_sq_vs_gprs;
609
610     if(GL_FALSE == vp->loaded)
611     {
612             if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
613             {
614                     Assemble( &(vp->r700Shader) );
615             }
616
617         /* Load vp to gpu */
618         r600EmitShader(ctx,
619                        &(vp->shaderbo),
620                        (GLvoid *)(vp->r700Shader.pProgram),
621                        vp->r700Shader.uShaderBinaryDWORDSize,
622                        "VS");
623
624         if(GL_TRUE == r700->bShaderUseMemConstant)
625         {
626             paramList = vp->mesa_program->Base.Parameters;
627             if(NULL != paramList)
628             {
629                 unNumParamData = paramList->NumParameters;
630                 r600AllocShaderConsts(ctx,
631                                &(vp->constbo0),                       
632                                unNumParamData *4*4,
633                                "VSCON");
634             }
635         }        
636
637         vp->loaded = GL_TRUE;
638     }
639
640     DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
641                  vp->r700Shader.uShaderBinaryDWORDSize);
642
643     /* TODO : enable this after MemUse fixed *=
644     (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
645     */
646
647     R600_STATECHANGE(context, vs);
648     R600_STATECHANGE(context, fs); /* hack */
649
650     r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
651     SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
652
653     r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */
654     
655     r700->vs.SQ_PGM_START_VS.u32All = 0;
656
657     SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
658              NUM_GPRS_shift, NUM_GPRS_mask);
659
660     num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift);
661  
662     if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs)
663     {
664         /* care! thich changes sq - needs idle state */
665         R600_STATECHANGE(context, sq);
666         SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1,
667                  NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
668     }
669
670     if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
671         {
672         SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
673                  STACK_SIZE_shift, STACK_SIZE_mask);
674     }
675
676     R600_STATECHANGE(context, spi);
677
678     if(vp->mesa_program->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
679         R600_STATECHANGE(context, cl);
680         SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
681         SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
682     } else if (r700->PA_CL_VS_OUT_CNTL.u32All != 0) {
683         R600_STATECHANGE(context, cl);
684         CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
685         CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
686     }
687
688     SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
689              vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
690              VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
691     SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
692              NUM_INTERP_shift, NUM_INTERP_mask);
693
694     /*
695     SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
696     CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
697     */
698
699     /* sent out shader constants. */
700     paramList = vp->mesa_program->Base.Parameters;
701
702     if(NULL != paramList) {
703         /* vp->mesa_program was cloned, not updated by glsl shader api. */
704         /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */
705         /* so, use ctx->VertexProgem._Current */       
706         struct gl_program_parameter_list *paramListOrginal = 
707                          ctx->VertexProgram._Current->Base.Parameters;
708          
709             _mesa_load_state_parameters(ctx, paramList);
710
711             if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
712                     return GL_FALSE;
713
714             R600_STATECHANGE(context, vs_consts);
715
716             r700->vs.num_consts = paramList->NumParameters;
717
718             unNumParamData = paramList->NumParameters;
719
720             for(ui=0; ui<unNumParamData; ui++) {
721             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
722             {
723                 r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
724                         r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
725                         r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
726                         r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
727             }
728             else
729             {
730                         r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
731                         r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
732                         r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
733                         r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
734             }
735             }
736
737         /* Load vp constants to gpu */
738         if(GL_TRUE == r700->bShaderUseMemConstant)
739         {
740             r600EmitShaderConsts(ctx,
741                            vp->constbo0,
742                            0,
743                            (GLvoid *)&(r700->vs.consts[0][0]),
744                            unNumParamData * 4 * 4);
745         }
746     } else
747             r700->vs.num_consts = 0;
748
749     COMPILED_SUB * pCompiledSub;
750     GLuint uj;
751     GLuint unConstOffset = r700->vs.num_consts;
752     for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
753     {
754         pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
755
756         r700->vs.num_consts += pCompiledSub->NumParameters;
757
758         for(uj=0; uj<pCompiledSub->NumParameters; uj++)
759         {
760             r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
761                     r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
762                     r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
763                     r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
764         }
765         unConstOffset += pCompiledSub->NumParameters;
766     }
767
768     return GL_TRUE;
769 }