Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r600 / evergreen_fragprog.c
1 /*
2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21
22 /*
23  * Authors:
24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25  *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26  */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35
36 #include "program/prog_parameter.h"
37 #include "program/prog_statevars.h"
38 #include "program/program.h"
39
40 #include "r600_context.h"
41 #include "r600_cmdbuf.h"
42 #include "r600_emit.h"
43
44 #include "evergreen_vertprog.h"
45 #include "evergreen_fragprog.h"
46
47 void evergreen_insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
48 {
49     static const gl_state_index winstate[STATE_LENGTH]
50          = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
51     struct prog_instruction *newInst, *inst;
52     GLint  win_size;  /* state reference */
53     GLuint wpos_temp; /* temp register */
54     int i, j;
55
56     /* PARAM win_size = STATE_FB_SIZE */
57     win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
58
59     wpos_temp = fprog->Base.NumTemporaries++;
60
61     /* scan program where WPOS is used and replace with wpos_temp */
62     inst = fprog->Base.Instructions;
63     for (i = 0; i < fprog->Base.NumInstructions; i++) {
64         for (j=0; j < 3; j++) {
65             if(inst->SrcReg[j].File == PROGRAM_INPUT && 
66                inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
67                 inst->SrcReg[j].File = PROGRAM_TEMPORARY;
68                 inst->SrcReg[j].Index = wpos_temp;
69             }
70         }
71         inst++;
72     }
73
74     _mesa_insert_instructions(&(fprog->Base), 0, 1);
75
76     newInst = fprog->Base.Instructions;
77     /* invert wpos.y
78      * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
79     newInst[0].Opcode = OPCODE_ADD;
80     newInst[0].DstReg.File = PROGRAM_TEMPORARY;
81     newInst[0].DstReg.Index = wpos_temp;
82     newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
83
84     newInst[0].SrcReg[0].File = PROGRAM_INPUT;
85     newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
86     newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
87     newInst[0].SrcReg[0].Negate = NEGATE_Y;
88
89     newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
90     newInst[0].SrcReg[1].Index = win_size;
91     newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
92
93 }
94
95 //TODO : Validate FP input with VP output.
96 void evergreen_Map_Fragment_Program(r700_AssemblerBase         *pAsm,
97                                                   struct gl_fragment_program *mesa_fp,
98                           struct gl_context *ctx) 
99 {
100         unsigned int unBit;
101     unsigned int i;
102
103     /* match fp inputs with vp exports. */
104     struct evergreen_vertex_program_cont *vpc =
105                        (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
106     GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
107     
108         pAsm->number_used_registers = 0;
109
110 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in 
111         //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
112         //MUST match order in Map_Vertex_Output
113         unBit = 1 << FRAG_ATTRIB_WPOS;
114         if(mesa_fp->Base.InputsRead & unBit)
115         {
116                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
117         }
118
119     unBit = 1 << VERT_RESULT_COL0;
120         if(OutputsWritten & unBit)
121         {
122                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
123         }
124
125         unBit = 1 << VERT_RESULT_COL1;
126         if(OutputsWritten & unBit)
127         {
128                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
129         }
130
131     unBit = 1 << VERT_RESULT_FOGC;
132     if(OutputsWritten & unBit)
133     {
134         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
135     }
136
137         for(i=0; i<8; i++)
138         {
139                 unBit = 1 << (VERT_RESULT_TEX0 + i);
140                 if(OutputsWritten & unBit)
141                 {
142                         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
143                 }
144         }
145  
146 /* order has been taken care of */ 
147 #if 1
148     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
149         {
150         unBit = 1 << i;
151         if(OutputsWritten & unBit)
152                 {
153             pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
154         }
155     }
156 #else
157     if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
158     {
159             struct evergreen_vertex_program_cont *vpc =
160                        (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
161         struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
162         struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
163         struct gl_program_parameter      * pVsParam;
164         struct gl_program_parameter      * pPsParam;
165         GLuint j, k;
166         GLuint unMaxVarying = 0;
167
168         for(i=0; i<VsVarying->NumParameters; i++)
169         {
170             pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
171         }
172
173         for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
174             {
175             unBit = 1 << i;
176             if(mesa_fp->Base.InputsRead & unBit)
177                     {
178                 j = i - FRAG_ATTRIB_VAR0;
179                 pPsParam = PsVarying->Parameters + j;
180
181                 for(k=0; k<VsVarying->NumParameters; k++)
182                 {                                       
183                     pVsParam = VsVarying->Parameters + k;
184
185                                 if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
186                     {
187                         pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;                  
188                         if(k > unMaxVarying)
189                         {
190                             unMaxVarying = k;
191                         }
192                         break;
193                     }
194                 }
195                     }
196         }
197
198         pAsm->number_used_registers += unMaxVarying + 1;
199     }
200 #endif
201     unBit = 1 << FRAG_ATTRIB_FACE;
202     if(mesa_fp->Base.InputsRead & unBit)
203     {
204         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
205     }
206
207     unBit = 1 << FRAG_ATTRIB_PNTC;
208     if(mesa_fp->Base.InputsRead & unBit)
209     {
210         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
211     }
212
213     pAsm->uIIns = pAsm->number_used_registers;
214
215 /* Map temporary registers (GPRs) */
216     pAsm->starting_temp_register_number = pAsm->number_used_registers;
217
218     if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
219     {
220             pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
221     }
222     else
223     {
224         pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
225     }
226
227 /* Output mapping */
228         pAsm->number_of_exports = 0;
229         pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
230         pAsm->starting_export_register_number = pAsm->number_used_registers;
231         unBit = 1 << FRAG_RESULT_COLOR;
232         if(mesa_fp->Base.OutputsWritten & unBit)
233         {
234                 pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
235                 pAsm->number_of_exports++;
236                 pAsm->number_of_colorandz_exports++;
237         }
238         unBit = 1 << FRAG_RESULT_DEPTH;
239         if(mesa_fp->Base.OutputsWritten & unBit)
240         {
241         pAsm->depth_export_register_number = pAsm->number_used_registers;
242                 pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
243                 pAsm->number_of_exports++;
244                 pAsm->number_of_colorandz_exports++;
245                 pAsm->pR700Shader->depthIsExported = 1;
246         }
247
248     pAsm->flag_reg_index = pAsm->number_used_registers++;
249
250     pAsm->uFirstHelpReg = pAsm->number_used_registers;
251 }
252
253 GLboolean evergreen_Find_Instruction_Dependencies_fp(struct evergreen_fragment_program *fp,
254                                                                 struct gl_fragment_program   *mesa_fp)
255 {
256     GLuint i, j;
257     GLint * puiTEMPwrites;
258     GLint * puiTEMPreads;
259     struct prog_instruction * pILInst;
260     InstDeps         *pInstDeps;
261     struct prog_instruction * texcoord_DepInst;
262     GLint              nDepInstID;
263
264     puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
265     puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
266
267     for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
268     {
269         puiTEMPwrites[i] = -1;
270         puiTEMPreads[i] = -1;
271     }
272
273     pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
274
275     for(i=0; i<mesa_fp->Base.NumInstructions; i++)
276     {
277         pInstDeps[i].nDstDep = -1;
278         pILInst = &(mesa_fp->Base.Instructions[i]);
279
280         //Dst
281         if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
282         {
283             //Set lastwrite for the temp
284             puiTEMPwrites[pILInst->DstReg.Index] = i;
285         }
286
287         //Src
288         for(j=0; j<3; j++)
289         {
290             if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
291             {
292                 //Set dep.
293                 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
294                 //Set first read
295                 if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
296                 {
297                     puiTEMPreads[pILInst->SrcReg[j].Index] = i;
298                 }
299             }
300             else
301             {
302                 pInstDeps[i].nSrcDeps[j] = -1;
303             }
304         }
305     }
306
307     fp->r700AsmCode.pInstDeps = pInstDeps;
308
309     //Find dep for tex inst    
310     for(i=0; i<mesa_fp->Base.NumInstructions; i++)
311     {
312         pILInst = &(mesa_fp->Base.Instructions[i]);
313
314         if(GL_TRUE == IsTex(pILInst->Opcode))
315         {   //src0 is the tex coord register, src1 is texunit, src2 is textype
316             nDepInstID = pInstDeps[i].nSrcDeps[0];
317             if(nDepInstID >= 0)
318             {
319                 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
320                 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
321                 {
322                     pInstDeps[nDepInstID].nDstDep = i;
323                     pInstDeps[i].nDstDep = i;
324                 }
325                 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
326                 {
327                     pInstDeps[i].nDstDep = i;
328                 }
329                 else
330                 {   //... other deps?
331                 }
332             }
333             // make sure that we dont overwrite src used earlier
334             nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
335             if(nDepInstID < i)
336             {
337                 pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
338                 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
339                 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
340                 {
341                     pInstDeps[nDepInstID].nDstDep = i;
342                 }
343  
344             }
345
346         }
347         }
348
349     FREE(puiTEMPwrites);
350     FREE(puiTEMPreads);
351
352     return GL_TRUE;
353 }
354
355 GLboolean evergreenTranslateFragmentShader(struct evergreen_fragment_program *fp,
356                                                              struct gl_fragment_program   *mesa_fp,
357                                  struct gl_context *ctx) 
358 {
359         GLuint    number_of_colors_exported;
360         GLboolean z_enabled = GL_FALSE;
361         GLuint    unBit, shadow_unit;
362         int i;
363         struct prog_instruction *inst;
364         gl_state_index shadow_ambient[STATE_LENGTH]
365             = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
366
367     //Init_Program
368         Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
369         
370     fp->constbo0 = NULL;
371     fp->r700AsmCode.bUseMemConstant = GL_TRUE;  
372     fp->r700AsmCode.unAsic = 8;
373
374     if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
375     {
376         evergreen_insert_wpos_code(ctx, mesa_fp);
377     }
378
379     /* add/map  consts for ARB_shadow_ambient */
380     if(mesa_fp->Base.ShadowSamplers)
381     {
382         inst = mesa_fp->Base.Instructions;
383         for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
384         {
385             if(inst->TexShadow == 1)
386             {
387                 shadow_unit = inst->TexSrcUnit;
388                 shadow_ambient[2] = shadow_unit;
389                 fp->r700AsmCode.shadow_regs[shadow_unit] = 
390                     _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
391             }
392             inst++;
393         }
394     }
395
396     evergreen_Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); 
397
398     if( GL_FALSE == evergreen_Find_Instruction_Dependencies_fp(fp, mesa_fp) )
399         {
400                 return GL_FALSE;
401     }
402
403     InitShaderProgram(&(fp->r700AsmCode));
404         
405     for(i=0; i < MAX_SAMPLERS; i++)
406     {
407          fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
408     }
409
410     fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
411
412         if( GL_FALSE == AssembleInstr(0,
413                                   0,
414                                   mesa_fp->Base.NumInstructions,
415                                   &(mesa_fp->Base.Instructions[0]), 
416                                   &(fp->r700AsmCode)) )
417         {
418                 return GL_FALSE;
419         }
420
421     if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
422     {
423         return GL_FALSE;
424     }
425
426     if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
427     {
428         return GL_FALSE;
429     }
430
431     fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 
432                          : (fp->r700AsmCode.number_used_registers - 1);
433
434         fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
435
436         number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
437
438         unBit = 1 << FRAG_RESULT_DEPTH;
439         if(mesa_fp->Base.OutputsWritten & unBit)
440         {
441                 z_enabled = GL_TRUE;
442                 number_of_colors_exported--;
443         }
444
445         /* illegal to set this to 0 */
446         if(number_of_colors_exported || z_enabled)
447         {
448             fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
449         }
450         else
451         {
452             fp->r700Shader.exportMode = (1 << 1);
453         }
454
455     fp->translated = GL_TRUE;
456
457         return GL_TRUE;
458 }
459
460 void evergreenSelectFragmentShader(struct gl_context *ctx)
461 {
462     context_t *context = EVERGREEN_CONTEXT(ctx);
463     struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
464             (ctx->FragmentProgram._Current);
465     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
466     {
467             fp->r700AsmCode.bR6xx = 1;
468     }
469
470     if (GL_FALSE == fp->translated)
471             evergreenTranslateFragmentShader(fp, &(fp->mesa_program), ctx); 
472 }
473
474 void * evergreenGetActiveFpShaderBo(struct gl_context * ctx)
475 {
476     struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
477                                            (ctx->FragmentProgram._Current);
478
479     return fp->shaderbo;
480 }
481
482 void * evergreenGetActiveFpShaderConstBo(struct gl_context * ctx)
483 {
484     struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
485                                            (ctx->FragmentProgram._Current);
486
487     return fp->constbo0;
488 }
489
490 GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx)
491 {
492     context_t *context = EVERGREEN_CONTEXT(ctx);
493     EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
494     struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
495                                            (ctx->FragmentProgram._Current);
496     r700_AssemblerBase         *pAsm = &(fp->r700AsmCode);
497     struct gl_fragment_program *mesa_fp = &(fp->mesa_program);    
498     unsigned int ui, i;
499     unsigned int unNumOfReg;
500     unsigned int unBit;
501     GLuint exportCount;
502     GLboolean point_sprite = GL_FALSE;
503
504     if(GL_FALSE == fp->loaded)
505     {
506             if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
507             {
508                     Assemble( &(fp->r700Shader) );
509             }
510
511         r600EmitShader(ctx,
512                        &(fp->shaderbo),
513                        (GLvoid *)(fp->r700Shader.pProgram),
514                        fp->r700Shader.uShaderBinaryDWORDSize,
515                        "FS");
516         
517         fp->loaded = GL_TRUE;
518     }
519
520     /* TODO : enable this after MemUse fixed *=
521     (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
522     */
523
524     EVERGREEN_STATECHANGE(context, sq);
525
526     evergreen->SQ_PGM_RESOURCES_PS.u32All = 0;
527     SETbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
528
529     evergreen->ps.SQ_ALU_CONST_CACHE_PS_0.u32All = 0; 
530     evergreen->ps.SQ_PGM_START_PS.u32All = 0;         
531
532     EVERGREEN_STATECHANGE(context, spi);
533
534     unNumOfReg = fp->r700Shader.nRegs + 1;
535
536     ui = (evergreen->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
537
538     /* PS uses fragment.position */
539     if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
540     {
541         ui += 1;
542         SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
543         SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
544         SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
545         SETbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
546     }
547     else
548     {
549         CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
550         CLEARbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
551     }
552
553     if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
554     {
555         ui += 1;
556         SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
557         SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
558         SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
559         SETfield(evergreen->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
560     }
561     else
562     {
563         CLEARbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
564     }
565
566     /* see if we need any point_sprite replacements */
567     for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++)
568     {
569         if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE)
570             point_sprite = GL_TRUE;
571     }
572
573     if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
574     {
575         /* for FRAG_ATTRIB_PNTC we need to increase num_interp */
576         if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
577         {
578             ui++;
579             SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
580         }
581         SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
582         SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
583         SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
584         SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
585         SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
586         if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
587             SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
588         else
589             CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
590     }
591     else
592     {
593         CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
594     }
595
596
597     ui = (unNumOfReg < ui) ? ui : unNumOfReg;
598
599     SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
600
601     CLEARbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
602
603     if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
604         {
605         SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
606                  STACK_SIZE_shift, STACK_SIZE_mask);
607     }
608
609     SETfield(evergreen->SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
610              EXPORT_MODE_shift, EXPORT_MODE_mask);
611
612     // emit ps input map
613     struct evergreen_vertex_program_cont *vpc =
614                        (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
615     GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
616     
617     for(ui = 0; ui < EVERGREEN_MAX_SHADER_EXPORTS; ui++)
618         evergreen->SPI_PS_INPUT_CNTL[ui].u32All = 0;
619
620     unBit = 1 << FRAG_ATTRIB_WPOS;
621     if(mesa_fp->Base.InputsRead & unBit)
622     {
623             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
624             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
625             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
626                      SEMANTIC_shift, SEMANTIC_mask);
627             CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
628     }
629
630     unBit = 1 << VERT_RESULT_COL0;
631     if(OutputsWritten & unBit)
632     {
633             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
634             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
635             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
636                      SEMANTIC_shift, SEMANTIC_mask);
637             if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
638                     SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
639             else
640                     CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
641     }
642
643     unBit = 1 << VERT_RESULT_COL1;
644     if(OutputsWritten & unBit)
645     {
646             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
647             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
648             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
649                      SEMANTIC_shift, SEMANTIC_mask);
650             if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
651                     SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
652             else
653                     CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
654     }
655
656     unBit = 1 << VERT_RESULT_FOGC;
657     if(OutputsWritten & unBit)
658     {
659             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
660             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
661             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
662                      SEMANTIC_shift, SEMANTIC_mask);
663             CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
664     }
665
666     for(i=0; i<8; i++)
667     {
668             unBit = 1 << (VERT_RESULT_TEX0 + i);
669             if(OutputsWritten & unBit)
670             {
671                     ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
672                     SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
673                     SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
674                              SEMANTIC_shift, SEMANTIC_mask);
675                     CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
676                     /* ARB_point_sprite */
677                     if(ctx->Point.CoordReplace[i] == GL_TRUE)
678                     {
679                              SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
680                     }
681             }
682     }
683
684     unBit = 1 << FRAG_ATTRIB_FACE;
685     if(mesa_fp->Base.InputsRead & unBit)
686     {
687             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
688             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
689             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
690                      SEMANTIC_shift, SEMANTIC_mask);
691             CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
692     }
693     unBit = 1 << FRAG_ATTRIB_PNTC;
694     if(mesa_fp->Base.InputsRead & unBit)
695     {
696             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
697             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
698             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
699                      SEMANTIC_shift, SEMANTIC_mask);
700             CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
701             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
702     }
703
704
705
706
707     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
708         {
709         unBit = 1 << i;
710         if(OutputsWritten & unBit)
711                 {
712             ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
713             SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
714             SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
715                              SEMANTIC_shift, SEMANTIC_mask);
716             CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
717         }
718     }
719
720     exportCount = (evergreen->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
721
722     return GL_TRUE;
723 }
724
725 GLboolean evergreenSetupFPconstants(struct gl_context * ctx)
726 {
727     context_t *context = EVERGREEN_CONTEXT(ctx);
728     EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
729     struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
730                                            (ctx->FragmentProgram._Current);
731     r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
732
733     struct gl_program_parameter_list *paramList;
734     unsigned int unNumParamData;
735     unsigned int ui;
736     int alloc_size;
737
738     /* sent out shader constants. */
739     paramList = fp->mesa_program.Base.Parameters;
740
741     if(NULL != paramList) 
742     {
743             _mesa_load_state_parameters(ctx, paramList);
744
745             if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS)
746                     return GL_FALSE;
747
748             EVERGREEN_STATECHANGE(context, sq);
749
750             evergreen->ps.num_consts = paramList->NumParameters;
751
752             unNumParamData = paramList->NumParameters;
753
754             for(ui=0; ui<unNumParamData; ui++) {
755                         evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
756                         evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
757                         evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
758                         evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
759             }
760
761             /* alloc multiple of 16 constants */
762             alloc_size = ((unNumParamData * 4 * 4) + 255) & ~255;
763
764         /* Load fp constants to gpu */
765         if(unNumParamData > 0) 
766         {            
767             radeonAllocDmaRegion(&context->radeon, 
768                                 &context->fp_Constbo, 
769                                 &context->fp_bo_offset, 
770                                 alloc_size,
771                                 256);            
772             r600EmitShaderConsts(ctx,
773                                  context->fp_Constbo,
774                                  context->fp_bo_offset,         
775                                  (GLvoid *)&(evergreen->ps.consts[0][0]),
776                                  unNumParamData * 4 * 4);
777         }
778     } else
779             evergreen->ps.num_consts = 0;
780
781     COMPILED_SUB * pCompiledSub;
782     GLuint uj;
783     GLuint unConstOffset = evergreen->ps.num_consts;
784     for(ui=0; ui<pAsm->unNumPresub; ui++)
785     {
786         pCompiledSub = pAsm->presubs[ui].pCompiledSub;
787
788         evergreen->ps.num_consts += pCompiledSub->NumParameters;
789
790         for(uj=0; uj<pCompiledSub->NumParameters; uj++)
791         {
792             evergreen->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
793                     evergreen->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
794                     evergreen->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
795                     evergreen->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
796         }
797         unConstOffset += pCompiledSub->NumParameters;
798     }
799
800     return GL_TRUE;
801 }
802