Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21
22 /*
23  * Authors:
24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25  *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26  */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_statevars.h"
37 #include "program/program.h"
38
39 #include "r600_context.h"
40 #include "r600_cmdbuf.h"
41 #include "r600_emit.h"
42
43 #include "r700_fragprog.h"
44
45 #include "r700_debug.h"
46
47 void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
48 {
49     static const gl_state_index winstate[STATE_LENGTH]
50          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0};
51     struct prog_instruction *newInst, *inst;
52     GLint  win_size;  /* state reference */
53     GLuint wpos_temp; /* temp register */
54     int i, j;
55
56     /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */
57     win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
58
59     wpos_temp = fprog->Base.NumTemporaries++;
60
61     /* scan program where WPOS is used and replace with wpos_temp */
62     inst = fprog->Base.Instructions;
63     for (i = 0; i < fprog->Base.NumInstructions; i++) {
64         for (j=0; j < 3; j++) {
65             if(inst->SrcReg[j].File == PROGRAM_INPUT && 
66                inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
67                 inst->SrcReg[j].File = PROGRAM_TEMPORARY;
68                 inst->SrcReg[j].Index = wpos_temp;
69             }
70         }
71         inst++;
72     }
73
74     _mesa_insert_instructions(&(fprog->Base), 0, 1);
75
76     newInst = fprog->Base.Instructions;
77     /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */
78     newInst[0].Opcode = OPCODE_MAD;
79     newInst[0].DstReg.File = PROGRAM_TEMPORARY;
80     newInst[0].DstReg.Index = wpos_temp;
81     newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
82
83     newInst[0].SrcReg[0].File = PROGRAM_INPUT;
84     newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
85     newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
86
87     newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
88     newInst[0].SrcReg[1].Index = win_size;
89     newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE);
90
91     newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR;
92     newInst[0].SrcReg[2].Index = win_size;
93     newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
94
95 }
96
97 //TODO : Validate FP input with VP output.
98 void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
99                                                   struct gl_fragment_program *mesa_fp,
100                           struct gl_context *ctx) 
101 {
102         unsigned int unBit;
103     unsigned int i;
104
105     /* match fp inputs with vp exports. */
106     struct r700_vertex_program_cont *vpc =
107                        (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
108     GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
109     
110         pAsm->number_used_registers = 0;
111
112 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in 
113         //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
114         //MUST match order in Map_Vertex_Output
115         unBit = 1 << FRAG_ATTRIB_WPOS;
116         if(mesa_fp->Base.InputsRead & unBit)
117         {
118                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
119         }
120
121     unBit = 1 << VERT_RESULT_COL0;
122         if(OutputsWritten & unBit)
123         {
124                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
125         }
126
127         unBit = 1 << VERT_RESULT_COL1;
128         if(OutputsWritten & unBit)
129         {
130                 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
131         }
132
133     unBit = 1 << VERT_RESULT_FOGC;
134     if(OutputsWritten & unBit)
135     {
136         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
137     }
138
139         for(i=0; i<8; i++)
140         {
141                 unBit = 1 << (VERT_RESULT_TEX0 + i);
142                 if(OutputsWritten & unBit)
143                 {
144                         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
145                 }
146         }
147  
148 /* order has been taken care of */ 
149 #if 1
150     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
151         {
152         unBit = 1 << i;
153         if(OutputsWritten & unBit)
154                 {
155             pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
156         }
157     }
158 #else
159     if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
160     {
161             struct r700_vertex_program_cont *vpc =
162                        (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
163         struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
164         struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
165         struct gl_program_parameter      * pVsParam;
166         struct gl_program_parameter      * pPsParam;
167         GLuint j, k;
168         GLuint unMaxVarying = 0;
169
170         for(i=0; i<VsVarying->NumParameters; i++)
171         {
172             pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
173         }
174
175         for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
176             {
177             unBit = 1 << i;
178             if(mesa_fp->Base.InputsRead & unBit)
179                     {
180                 j = i - FRAG_ATTRIB_VAR0;
181                 pPsParam = PsVarying->Parameters + j;
182
183                 for(k=0; k<VsVarying->NumParameters; k++)
184                 {                                       
185                     pVsParam = VsVarying->Parameters + k;
186
187                                 if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
188                     {
189                         pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;                  
190                         if(k > unMaxVarying)
191                         {
192                             unMaxVarying = k;
193                         }
194                         break;
195                     }
196                 }
197                     }
198         }
199
200         pAsm->number_used_registers += unMaxVarying + 1;
201     }
202 #endif
203     unBit = 1 << FRAG_ATTRIB_FACE;
204     if(mesa_fp->Base.InputsRead & unBit)
205     {
206         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
207     }
208
209     unBit = 1 << FRAG_ATTRIB_PNTC;
210     if(mesa_fp->Base.InputsRead & unBit)
211     {
212         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
213     }
214
215 /* Map temporary registers (GPRs) */
216     pAsm->starting_temp_register_number = pAsm->number_used_registers;
217
218     if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
219     {
220             pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
221     }
222     else
223     {
224         pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
225     }
226
227 /* Output mapping */
228         pAsm->number_of_exports = 0;
229         pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
230         pAsm->starting_export_register_number = pAsm->number_used_registers;
231
232     for (i = 0; i < FRAG_RESULT_MAX; ++i)
233     {
234         unBit = 1 << i;
235         if (mesa_fp->Base.OutputsWritten & unBit)
236         {
237             if (i == FRAG_RESULT_DEPTH)
238             {
239                 pAsm->depth_export_register_number = pAsm->number_used_registers;
240                 pAsm->pR700Shader->depthIsExported = 1;
241             }
242
243             pAsm->uiFP_OutputMap[i] = pAsm->number_used_registers++;
244             ++pAsm->number_of_exports;
245             ++pAsm->number_of_colorandz_exports;
246         }
247     }
248
249     pAsm->flag_reg_index = pAsm->number_used_registers++;
250
251     pAsm->uFirstHelpReg = pAsm->number_used_registers;
252 }
253
254 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
255                                                                 struct gl_fragment_program   *mesa_fp)
256 {
257     GLuint i, j;
258     GLint * puiTEMPwrites;
259     GLint * puiTEMPreads;
260     struct prog_instruction * pILInst;
261     InstDeps         *pInstDeps;
262     struct prog_instruction * texcoord_DepInst;
263     GLint              nDepInstID;
264
265     puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
266     puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
267
268     for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
269     {
270         puiTEMPwrites[i] = -1;
271         puiTEMPreads[i] = -1;
272     }
273
274     pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
275
276     for(i=0; i<mesa_fp->Base.NumInstructions; i++)
277     {
278         pInstDeps[i].nDstDep = -1;
279         pILInst = &(mesa_fp->Base.Instructions[i]);
280
281         //Dst
282         if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
283         {
284             //Set lastwrite for the temp
285             puiTEMPwrites[pILInst->DstReg.Index] = i;
286         }
287
288         //Src
289         for(j=0; j<3; j++)
290         {
291             if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
292             {
293                 //Set dep.
294                 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
295                 //Set first read
296                 if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
297                 {
298                     puiTEMPreads[pILInst->SrcReg[j].Index] = i;
299                 }
300             }
301             else
302             {
303                 pInstDeps[i].nSrcDeps[j] = -1;
304             }
305         }
306     }
307
308     fp->r700AsmCode.pInstDeps = pInstDeps;
309
310     //Find dep for tex inst    
311     for(i=0; i<mesa_fp->Base.NumInstructions; i++)
312     {
313         pILInst = &(mesa_fp->Base.Instructions[i]);
314
315         if(GL_TRUE == IsTex(pILInst->Opcode))
316         {   //src0 is the tex coord register, src1 is texunit, src2 is textype
317             nDepInstID = pInstDeps[i].nSrcDeps[0];
318             if(nDepInstID >= 0)
319             {
320                 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
321                 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
322                 {
323                     pInstDeps[nDepInstID].nDstDep = i;
324                     pInstDeps[i].nDstDep = i;
325                 }
326                 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
327                 {
328                     pInstDeps[i].nDstDep = i;
329                 }
330                 else
331                 {   //... other deps?
332                 }
333             }
334             // make sure that we dont overwrite src used earlier
335             nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
336             if(nDepInstID < i)
337             {
338                 pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
339                 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
340                 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
341                 {
342                     pInstDeps[nDepInstID].nDstDep = i;
343                 }
344  
345             }
346
347         }
348         }
349
350     FREE(puiTEMPwrites);
351     FREE(puiTEMPreads);
352
353     return GL_TRUE;
354 }
355
356 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
357                                                              struct gl_fragment_program   *mesa_fp,
358                                  struct gl_context *ctx) 
359 {
360     context_t *context = R700_CONTEXT(ctx);      
361     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
362
363         GLuint    number_of_colors_exported;
364         GLboolean z_enabled = GL_FALSE;
365         GLuint    unBit, shadow_unit;
366         int i;
367         struct prog_instruction *inst;
368         gl_state_index shadow_ambient[STATE_LENGTH]
369             = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
370
371     //Init_Program
372         Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
373
374     if(GL_TRUE == r700->bShaderUseMemConstant)
375     {
376         fp->r700AsmCode.bUseMemConstant = GL_TRUE;
377     }
378     else
379     {
380         fp->r700AsmCode.bUseMemConstant = GL_FALSE;
381     }
382
383     fp->r700AsmCode.unAsic = 7;
384
385     if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
386     {
387         insert_wpos_code(ctx, mesa_fp);
388     }
389
390     /* add/map  consts for ARB_shadow_ambient */
391     if(mesa_fp->Base.ShadowSamplers)
392     {
393         inst = mesa_fp->Base.Instructions;
394         for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
395         {
396             if(inst->TexShadow == 1)
397             {
398                 shadow_unit = inst->TexSrcUnit;
399                 shadow_ambient[2] = shadow_unit;
400                 fp->r700AsmCode.shadow_regs[shadow_unit] = 
401                     _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
402             }
403             inst++;
404         }
405     }
406
407     Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); 
408
409     if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
410         {
411                 return GL_FALSE;
412     }
413
414     InitShaderProgram(&(fp->r700AsmCode));
415         
416     for(i=0; i < MAX_SAMPLERS; i++)
417     {
418          fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
419     }
420
421     fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
422
423         if( GL_FALSE == AssembleInstr(0,
424                                   0,
425                                   mesa_fp->Base.NumInstructions,
426                                   &(mesa_fp->Base.Instructions[0]), 
427                                   &(fp->r700AsmCode)) )
428         {
429                 return GL_FALSE;
430         }
431
432     if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
433     {
434         return GL_FALSE;
435     }
436
437     if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
438     {
439         return GL_FALSE;
440     }
441
442     fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 
443                          : (fp->r700AsmCode.number_used_registers - 1);
444
445         fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
446
447         number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
448
449         unBit = 1 << FRAG_RESULT_DEPTH;
450         if(mesa_fp->Base.OutputsWritten & unBit)
451         {
452                 z_enabled = GL_TRUE;
453                 number_of_colors_exported--;
454         }
455
456         /* illegal to set this to 0 */
457         if(number_of_colors_exported || z_enabled)
458         {
459             fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
460         }
461         else
462         {
463             fp->r700Shader.exportMode = (1 << 1);
464         }
465
466     fp->translated = GL_TRUE;
467
468         return GL_TRUE;
469 }
470
471 void r700SelectFragmentShader(struct gl_context *ctx)
472 {
473     context_t *context = R700_CONTEXT(ctx);
474     struct r700_fragment_program *fp = (struct r700_fragment_program *)
475             (ctx->FragmentProgram._Current);
476     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
477     {
478             fp->r700AsmCode.bR6xx = 1;
479     }
480
481     if (GL_FALSE == fp->translated)
482             r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx); 
483 }
484
485 void * r700GetActiveFpShaderBo(struct gl_context * ctx)
486 {
487     struct r700_fragment_program *fp = (struct r700_fragment_program *)
488                                            (ctx->FragmentProgram._Current);
489
490     return fp->shaderbo;
491 }
492
493 void * r700GetActiveFpShaderConstBo(struct gl_context * ctx)
494 {
495     struct r700_fragment_program *fp = (struct r700_fragment_program *)
496                                            (ctx->FragmentProgram._Current);
497
498     return fp->constbo0;
499 }
500
501 GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
502 {
503     context_t *context = R700_CONTEXT(ctx);
504     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
505     struct r700_fragment_program *fp = (struct r700_fragment_program *)
506                                            (ctx->FragmentProgram._Current);
507     r700_AssemblerBase         *pAsm = &(fp->r700AsmCode);
508     struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
509     struct gl_program_parameter_list *paramList;
510     unsigned int unNumParamData;
511     unsigned int ui, i;
512     unsigned int unNumOfReg;
513     unsigned int unBit;
514     unsigned int num_sq_ps_gprs;
515     GLuint exportCount;
516     GLboolean point_sprite = GL_FALSE;
517
518     if(GL_FALSE == fp->loaded)
519     {
520             if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
521             {
522                     Assemble( &(fp->r700Shader) );
523             }
524
525         /* Load fp to gpu */
526         r600EmitShader(ctx,
527                        &(fp->shaderbo),
528                        (GLvoid *)(fp->r700Shader.pProgram),
529                        fp->r700Shader.uShaderBinaryDWORDSize,
530                        "FS");
531
532         fp->loaded = GL_TRUE;
533     }
534
535     DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
536                  fp->r700Shader.uShaderBinaryDWORDSize);
537
538     /* TODO : enable this after MemUse fixed *=
539     (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
540     */
541
542     R600_STATECHANGE(context, ps);
543
544     r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
545     SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
546
547     r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
548
549     R600_STATECHANGE(context, spi);
550
551     unNumOfReg = fp->r700Shader.nRegs + 1;
552
553     ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
554
555     /* PS uses fragment.position */
556     if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
557     {
558         ui += 1;
559         SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
560         SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
561         SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
562         SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
563     }
564     else
565     {
566         CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
567         CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
568     }
569
570     if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
571     {
572         ui += 1;
573         SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
574         SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
575         SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
576         SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
577     }
578     else
579     {
580         CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
581     }
582
583     /* see if we need any point_sprite replacements, also increase num_interp
584      * as there's no vp output for them */
585     if (ctx->Point.PointSprite)
586     {
587         for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
588         {
589             if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE)
590             {
591                 ui++;
592                 point_sprite = GL_TRUE;
593             }
594         }
595     }
596
597     if( mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
598         ui++;
599
600     if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
601     {
602         SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
603         SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
604         SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
605         SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
606         SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
607         SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
608         /* Like e.g. viewport and winding, point sprite coordinates are
609          * inverted when rendering to FBO. */
610         if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == !ctx->DrawBuffer->Name)
611             SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
612         else
613             CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
614     }
615     else
616     {
617         CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
618     }
619
620
621     ui = (unNumOfReg < ui) ? ui : unNumOfReg;
622
623     SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
624
625     num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift);
626
627     if(ui > num_sq_ps_gprs)
628     {
629         /* care! thich changes sq - needs idle state */
630         R600_STATECHANGE(context, sq);
631         SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
632     } 
633
634     CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
635
636     if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
637         {
638         SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
639                  STACK_SIZE_shift, STACK_SIZE_mask);
640     }
641
642     SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
643              EXPORT_MODE_shift, EXPORT_MODE_mask);
644
645     // emit ps input map
646     struct r700_vertex_program_cont *vpc =
647                        (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
648     GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
649     
650     for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
651         r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
652
653     unBit = 1 << FRAG_ATTRIB_WPOS;
654     if(mesa_fp->Base.InputsRead & unBit)
655     {
656             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
657             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
658             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
659                      SEMANTIC_shift, SEMANTIC_mask);
660             CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
661     }
662
663     unBit = 1 << VERT_RESULT_COL0;
664     if(OutputsWritten & unBit)
665     {
666             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
667             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
668             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
669                      SEMANTIC_shift, SEMANTIC_mask);
670             if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
671                     SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
672             else
673                     CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
674     }
675
676     unBit = 1 << VERT_RESULT_COL1;
677     if(OutputsWritten & unBit)
678     {
679             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
680             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
681             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
682                      SEMANTIC_shift, SEMANTIC_mask);
683             if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
684                     SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
685             else
686                     CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
687     }
688
689     unBit = 1 << VERT_RESULT_FOGC;
690     if(OutputsWritten & unBit)
691     {
692             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
693             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
694             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
695                      SEMANTIC_shift, SEMANTIC_mask);
696             CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
697     }
698
699     for(i=0; i<8; i++)
700     {
701             GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i];
702             unBit = 1 << (VERT_RESULT_TEX0 + i);
703             if ((OutputsWritten & unBit) || coord_replace)
704             {
705                     ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
706                     SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
707                     SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
708                              SEMANTIC_shift, SEMANTIC_mask);
709                     CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
710                     /* ARB_point_sprite */
711                     if (coord_replace)
712                     {
713                              SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
714                     }
715             }
716     }
717
718     unBit = 1 << FRAG_ATTRIB_FACE;
719     if(mesa_fp->Base.InputsRead & unBit)
720     {
721             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
722             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
723             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
724                      SEMANTIC_shift, SEMANTIC_mask);
725             CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
726     }
727     unBit = 1 << FRAG_ATTRIB_PNTC;
728     if(mesa_fp->Base.InputsRead & unBit)
729     {
730             ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
731             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
732             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
733                      SEMANTIC_shift, SEMANTIC_mask);
734             if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
735                     SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
736             else
737                     CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
738             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
739     }
740
741
742
743
744     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
745         {
746         unBit = 1 << i;
747         if(OutputsWritten & unBit)
748                 {
749             ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
750             SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
751             SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
752                              SEMANTIC_shift, SEMANTIC_mask);
753             CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
754         }
755     }
756
757     exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
758     if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
759     {
760             R600_STATECHANGE(context, cb);
761             r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
762     }
763
764     /* sent out shader constants. */
765     paramList = fp->mesa_program.Base.Parameters;
766
767     if(NULL != paramList) 
768     {
769             _mesa_load_state_parameters(ctx, paramList);
770
771             if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
772                     return GL_FALSE;
773
774             R600_STATECHANGE(context, ps_consts);
775
776             r700->ps.num_consts = paramList->NumParameters;
777
778             unNumParamData = paramList->NumParameters;
779
780             for(ui=0; ui<unNumParamData; ui++) {
781                         r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
782                         r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
783                         r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
784                         r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
785             }
786
787         /* Load fp constants to gpu */
788         if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) )
789         {
790             r600EmitShader(ctx,
791                            &(fp->constbo0),
792                            (GLvoid *)&(paramList->ParameterValues[0][0]),
793                            unNumParamData * 4,
794                            "FS Const");
795         }
796
797     } else
798             r700->ps.num_consts = 0;
799
800     COMPILED_SUB * pCompiledSub;
801     GLuint uj;
802     GLuint unConstOffset = r700->ps.num_consts;
803     for(ui=0; ui<pAsm->unNumPresub; ui++)
804     {
805         pCompiledSub = pAsm->presubs[ui].pCompiledSub;
806
807         r700->ps.num_consts += pCompiledSub->NumParameters;
808
809         for(uj=0; uj<pCompiledSub->NumParameters; uj++)
810         {
811             r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
812                     r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
813                     r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
814                     r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
815         }
816         unConstOffset += pCompiledSub->NumParameters;
817     }
818
819     return GL_TRUE;
820 }
821