Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2  * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21
22 /*
23  * Authors:
24  *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25  */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
42
43 #define USE_CF_FOR_CONTINUE_BREAK 1
44 #define USE_CF_FOR_POP_AFTER      1
45
46 struct prog_instruction noise1_insts[12] = { 
47     {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
48     {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
49     {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
50     {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
51     {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
52     {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, 
53     {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
54     {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
55     {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
56     {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
57     {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
58     {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
59 };
/*
 * Constant table handed to noise1_presub.  Only the first row carries
 * explicit values; the second row is all zeros.
 */
float noise1_const[2][4] = {
    { 0.3f, 0.9f, 0.5f, 0.3f },
    { 0.0f, 0.0f, 0.0f, 0.0f }
};
63
64 COMPILED_SUB noise1_presub = {
65     &(noise1_insts[0]),
66     12, 
67     2, 
68     1, 
69     0, 
70     &(noise1_const[0]), 
71     SWIZZLE_X, 
72     SWIZZLE_X, 
73     SWIZZLE_X, 
74     SWIZZLE_X,
75     {0,0,0},
76     0 
77 };
78
79 BITS addrmode_PVSDST(PVSDST * pPVSDST)
80 {
81         return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
82 }
83
84 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) 
85 {
86         pPVSDST->addrmode0 = addrmode & 1;
87         pPVSDST->addrmode1 = (addrmode >> 1) & 1;
88 }
89
90 void nomask_PVSDST(PVSDST * pPVSDST) 
91 {
92         pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
93 }
94
95 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) 
96 {
97         return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
98 }
99
100 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) 
101 {
102         pPVSSRC->addrmode0 = addrmode & 1;
103         pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
104 }
105
106
107 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) 
108 {
109         pPVSSRC->swizzlex = 
110         pPVSSRC->swizzley = 
111         pPVSSRC->swizzlez = 
112         pPVSSRC->swizzlew = swz;
113 }
114
115 void noswizzle_PVSSRC(PVSSRC* pPVSSRC) 
116 {
117         pPVSSRC->swizzlex = SQ_SEL_X;
118         pPVSSRC->swizzley = SQ_SEL_Y;
119         pPVSSRC->swizzlez = SQ_SEL_Z;
120         pPVSSRC->swizzlew = SQ_SEL_W;
121 }
122
123 void
124 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
125 {
126     switch (x) 
127     {
128         case SQ_SEL_X: x = pPVSSRC->swizzlex; 
129             break;
130         case SQ_SEL_Y: x = pPVSSRC->swizzley; 
131             break;
132         case SQ_SEL_Z: x = pPVSSRC->swizzlez; 
133             break;
134         case SQ_SEL_W: x = pPVSSRC->swizzlew; 
135             break;
136         default:;
137     }
138
139     switch (y) 
140     {
141         case SQ_SEL_X: y = pPVSSRC->swizzlex; 
142             break;
143         case SQ_SEL_Y: y = pPVSSRC->swizzley; 
144             break;
145         case SQ_SEL_Z: y = pPVSSRC->swizzlez; 
146             break;
147         case SQ_SEL_W: y = pPVSSRC->swizzlew; 
148             break;
149         default:;
150     }
151
152     switch (z) 
153     {
154         case SQ_SEL_X: z = pPVSSRC->swizzlex; 
155             break;
156         case SQ_SEL_Y: z = pPVSSRC->swizzley; 
157             break;
158         case SQ_SEL_Z: z = pPVSSRC->swizzlez; 
159             break;
160         case SQ_SEL_W: z = pPVSSRC->swizzlew; 
161             break;
162         default:;
163     }
164
165     switch (w) 
166     {
167         case SQ_SEL_X: w = pPVSSRC->swizzlex; 
168             break;
169         case SQ_SEL_Y: w = pPVSSRC->swizzley; 
170             break;
171         case SQ_SEL_Z: w = pPVSSRC->swizzlez; 
172             break;
173         case SQ_SEL_W: w = pPVSSRC->swizzlew; 
174             break;
175         default:;
176     }
177
178     pPVSSRC->swizzlex = x;
179     pPVSSRC->swizzley = y;
180     pPVSSRC->swizzlez = z;
181     pPVSSRC->swizzlew = w;
182 }
183
184 void neg_PVSSRC(PVSSRC* pPVSSRC) 
185 {
186         pPVSSRC->negx = 1;
187         pPVSSRC->negy = 1;
188         pPVSSRC->negz = 1;
189         pPVSSRC->negw = 1;
190 }
191
192 void noneg_PVSSRC(PVSSRC* pPVSSRC) 
193 {
194         pPVSSRC->negx = 0;
195         pPVSSRC->negy = 0;
196         pPVSSRC->negz = 0;
197         pPVSSRC->negw = 0;
198 }
199
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC* pPVSSRC) 
202 {
203         pPVSSRC->negx = !pPVSSRC->negx;
204         pPVSSRC->negy = !pPVSSRC->negy;
205         pPVSSRC->negz = !pPVSSRC->negz;
206         pPVSSRC->negw = !pPVSSRC->negw;
207 }
208
209 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
210 {
211         switch (c) 
212         {
213                 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
214                 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
215                 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
216                 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
217                 default:;
218         } 
219 }
220
221 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
222 {
223         switch (c) 
224         {
225                 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
226                 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
227                 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
228                 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
229                 default:;
230         } 
231 }
232
233 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)  
234 {
235           return (pOutVTXFmt0->point_size            |
236                           pOutVTXFmt0->edge_flag             |
237                           pOutVTXFmt0->rta_index             |
238                           pOutVTXFmt0->kill_flag             |
239                           pOutVTXFmt0->viewport_index);
240 }
241
242 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) 
243 {
244           return (pFPOutFmt->depth            | 
245                           pFPOutFmt->stencil_ref      | 
246                           pFPOutFmt->mask             | 
247                           pFPOutFmt->coverage_to_mask);
248 }
249
250 GLboolean is_reduction_opcode(PVSDWORD* dest)
251 {
252     if (dest->dst.op3 == 0) 
253     {
254         if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) 
255         {
256             return GL_TRUE;
257         }
258     }
259     return GL_FALSE;
260 }
261
#if 0 /* unused */
/*
 * EG_OP2_* counterpart of is_reduction_opcode(): true for a two-operand
 * (op3 == 0) DOT4, DOT4_IEEE or CUBE instruction.  Compiled out.
 */
GLboolean EG_is_reduction_opcode(PVSDWORD* dest)
{
    if (dest->dst.op3 != 0)
    {
        return GL_FALSE;
    }

    switch (dest->dst.opcode)
    {
    case EG_OP2_INST_DOT4:
    case EG_OP2_INST_DOT4_IEEE:
    case EG_OP2_INST_CUBE:
        return GL_TRUE;
    default:
        return GL_FALSE;
    }
}
#endif
275
276 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
277 {
278     GLuint format = FMT_INVALID;
279         GLuint uiElemSize = 0;
280
281     switch (eType)
282     {
283         case GL_BYTE:
284         case GL_UNSIGNED_BYTE:
285                         uiElemSize = 1;
286             switch(nChannels)
287             {
288                 case 1:
289                     format = FMT_8; break;
290                 case 2:
291                     format = FMT_8_8; break;
292                 case 3:
293                     /* for some (small/unaligned) strides using 4 comps works
294                      * better, probably same as GL_SHORT below
295                      * test piglit/draw-vertices */
296                     format = FMT_8_8_8_8; break;
297                 case 4:
298                     format = FMT_8_8_8_8; break;
299                 default:
300                     break;
301             }
302             break;
303
304         case GL_UNSIGNED_SHORT:
305         case GL_SHORT:
306                         uiElemSize = 2;
307             switch(nChannels)
308             {
309                 case 1:
310                     format = FMT_16; break;
311                 case 2:
312                     format = FMT_16_16; break;
313                 case 3:
314                     /* 3 comp GL_SHORT vertex format doesnt work on r700
315                        4 somehow works, test - sauerbraten  */
316                     format = FMT_16_16_16_16; break;
317                 case 4:
318                     format = FMT_16_16_16_16; break;
319                 default:
320                     break;
321             }
322             break;
323
324         case GL_UNSIGNED_INT:
325         case GL_INT:
326                         uiElemSize = 4;
327             switch(nChannels)
328             {
329                 case 1:
330                     format = FMT_32; break;
331                 case 2:
332                     format = FMT_32_32; break;
333                 case 3:
334                     format = FMT_32_32_32; break;
335                 case 4:
336                     format = FMT_32_32_32_32; break;
337                 default:
338                     break;
339             }
340             break;
341
342         case GL_FLOAT:
343                         uiElemSize = 4;
344                         switch(nChannels)
345             {
346                 case 1:
347                     format = FMT_32_FLOAT; break;
348                 case 2:
349                     format = FMT_32_32_FLOAT; break;
350                 case 3:
351                     format = FMT_32_32_32_FLOAT; break;
352                 case 4:
353                     format = FMT_32_32_32_32_FLOAT; break;
354                 default:
355                     break;
356             }
357                         break;
358         case GL_DOUBLE:
359                         uiElemSize = 8;
360             switch(nChannels)
361             {
362                 case 1:
363                     format = FMT_32_FLOAT; break;
364                 case 2:
365                     format = FMT_32_32_FLOAT; break;
366                 case 3:
367                     format = FMT_32_32_32_FLOAT; break;
368                 case 4:
369                     format = FMT_32_32_32_32_FLOAT; break;
370                 default:
371                     break;
372             }
373             break;
374         default:
375                         ;
376             //GL_ASSERT_NO_CASE();
377     }
378
379     if(NULL != pClient_size)
380     {
381             *pClient_size = uiElemSize * nChannels;
382     }
383
384     return(format);
385 }
386
387 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) 
388 {
389     if(nIsOp3 > 0)
390     {
391         return 3;
392     }
393
394     switch (opcode)
395     {
396     case SQ_OP2_INST_ADD:
397     case SQ_OP2_INST_KILLE:
398     case SQ_OP2_INST_KILLGT:
399     case SQ_OP2_INST_KILLGE:
400     case SQ_OP2_INST_KILLNE:
401     case SQ_OP2_INST_MUL: 
402     case SQ_OP2_INST_MAX:
403     case SQ_OP2_INST_MIN:
404     //case SQ_OP2_INST_MAX_DX10:
405     //case SQ_OP2_INST_MIN_DX10:
406     case SQ_OP2_INST_SETE: 
407     case SQ_OP2_INST_SETNE:
408     case SQ_OP2_INST_SETGT:
409     case SQ_OP2_INST_SETGE:
410     case SQ_OP2_INST_PRED_SETE:
411     case SQ_OP2_INST_PRED_SETGT:
412     case SQ_OP2_INST_PRED_SETGE:
413     case SQ_OP2_INST_PRED_SETNE:
414     case SQ_OP2_INST_DOT4:
415     case SQ_OP2_INST_DOT4_IEEE:
416     case SQ_OP2_INST_CUBE:
417         return 2;  
418
419     case SQ_OP2_INST_MOV: 
420     case SQ_OP2_INST_MOVA_FLOOR:
421     case SQ_OP2_INST_FRACT:
422     case SQ_OP2_INST_FLOOR:
423     case SQ_OP2_INST_TRUNC:
424     case SQ_OP2_INST_EXP_IEEE:
425     case SQ_OP2_INST_LOG_CLAMPED:
426     case SQ_OP2_INST_LOG_IEEE:
427     case SQ_OP2_INST_RECIP_IEEE:
428     case SQ_OP2_INST_RECIPSQRT_IEEE:
429     case SQ_OP2_INST_FLT_TO_INT:
430     case SQ_OP2_INST_SIN:
431     case SQ_OP2_INST_COS:
432         return 1;
433         
434     default: radeon_error(
435                     "Need instruction operand number for %x.\n", opcode); 
436     };
437
438     return 3;
439 }
440
441 unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3) 
442 {
443     if(nIsOp3 > 0)
444     {
445         return 3;
446     }
447
448     switch (opcode)
449     {
450     case EG_OP2_INST_ADD:
451     case EG_OP2_INST_KILLE:
452     case EG_OP2_INST_KILLGT:
453     case EG_OP2_INST_KILLGE:
454     case EG_OP2_INST_KILLNE:
455     case EG_OP2_INST_MUL: 
456     case EG_OP2_INST_MAX:
457     case EG_OP2_INST_MIN:
458     //case EG_OP2_INST_MAX_DX10:
459     //case EG_OP2_INST_MIN_DX10:
460     case EG_OP2_INST_SETE: 
461     case EG_OP2_INST_SETNE:
462     case EG_OP2_INST_SETGT:
463     case EG_OP2_INST_SETGE:
464     case EG_OP2_INST_PRED_SETE:
465     case EG_OP2_INST_PRED_SETGT:
466     case EG_OP2_INST_PRED_SETGE:
467     case EG_OP2_INST_PRED_SETNE:
468     case EG_OP2_INST_DOT4:
469     case EG_OP2_INST_DOT4_IEEE:
470     case EG_OP2_INST_CUBE:
471         return 2;  
472
473     case EG_OP2_INST_MOV: 
474     //case SQ_OP2_INST_MOVA_FLOOR:
475     case EG_OP2_INST_FRACT:
476     case EG_OP2_INST_FLOOR:
477     case EG_OP2_INST_TRUNC:
478     case EG_OP2_INST_EXP_IEEE:
479     case EG_OP2_INST_LOG_CLAMPED:
480     case EG_OP2_INST_LOG_IEEE:
481     case EG_OP2_INST_RECIP_IEEE:
482     case EG_OP2_INST_RECIPSQRT_IEEE:
483     case EG_OP2_INST_FLT_TO_INT:
484     case EG_OP2_INST_SIN:
485     case EG_OP2_INST_COS:
486     case EG_OP2_INST_FLT_TO_INT_FLOOR:
487     case EG_OP2_INST_MOVA_INT:
488         return 1;
489         
490     default: radeon_error(
491                     "Need instruction operand number for %x.\n", opcode); 
492     };
493
494     return 3;
495 }
496
497 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
498 {
499     GLuint i;
500
501     Init_R700_Shader(pShader);
502     pAsm->pR700Shader = pShader;
503     pAsm->currentShaderType = spt;
504
505     pAsm->cf_last_export_ptr   = NULL;
506
507     pAsm->cf_current_export_clause_ptr = NULL;
508     pAsm->cf_current_alu_clause_ptr    = NULL;
509     pAsm->cf_current_tex_clause_ptr    = NULL;
510     pAsm->cf_current_vtx_clause_ptr    = NULL;
511     pAsm->cf_current_cf_clause_ptr     = NULL;
512
513     // No clause has been created yet
514     pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
515
516     pAsm->number_of_colorandz_exports = 0;
517     pAsm->number_of_exports           = 0;
518     pAsm->number_of_export_opcodes    = 0;
519
520     pAsm->alu_x_opcode = 0;
521
522     pAsm->D2.bits = 0;
523
524     pAsm->D.bits = 0;
525     pAsm->S[0].bits = 0;
526     pAsm->S[1].bits = 0;
527     pAsm->S[2].bits = 0;
528
529     pAsm->uLastPosUpdate = 0; 
530         
531     *(BITS *) &pAsm->fp_stOutFmt0 = 0;
532
533     pAsm->uIIns = 0;
534     pAsm->uOIns = 0;
535     pAsm->number_used_registers = 0;
536     pAsm->uUsedConsts = 256; 
537
538
539     // Fragment programs
540     pAsm->uBoolConsts = 0;
541     pAsm->uIntConsts = 0;
542     pAsm->uInsts = 0;
543     pAsm->uConsts = 0;
544
545     pAsm->FCSP = 0;
546     pAsm->fc_stack[0].type = FC_NONE;
547
548     pAsm->aArgSubst[0] =
549     pAsm->aArgSubst[1] =
550     pAsm->aArgSubst[2] =
551     pAsm->aArgSubst[3] = (-1);
552
553     pAsm->uOutputs = 0;
554
555     for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) 
556     {
557         pAsm->color_export_register_number[i] = (-1);
558     }
559
560
561     pAsm->depth_export_register_number = (-1);
562     pAsm->stencil_export_register_number = (-1);
563     pAsm->coverage_to_mask_export_register_number = (-1);
564     pAsm->mask_export_register_number = (-1);
565
566     pAsm->starting_export_register_number = 0;
567     pAsm->starting_vfetch_register_number = 0;
568     pAsm->starting_temp_register_number   = 0;
569     pAsm->uFirstHelpReg = 0;
570
571     pAsm->input_position_is_used = GL_FALSE;
572     pAsm->input_normal_is_used   = GL_FALSE;
573
574     for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) 
575     {
576         pAsm->input_color_is_used[ i ] = GL_FALSE;
577     }
578
579     for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) 
580     {
581         pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
582     }
583
584     for (i=0; i<VERT_ATTRIB_MAX; i++) 
585     {
586         pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
587     }
588
589     pAsm->number_of_inputs = 0;
590
591     pAsm->is_tex = GL_FALSE;
592     pAsm->need_tex_barrier = GL_FALSE;
593
594     pAsm->subs              = NULL;
595     pAsm->unSubArraySize    = 0;
596     pAsm->unSubArrayPointer = 0;
597     pAsm->callers              = NULL;
598     pAsm->unCallerArraySize    = 0;
599     pAsm->unCallerArrayPointer = 0;
600
601     pAsm->CALLSP = 0;
602     pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
603     pAsm->CALLSTACK[0].plstCFInstructions_local
604           = &(pAsm->pR700Shader->lstCFInstructions);
605
606     pAsm->CALLSTACK[0].max = 0;
607     pAsm->CALLSTACK[0].current = 0;
608
609     SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
610
611     pAsm->unCFflags = 0;
612
613     pAsm->presubs           = NULL;
614     pAsm->unPresubArraySize = 0;
615     pAsm->unNumPresub       = 0;
616     pAsm->unCurNumILInsts   = 0;
617
618     pAsm->unVetTexBits      = 0;
619
620     return 0;
621 }
622
623 GLboolean IsTex(gl_inst_opcode Opcode)
624 {
625     if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
626         (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) || (OPCODE_TXL==Opcode) )
627     {
628         return GL_TRUE;
629     }
630     return GL_FALSE;
631 }
632
633 GLboolean IsAlu(gl_inst_opcode Opcode)
634 {
635     //TODO : more for fc and ex for higher spec.
636     if( IsTex(Opcode) )
637     {
638         return GL_FALSE;
639     }
640     return GL_TRUE;
641 }
642
643 int check_current_clause(r700_AssemblerBase* pAsm,
644                                              CF_CLAUSE_TYPE      new_clause_type)
645 {
646         if (pAsm->cf_current_clause_type != new_clause_type) 
647         {       //Close last open clause
648                 switch (pAsm->cf_current_clause_type) 
649                 {
650                 case CF_ALU_CLAUSE:
651                         if ( pAsm->cf_current_alu_clause_ptr != NULL) 
652             {
653                 pAsm->cf_current_alu_clause_ptr = NULL;
654             }
655                         break;
656                 case CF_VTX_CLAUSE:
657                         if ( pAsm->cf_current_vtx_clause_ptr != NULL) 
658             {
659                 pAsm->cf_current_vtx_clause_ptr = NULL;
660             }
661                         break;
662                 case CF_TEX_CLAUSE:
663                         if ( pAsm->cf_current_tex_clause_ptr != NULL) 
664             {
665                 pAsm->cf_current_tex_clause_ptr = NULL;
666             }
667                         break;
668                 case CF_EXPORT_CLAUSE:
669                         if ( pAsm->cf_current_export_clause_ptr != NULL) 
670             {
671                 pAsm->cf_current_export_clause_ptr = NULL;
672             }
673                         break;
674                 case CF_OTHER_CLAUSE:
675                         if ( pAsm->cf_current_cf_clause_ptr != NULL) 
676             {
677                 pAsm->cf_current_cf_clause_ptr = NULL;
678             }
679                         break;
680                 case CF_EMPTY_CLAUSE:
681                         break;
682                 default:
683             radeon_error(
684                        "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
685                         return GL_FALSE;
686                 }
687
688         pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
689
690                 // Create new clause
691         switch (new_clause_type) 
692             {
693         case CF_ALU_CLAUSE:
694             pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
695             break;
696         case CF_VTX_CLAUSE:
697             pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
698             break;
699         case CF_TEX_CLAUSE:        
700             pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
701             break;
702         case CF_EXPORT_CLAUSE:
703             {
704                 R700ControlFlowSXClause* pR700ControlFlowSXClause 
705                             = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause); 
706             
707                 // Add new export instruction to control flow program        
708                 if (pR700ControlFlowSXClause != 0) 
709                 {
710                     pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
711                     Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
712                     AddCFInstruction( pAsm->pR700Shader, 
713                                       (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
714                 }
715                 else 
716                 {
717                     radeon_error(
718                                "Error allocating new EXPORT CF instruction in check_current_clause. \n");
719                     return GL_FALSE;
720                 }
721                 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
722             }
723             break;
724         case CF_EMPTY_CLAUSE:
725             break;
726         case CF_OTHER_CLAUSE:
727             pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
728             break;
729         default:
730             radeon_error(
731                        "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
732             return GL_FALSE;
733         }
734     }
735
736     return GL_TRUE;
737 }
738
739 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
740 {
741     if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
742     {
743         return GL_FALSE;
744     }
745
746     pAsm->cf_current_cf_clause_ptr = 
747       (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
748
749     if (pAsm->cf_current_cf_clause_ptr != NULL) 
750         {
751                 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
752                 AddCFInstruction( pAsm->pR700Shader, 
753                           (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
754         }
755         else 
756         {
757         radeon_error("Could not allocate a new VFetch CF instruction.\n");
758                 return GL_FALSE;
759         }
760
761     return GL_TRUE;
762 }
763
764 GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
765                                                                  R700VertexInstruction*  vertex_instruction_ptr)
766 {
767         if( GL_FALSE == check_current_clause(pAsm,  CF_VTX_CLAUSE) )
768         {
769                 return GL_FALSE;
770         }
771
772     if( pAsm->cf_current_vtx_clause_ptr == NULL ||
773         ( (pAsm->cf_current_vtx_clause_ptr != NULL) && 
774          (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1) 
775         ) ) 
776     { 
777                 // Create new Vfetch control flow instruction for this new clause
778                 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
779
780                 if (pAsm->cf_current_vtx_clause_ptr != NULL) 
781                 {
782                         Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
783                         AddCFInstruction( pAsm->pR700Shader, 
784                               (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
785                 }
786                 else 
787                 {
788             radeon_error("Could not allocate a new VFetch CF instruction.\n");
789                         return GL_FALSE;
790                 }
791
792         if(8 == pAsm->unAsic)
793         {
794             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC,
795                      EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
796             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
797                      EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); 
798             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
799                      EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
800             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
801                      EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
802             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
803                      EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
804             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
805                      EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
806             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
807                      EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
808             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
809                      EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
810             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1,
811                      EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
812         }
813         else
814         {
815                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count        = 0x0;
816                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const         = 0x0;
817                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
818                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count            = 0x0;
819                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program   = 0x0;
820                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
821                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_VTX;
822                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
823                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier          = 0x1;
824         }
825
826                 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
827         }
828         else
829         {
830         if(8 == pAsm->unAsic)
831         {
832             unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 
833                                          EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
834             SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
835                      EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
836         }
837         else
838         {
839                     pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
840         }
841         }
842
843         AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
844
845         return GL_TRUE;
846 }
847
848 GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
849                               R700TextureInstruction* tex_instruction_ptr)
850
851     if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
852     {
853         return GL_FALSE;
854     }
855
856     if ( pAsm->cf_current_tex_clause_ptr == NULL ||
857          ( (pAsm->cf_current_tex_clause_ptr != NULL) && 
858            (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1) 
859          ) ) 
860     {
861         // new tex cf instruction for this new clause  
862         pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
863
864                 if (pAsm->cf_current_tex_clause_ptr != NULL) 
865                 {
866                         Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
867                         AddCFInstruction( pAsm->pR700Shader, 
868                               (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
869                 }
870                 else 
871                 {
872             radeon_error("Could not allocate a new TEX CF instruction.\n");
873                         return GL_FALSE;
874                 }
875
876         if(8 == pAsm->unAsic)
877         {
878             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC,
879                      EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
880             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
881                      EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); 
882             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
883                      EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
884             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
885                      EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
886             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
887                      EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
888             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
889                      EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
890             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
891                      EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
892             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
893                      EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
894 #ifdef FORCE_CF_TEX_BARRIER
895             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1,
896                      EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
897 #else
898             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
899                      EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
900 #endif
901         }
902         else
903         {        
904             pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count        = 0x0;
905             pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const         = 0x0;
906             pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
907
908             pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program   = 0x0;
909             pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
910             pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_TEX;
911             pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
912             pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier          = 0x0;   //0x1;
913         }
914     }
915     else 
916     {      
917         if(8 == pAsm->unAsic)
918         {
919             unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 
920                                          EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
921             SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, count,
922                      EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
923         }
924         else
925         {        
926             pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
927         }
928     }
929
930     // If this clause constains any TEX instruction that is dependent on a 
931     // previous instruction, set the barrier bit, also always set for vert 
932     // programs as tex deps are not(yet) computed for them
933     if( pAsm->currentShaderType == SPT_VP || pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
934     {
935         pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;  
936     }
937
938     if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
939     {
940         pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
941         tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
942     }
943
944     AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
945
946     return GL_TRUE;
947 }
948
949 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
950                                                                 GLuint gl_client_id,
951                                 GLuint destination_register,
952                                                                 GLuint number_of_elements,
953                                 GLenum dataElementType,
954                                                                 VTX_FETCH_METHOD* pFetchMethod)
955 {
956     GLuint client_size_inbyte;
957         GLuint data_format;
958     GLuint mega_fetch_count;
959         GLuint is_mega_fetch_flag;
960
961         R700VertexGenericFetch*   vfetch_instruction_ptr;
962         R700VertexGenericFetch*   assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
963
964         if (assembled_vfetch_instruction_ptr == NULL) 
965         {
966                 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
967                 if (vfetch_instruction_ptr == NULL) 
968                 {
969                         return GL_FALSE;
970                 }
971         Init_R700VertexGenericFetch(vfetch_instruction_ptr);
972     }
973         else 
974         {
975                 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
976         }
977
978         data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
979
980         if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
981         {
982                 //TODO : mini fetch
983                 mega_fetch_count = 0;
984                 is_mega_fetch_flag = 0;
985         }
986         else
987         {
988                 mega_fetch_count = MEGA_FETCH_BYTES - 1;
989                 is_mega_fetch_flag       = 0x1;
990                 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
991         }
992
993         vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
994         vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
995         vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
996
997         vfetch_instruction_ptr->m_Word0.f.buffer_id        = gl_client_id;
998         vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0; 
999         vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
1000         vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
1001         vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
1002
1003         vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
1004         vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1005         vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1006         vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
1007
1008         vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1009
1010         // Destination register
1011         vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; 
1012         vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1013
1014         vfetch_instruction_ptr->m_Word2.f.offset              = 0;
1015         vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1016
1017         vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
1018
1019         if (assembled_vfetch_instruction_ptr == NULL) 
1020         {
1021                 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) 
1022         {   
1023                         return GL_FALSE;
1024                 }
1025
1026                 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL) 
1027                 {
1028                         return GL_FALSE;
1029                 }
1030                 else 
1031                 {
1032                         pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
1033                 }
1034         }
1035
1036         return GL_TRUE;
1037 }
1038
1039 GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
1040                                        GLuint              destination_register,                                                                       
1041                                        GLenum              type,
1042                                        GLint               size,
1043                                        GLubyte             element,
1044                                        GLuint              _signed,
1045                                        GLboolean           normalize,
1046                                        GLenum              format,
1047                                        VTX_FETCH_METHOD  * pFetchMethod)
1048 {
1049     GLuint client_size_inbyte;
1050         GLuint data_format;
1051     GLuint mega_fetch_count;
1052         GLuint is_mega_fetch_flag;
1053
1054     GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
1055
1056     R700VertexGenericFetch*   vfetch_instruction_ptr;
1057         R700VertexGenericFetch*   assembled_vfetch_instruction_ptr 
1058                                      = pAsm->vfetch_instruction_ptr_array[element];
1059
1060     if (assembled_vfetch_instruction_ptr == NULL) 
1061         {
1062                 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1063                 if (vfetch_instruction_ptr == NULL) 
1064                 {
1065                         return GL_FALSE;
1066                 }
1067         Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1068     }
1069         else 
1070         {
1071                 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1072         }
1073
1074     data_format = GetSurfaceFormat(type, size, &client_size_inbyte);    
1075
1076         if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1077         {
1078                 //TODO : mini fetch
1079                 mega_fetch_count = 0;
1080                 is_mega_fetch_flag = 0;
1081         }
1082         else
1083         {
1084                 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1085                 is_mega_fetch_flag       = 0x1;
1086                 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1087         }
1088
1089     SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH,
1090              EG_VTX_WORD0__VC_INST_shift,
1091              EG_VTX_WORD0__VC_INST_mask);
1092     SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA,
1093              EG_VTX_WORD0__FETCH_TYPE_shift,
1094              EG_VTX_WORD0__FETCH_TYPE_mask);
1095     CLEARbit(vfetch_instruction_ptr->m_Word0.val, 
1096              EG_VTX_WORD0__FWQ_bit);
1097     SETfield(vfetch_instruction_ptr->m_Word0.val, element,
1098              EG_VTX_WORD0__BUFFER_ID_shift,
1099              EG_VTX_WORD0__BUFFER_ID_mask);
1100     SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0,
1101              EG_VTX_WORD0__SRC_GPR_shift,
1102              EG_VTX_WORD0__SRC_GPR_mask);
1103     SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
1104              EG_VTX_WORD0__SRC_REL_shift,
1105              EG_VTX_WORD0__SRC_REL_bit);
1106     SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X,
1107              EG_VTX_WORD0__SRC_SEL_X_shift,
1108              EG_VTX_WORD0__SRC_SEL_X_mask);
1109     SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count,
1110              EG_VTX_WORD0__MFC_shift,
1111              EG_VTX_WORD0__MFC_mask);
1112                         
1113         if(format == GL_BGRA)
1114         {        
1115                 dst_sel_x        = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1116                 dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1117                 dst_sel_z        = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1118                 dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1119         }
1120         else
1121         {
1122                 dst_sel_x        = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1123                 dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1124                 dst_sel_z        = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1125                 dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1126
1127         }
1128     SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x,
1129              EG_VTX_WORD1__DST_SEL_X_shift,
1130              EG_VTX_WORD1__DST_SEL_X_mask);
1131     SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y,
1132              EG_VTX_WORD1__DST_SEL_Y_shift,
1133              EG_VTX_WORD1__DST_SEL_Y_mask);
1134     SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z,
1135              EG_VTX_WORD1__DST_SEL_Z_shift,
1136              EG_VTX_WORD1__DST_SEL_Z_mask);
1137     SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w,
1138              EG_VTX_WORD1__DST_SEL_W_shift,
1139              EG_VTX_WORD1__DST_SEL_W_mask);
1140     
1141     SETfield(vfetch_instruction_ptr->m_Word1.val, 1, 
1142              EG_VTX_WORD1__UCF_shift,
1143              EG_VTX_WORD1__UCF_bit);
1144     SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
1145              EG_VTX_WORD1__DATA_FORMAT_shift,
1146              EG_VTX_WORD1__DATA_FORMAT_mask);   
1147 #ifdef TEST_VFETCH
1148     SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1149                  EG_VTX_WORD1__FCA_shift,
1150                  EG_VTX_WORD1__FCA_bit);
1151 #else
1152     if(1 == _signed)
1153     {
1154         SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1155                  EG_VTX_WORD1__FCA_shift,
1156                  EG_VTX_WORD1__FCA_bit);        
1157     }
1158     else
1159     {
1160         SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED,
1161                  EG_VTX_WORD1__FCA_shift,
1162                  EG_VTX_WORD1__FCA_bit);           
1163     }
1164 #endif /* TEST_VFETCH */
1165
1166     if(GL_TRUE == normalize)
1167     {
1168         SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM,
1169                  EG_VTX_WORD1__NFA_shift,
1170                  EG_VTX_WORD1__NFA_mask);          
1171     }
1172     else
1173     {
1174         SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED,
1175                  EG_VTX_WORD1__NFA_shift,
1176                  EG_VTX_WORD1__NFA_mask);        
1177     }
1178
1179         /* Destination register */
1180     SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register,
1181              EG_VTX_WORD1_GPR__DST_GPR_shift,
1182              EG_VTX_WORD1_GPR__DST_GPR_mask);
1183         SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE,
1184              EG_VTX_WORD1_GPR__DST_REL_shift,
1185              EG_VTX_WORD1_GPR__DST_REL_bit); 
1186         
1187
1188     SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1189              EG_VTX_WORD2__OFFSET_shift,
1190              EG_VTX_WORD2__OFFSET_mask); 
1191     SETfield(vfetch_instruction_ptr->m_Word2.val, 
1192 #ifdef MESA_BIG_ENDIAN
1193                          SQ_ENDIAN_8IN32,
1194 #else
1195                          SQ_ENDIAN_NONE,
1196 #endif
1197              EG_VTX_WORD2__ENDIAN_SWAP_shift,
1198              EG_VTX_WORD2__ENDIAN_SWAP_mask);
1199     SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1200              EG_VTX_WORD2__CBNS_shift,
1201              EG_VTX_WORD2__CBNS_bit);
1202     SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag,
1203              EG_VTX_WORD2__MEGA_FETCH_shift,
1204              EG_VTX_WORD2__MEGA_FETCH_mask);
1205         
1206         if (assembled_vfetch_instruction_ptr == NULL) 
1207         {
1208                 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) 
1209         {   
1210                         return GL_FALSE;
1211                 }
1212
1213                 if (pAsm->vfetch_instruction_ptr_array[element] != NULL) 
1214                 {
1215                         return GL_FALSE;
1216                 }
1217                 else 
1218                 {
1219                         pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1220                 }
1221         }
1222
1223         return GL_TRUE;
1224 }
1225
1226 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
1227                                        GLuint              destination_register,                                                                       
1228                                        GLenum              type,
1229                                        GLint               size,
1230                                        GLubyte             element,
1231                                        GLuint              _signed,
1232                                        GLboolean           normalize,
1233                                        GLenum              format,
1234                                        VTX_FETCH_METHOD  * pFetchMethod)
1235 {
1236     GLuint client_size_inbyte;
1237         GLuint data_format;
1238     GLuint mega_fetch_count;
1239         GLuint is_mega_fetch_flag;
1240
1241         R700VertexGenericFetch*   vfetch_instruction_ptr;
1242         R700VertexGenericFetch*   assembled_vfetch_instruction_ptr 
1243                                      = pAsm->vfetch_instruction_ptr_array[element];
1244
1245         if (assembled_vfetch_instruction_ptr == NULL) 
1246         {
1247                 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1248                 if (vfetch_instruction_ptr == NULL) 
1249                 {
1250                         return GL_FALSE;
1251                 }
1252         Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1253     }
1254         else 
1255         {
1256                 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1257         }
1258
1259     data_format = GetSurfaceFormat(type, size, &client_size_inbyte);    
1260
1261         if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1262         {
1263                 //TODO : mini fetch
1264                 mega_fetch_count = 0;
1265                 is_mega_fetch_flag = 0;
1266         }
1267         else
1268         {
1269                 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1270                 is_mega_fetch_flag       = 0x1;
1271                 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1272         }
1273
1274         vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
1275         vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
1276         vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1277
1278         vfetch_instruction_ptr->m_Word0.f.buffer_id        = element;
1279         vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0; 
1280         vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
1281         vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
1282         vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
1283
1284         if(format == GL_BGRA)
1285         {
1286                 vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1287                 vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1288                 vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1289                 vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1290         }
1291         else
1292         {
1293                 vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1294                 vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1295                 vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1296                 vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1297
1298         }
1299
1300         vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1301     vfetch_instruction_ptr->m_Word1.f.data_format      = data_format;
1302 #ifdef MESA_BIG_ENDIAN
1303     vfetch_instruction_ptr->m_Word2.f.endian_swap      = SQ_ENDIAN_8IN32;
1304 #else
1305     vfetch_instruction_ptr->m_Word2.f.endian_swap      = SQ_ENDIAN_NONE;
1306 #endif
1307
1308     if(1 == _signed)
1309     {
1310         vfetch_instruction_ptr->m_Word1.f.format_comp_all  = SQ_FORMAT_COMP_SIGNED;
1311     }
1312     else
1313     {
1314         vfetch_instruction_ptr->m_Word1.f.format_comp_all  = SQ_FORMAT_COMP_UNSIGNED;
1315     }
1316
1317     if(GL_TRUE == normalize)
1318     {
1319         vfetch_instruction_ptr->m_Word1.f.num_format_all   = SQ_NUM_FORMAT_NORM;
1320     }
1321     else
1322     {
1323         vfetch_instruction_ptr->m_Word1.f.num_format_all   = SQ_NUM_FORMAT_INT;
1324     }
1325
1326         // Destination register
1327         vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; 
1328         vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1329
1330         vfetch_instruction_ptr->m_Word2.f.offset              = 0;
1331         vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1332
1333         vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
1334
1335         if (assembled_vfetch_instruction_ptr == NULL) 
1336         {
1337                 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) 
1338         {   
1339                         return GL_FALSE;
1340                 }
1341
1342                 if (pAsm->vfetch_instruction_ptr_array[element] != NULL) 
1343                 {
1344                         return GL_FALSE;
1345                 }
1346                 else 
1347                 {
1348                         pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1349                 }
1350         }
1351
1352         return GL_TRUE;
1353 }
1354
1355 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1356 {
1357     GLint i;
1358     pAsm->cf_current_clause_type    = CF_EMPTY_CLAUSE;
1359     pAsm->cf_current_vtx_clause_ptr = NULL;
1360
1361     for (i=0; i<VERT_ATTRIB_MAX; i++) 
1362         {
1363                 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1364         }
1365
1366     cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1367     
1368     return GL_TRUE;
1369 }
1370
1371 GLuint gethelpr(r700_AssemblerBase* pAsm) 
1372 {
1373     GLuint r = pAsm->uHelpReg;
1374     pAsm->uHelpReg++;
1375     if (pAsm->uHelpReg > pAsm->number_used_registers)
1376     {
1377         pAsm->number_used_registers = pAsm->uHelpReg;
1378         }
1379     return r;
1380 }
1381 void resethelpr(r700_AssemblerBase* pAsm) 
1382 {
1383     pAsm->uHelpReg = pAsm->uFirstHelpReg;
1384 }
1385
1386 void checkop_init(r700_AssemblerBase* pAsm)
1387 {
1388     resethelpr(pAsm);
1389     pAsm->aArgSubst[0] =
1390     pAsm->aArgSubst[1] =
1391     pAsm->aArgSubst[2] =
1392     pAsm->aArgSubst[3] = -1;
1393 }
1394
1395 static GLboolean next_ins(r700_AssemblerBase *pAsm)
1396 {
1397     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1398
1399     if (GL_TRUE == pAsm->is_tex)
1400     {
1401         if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX)
1402         {
1403             if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE))
1404             {
1405                 radeon_error("Error assembling TEX instruction\n");
1406                 return GL_FALSE;
1407             }
1408         }
1409         else
1410         {
1411             if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE))
1412             {
1413                 radeon_error("Error assembling TEX instruction\n");
1414                 return GL_FALSE;
1415             }
1416         }
1417     }
1418     else
1419     {   //ALU
1420         if (GL_FALSE == assemble_alu_instruction(pAsm))
1421         {
1422             radeon_error("Error assembling ALU instruction\n");
1423             return GL_FALSE;
1424         }
1425     }
1426
1427     if (pAsm->D.dst.rtype == DST_REG_OUT)
1428     {
1429         assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
1430     }
1431
1432     //reset for next inst.
1433     pAsm->D.bits    = 0;
1434     pAsm->D2.bits   = 0;
1435     pAsm->S[0].bits = 0;
1436     pAsm->S[1].bits = 0;
1437     pAsm->S[2].bits = 0;
1438     pAsm->is_tex = GL_FALSE;
1439     pAsm->need_tex_barrier = GL_FALSE;
1440     pAsm->D2.bits = 0;
1441     pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
1442     return GL_TRUE;
1443 }
1444
1445 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1446 {
1447     GLuint tmp = gethelpr(pAsm);
1448
1449     //mov src to temp helper gpr.
1450     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1451
1452     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1453   
1454     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1455     pAsm->D.dst.reg   = tmp;
1456
1457     nomask_PVSDST(&(pAsm->D.dst));
1458
1459     if( GL_FALSE == assemble_src(pAsm, src, 0) )
1460     {
1461         return GL_FALSE;
1462     }
1463
1464     noswizzle_PVSSRC(&(pAsm->S[0].src));
1465     noneg_PVSSRC(&(pAsm->S[0].src));
1466    
1467     if( GL_FALSE == next_ins(pAsm) ) 
1468     {
1469         return GL_FALSE;
1470     }
1471
1472     pAsm->aArgSubst[1 + src] = tmp;
1473
1474     return GL_TRUE;
1475 }
1476
1477 GLboolean checkop1(r700_AssemblerBase* pAsm)
1478 {
1479     checkop_init(pAsm);
1480     return GL_TRUE;
1481 }
1482
1483 GLboolean checkop2(r700_AssemblerBase* pAsm)
1484 {
1485     GLboolean bSrcConst[2];
1486     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1487
1488     checkop_init(pAsm);
1489
1490     if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM)     || 
1491         (pILInst->SrcReg[0].File == PROGRAM_CONSTANT)    ||
1492         (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1493         (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM)   ||
1494         (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1495     {
1496         bSrcConst[0] = GL_TRUE;
1497     }
1498     else
1499     {
1500         bSrcConst[0] = GL_FALSE;
1501     }
1502     if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM)     || 
1503         (pILInst->SrcReg[1].File == PROGRAM_CONSTANT)    ||
1504         (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1505         (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM)   ||
1506         (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1507     {
1508         bSrcConst[1] = GL_TRUE;
1509     }
1510     else
1511     {
1512         bSrcConst[1] = GL_FALSE;
1513     }
1514
1515     if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1516     {
1517         if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1518         {
1519             if( GL_FALSE == mov_temp(pAsm, 1) )
1520             {
1521                 return GL_FALSE;
1522             }
1523         }
1524     }
1525
1526     return GL_TRUE;
1527 }
1528
1529 GLboolean checkop3(r700_AssemblerBase* pAsm)
1530 {
1531     GLboolean bSrcConst[3];
1532     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1533
1534     checkop_init(pAsm);
1535
1536     if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM)     || 
1537         (pILInst->SrcReg[0].File == PROGRAM_CONSTANT)    ||
1538         (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1539         (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM)   ||
1540         (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1541     {
1542         bSrcConst[0] = GL_TRUE;
1543     }
1544     else
1545     {
1546         bSrcConst[0] = GL_FALSE;
1547     }
1548     if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM)     || 
1549         (pILInst->SrcReg[1].File == PROGRAM_CONSTANT)    ||
1550         (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1551         (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM)   ||
1552         (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1553     {
1554         bSrcConst[1] = GL_TRUE;
1555     }
1556     else
1557     {
1558         bSrcConst[1] = GL_FALSE;
1559     }
1560     if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM)     || 
1561         (pILInst->SrcReg[2].File == PROGRAM_CONSTANT)    ||
1562         (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1563         (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM)   ||
1564         (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1565     {
1566         bSrcConst[2] = GL_TRUE;
1567     }
1568     else
1569     {
1570         bSrcConst[2] = GL_FALSE;
1571     }
1572
1573     if( (GL_TRUE == bSrcConst[0]) && 
1574         (GL_TRUE == bSrcConst[1]) && 
1575         (GL_TRUE == bSrcConst[2]) ) 
1576     {
1577         if( GL_FALSE == mov_temp(pAsm, 1) )
1578         {
1579             return GL_FALSE;
1580         }
1581         if( GL_FALSE == mov_temp(pAsm, 2) )
1582         {
1583             return GL_FALSE;
1584         }
1585
1586         return GL_TRUE;
1587     }
1588     else if( (GL_TRUE == bSrcConst[0]) && 
1589              (GL_TRUE == bSrcConst[1]) ) 
1590     {
1591         if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)    
1592             {
1593             if( GL_FALSE == mov_temp(pAsm, 1) )
1594             {
1595                 return GL_FALSE;
1596             }
1597         }
1598
1599         return GL_TRUE;
1600     }
1601     else if ( (GL_TRUE == bSrcConst[0]) && 
1602               (GL_TRUE == bSrcConst[2]) )  
1603     {
1604         if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)     
1605             {
1606             if( GL_FALSE == mov_temp(pAsm, 2) )
1607             {
1608                 return GL_FALSE;
1609             }
1610         }
1611
1612         return GL_TRUE;
1613     }
1614     else if( (GL_TRUE == bSrcConst[1]) && 
1615              (GL_TRUE == bSrcConst[2]) ) 
1616     {
1617         if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1618             {
1619             if( GL_FALSE == mov_temp(pAsm, 2) )
1620             {
1621                 return GL_FALSE;
1622             }
1623         }
1624
1625         return GL_TRUE;
1626     }
1627
1628     return GL_TRUE;
1629 }
1630
1631 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1632                        int src, 
1633                        int fld)
1634 {
1635     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1636
1637     if (fld == -1)
1638     {
1639         fld = src;
1640     }
1641
1642     if(pAsm->aArgSubst[1+src] >= 0) 
1643     {
1644         assert(fld >= 0);
1645         setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1646         pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1647         pAsm->S[fld].src.reg   = pAsm->aArgSubst[1+src];
1648     }
1649     else 
1650     {
1651         if (1 == pILInst->SrcReg[src].RelAddr)
1652         {
1653             setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1654         }
1655         else
1656         {
1657             setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1658         }
1659         switch (pILInst->SrcReg[src].File)
1660         {
1661         case PROGRAM_TEMPORARY:
1662             pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1663             pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1664             break;
1665         case PROGRAM_CONSTANT:
1666         case PROGRAM_LOCAL_PARAM:
1667         case PROGRAM_ENV_PARAM:
1668         case PROGRAM_STATE_VAR:
1669         case PROGRAM_UNIFORM:
1670             pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1671             if(pILInst->SrcReg[src].Index < 0)
1672             {
1673                 WARN_ONCE("Negative register offsets not supported yet!\n");
1674                 pAsm->S[fld].src.reg  = 0;
1675             } 
1676             else
1677             {
1678                 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1679             }
1680             break;      
1681         case PROGRAM_INPUT:
1682             pAsm->S[fld].src.rtype = SRC_REG_GPR;
1683             switch (pAsm->currentShaderType)
1684             {
1685             case SPT_FP:
1686                 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1687                 break;
1688             case SPT_VP:
1689                 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1690                 break;
1691             }
1692             break;      
1693         case PROGRAM_OUTPUT:
1694             pAsm->S[fld].src.rtype = SRC_REG_GPR;
1695             switch (pAsm->currentShaderType)
1696             {
1697             case SPT_FP:
1698                 pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index];
1699                 break;
1700             case SPT_VP:
1701                 pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index];
1702                 break;
1703             }
1704             break;
1705         default:
1706             radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1707             return GL_FALSE;
1708         }
1709     } 
1710
1711     pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1712     pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1713     pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1714     pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1715
1716     pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1717     pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1718     pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1719     pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1720      
1721     return GL_TRUE;
1722 }
1723
1724 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1725 {
1726     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1727     switch (pILInst->DstReg.File) 
1728     {
1729     case PROGRAM_TEMPORARY:
1730         if (1 == pILInst->DstReg.RelAddr)
1731         {
1732             setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1733         }
1734         else
1735         {
1736         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1737         }
1738         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1739         pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1740         break;
1741     case PROGRAM_ADDRESS:
1742         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1743         pAsm->D.dst.rtype = DST_REG_A0;
1744         pAsm->D.dst.reg = 0;
1745         break;
1746     case PROGRAM_OUTPUT:
1747         if (1 == pILInst->DstReg.RelAddr)
1748         {
1749             setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1750         }
1751         else
1752         {
1753         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1754         }
1755         pAsm->D.dst.rtype = DST_REG_OUT;
1756         switch (pAsm->currentShaderType)
1757         {
1758         case SPT_FP:
1759             pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1760             break;
1761         case SPT_VP:
1762             pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1763             break;
1764         }
1765         break;   
1766     default:
1767         radeon_error("Invalid destination output argument type\n");
1768         return GL_FALSE;
1769     }
1770
1771     pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1772     pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1773     pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1774     pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1775   
1776     if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1777     {
1778         pAsm->D2.dst2.SaturateMode = 1;
1779     }
1780     else
1781     {
1782         pAsm->D2.dst2.SaturateMode = 0;
1783     }
1784
1785     return GL_TRUE;
1786 }
1787
1788 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1789 {
1790     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1791
1792     if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1793     {
1794         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1795         pAsm->D.dst.reg   = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1796
1797         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1798     }
1799     else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1800     {
1801         pAsm->D.dst.rtype = DST_REG_OUT;
1802         switch (pAsm->currentShaderType)
1803         {
1804         case SPT_FP:
1805             pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1806             break;
1807         case SPT_VP:
1808             pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1809             break;
1810         }
1811
1812         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1813     }
1814     else 
1815     {
1816         radeon_error("Invalid destination output argument type\n");
1817         return GL_FALSE;
1818     }
1819
1820     pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1821     pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1822     pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1823     pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1824   
1825     return GL_TRUE;
1826 }
1827
1828 GLboolean tex_src(r700_AssemblerBase *pAsm)
1829 {
1830     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1831
1832     GLboolean bValidTexCoord = GL_FALSE;
1833
1834     if(pAsm->aArgSubst[1] >= 0)
1835     {
1836         bValidTexCoord = GL_TRUE;
1837         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1838         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1839         pAsm->S[0].src.reg   = pAsm->aArgSubst[1];
1840     }
1841     else
1842     {
1843     switch (pILInst->SrcReg[0].File) {
1844         case PROGRAM_UNIFORM: 
1845         case PROGRAM_CONSTANT:
1846         case PROGRAM_LOCAL_PARAM:
1847         case PROGRAM_ENV_PARAM:
1848         case PROGRAM_STATE_VAR:
1849             break;
1850         case PROGRAM_TEMPORARY:
1851             bValidTexCoord = GL_TRUE;
1852             pAsm->S[0].src.reg   = pILInst->SrcReg[0].Index +
1853             pAsm->starting_temp_register_number;
1854             pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1855             break;
1856         case PROGRAM_INPUT:
1857             if(SPT_VP == pAsm->currentShaderType)
1858             {
1859                 switch (pILInst->SrcReg[0].Index)
1860                 {
1861                     case VERT_ATTRIB_TEX0:
1862                     case VERT_ATTRIB_TEX1:
1863                     case VERT_ATTRIB_TEX2:
1864                     case VERT_ATTRIB_TEX3:
1865                     case VERT_ATTRIB_TEX4:
1866                     case VERT_ATTRIB_TEX5:
1867                     case VERT_ATTRIB_TEX6:
1868                     case VERT_ATTRIB_TEX7:
1869                         bValidTexCoord = GL_TRUE;
1870                         pAsm->S[0].src.reg   =
1871                             pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1872                         pAsm->S[0].src.rtype = SRC_REG_GPR;
1873                         break;
1874                 }
1875             }
1876             else
1877             {
1878                 switch (pILInst->SrcReg[0].Index)
1879                 {
1880                     case FRAG_ATTRIB_WPOS:
1881                     case FRAG_ATTRIB_COL0:
1882                     case FRAG_ATTRIB_COL1:
1883                     case FRAG_ATTRIB_FOGC:
1884                     case FRAG_ATTRIB_TEX0:
1885                     case FRAG_ATTRIB_TEX1:
1886                     case FRAG_ATTRIB_TEX2:
1887                     case FRAG_ATTRIB_TEX3:
1888                     case FRAG_ATTRIB_TEX4:
1889                     case FRAG_ATTRIB_TEX5:
1890                     case FRAG_ATTRIB_TEX6:
1891                     case FRAG_ATTRIB_TEX7:
1892                         bValidTexCoord = GL_TRUE;
1893                         pAsm->S[0].src.reg   =
1894                             pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1895                         pAsm->S[0].src.rtype = SRC_REG_GPR;
1896                         break;
1897                     case FRAG_ATTRIB_FACE:
1898                         fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1899                         break;
1900                     case FRAG_ATTRIB_PNTC:
1901                         fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1902                         break;
1903                 }
1904
1905                 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1906                     (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1907                 {
1908                                     bValidTexCoord = GL_TRUE;
1909                     pAsm->S[0].src.reg   =
1910                         pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1911                     pAsm->S[0].src.rtype = SRC_REG_GPR;
1912                 }
1913             }
1914
1915             break;
1916         }
1917     }
1918
1919     if(GL_TRUE == bValidTexCoord)
1920     {
1921         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1922     }
1923     else
1924     {
1925         radeon_error("Invalid source texcoord for TEX instruction\n");
1926         return GL_FALSE;
1927     }
1928
1929     pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1930     pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1931     pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1932     pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1933
1934     pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1935     pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1936     pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1937     pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1938
1939     return GL_TRUE;
1940 }
1941
1942 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1943 {
1944     PVSSRC *   texture_coordinate_source;
1945     PVSSRC *   texture_unit_source;
1946     
1947     R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1948         if (tex_instruction_ptr == NULL) 
1949         {
1950                 return GL_FALSE;
1951         }
1952     Init_R700TextureInstruction(tex_instruction_ptr);
1953
1954     texture_coordinate_source = &(pAsm->S[0].src);
1955     texture_unit_source       = &(pAsm->S[1].src);
1956
1957     if(8 == pAsm->unAsic) /* evergreen */
1958     {
1959     
1960         SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode,
1961                  EG_TEX_WORD0__TEX_INST_shift,
1962                  EG_TEX_WORD0__TEX_INST_mask);
1963
1964         if(  (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode)
1965            ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) )
1966         {
1967             /* Use fine texel derivative calculation rather than use quad derivative */
1968             SETfield(tex_instruction_ptr->m_Word0.val, 1,
1969                      EG_TEX_WORD0__INST_MOD_shift,
1970                      EG_TEX_WORD0__INST_MOD_mask);
1971         }
1972         else
1973         {
1974             SETfield(tex_instruction_ptr->m_Word0.val, 0,
1975                      EG_TEX_WORD0__INST_MOD_shift,
1976                      EG_TEX_WORD0__INST_MOD_mask);
1977         }
1978
1979         CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit);                 
1980
1981         if(SPT_VP == pAsm->currentShaderType)
1982         {
1983             SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX),
1984                      EG_TEX_WORD0__RESOURCE_ID_shift,
1985                      EG_TEX_WORD0__RESOURCE_ID_mask);
1986             pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1987         }
1988         else
1989         {
1990             SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg,
1991                      EG_TEX_WORD0__RESOURCE_ID_shift,
1992                      EG_TEX_WORD0__RESOURCE_ID_mask);
1993         }
1994         
1995         CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit);
1996         SETfield(tex_instruction_ptr->m_Word0.val, 0,
1997                  EG_TEX_WORD0__RIM_shift,
1998                  EG_TEX_WORD0__RIM_mask);
1999         SETfield(tex_instruction_ptr->m_Word0.val, 0,
2000                  EG_TEX_WORD0__SIM_shift,
2001                  EG_TEX_WORD0__SIM_mask);
2002     }
2003     else
2004     {
2005         tex_instruction_ptr->m_Word0.f.tex_inst         = pAsm->D.dst.opcode;
2006         tex_instruction_ptr->m_Word0.f.bc_frac_mode     = 0x0;
2007         tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
2008         tex_instruction_ptr->m_Word0.f.alt_const        = 0;
2009
2010         if(SPT_VP == pAsm->currentShaderType)
2011         {
2012             tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg + VERT_ATTRIB_MAX;
2013             pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
2014         }
2015         else
2016         {
2017             tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
2018         }
2019     }
2020
2021     tex_instruction_ptr->m_Word1.f.lod_bias     = 0x0;
2022     if (normalized) {
2023             tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
2024             tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
2025             tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
2026             tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
2027     } else {
2028             /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2029             tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
2030             tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
2031             tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
2032             tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
2033     }
2034
2035     tex_instruction_ptr->m_Word2.f.offset_x   = 0x0;
2036     tex_instruction_ptr->m_Word2.f.offset_y   = 0x0;
2037     tex_instruction_ptr->m_Word2.f.offset_z   = 0x0;
2038     tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
2039
2040     // dst
2041     if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
2042          (pAsm->D.dst.rtype == DST_REG_OUT) ) 
2043     {
2044         if(8 == pAsm->unAsic) /* evergreen */
2045         {
2046             SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg,
2047                      EG_TEX_WORD0__SRC_GPR_shift,
2048                      EG_TEX_WORD0__SRC_GPR_mask);
2049             SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
2050                      EG_TEX_WORD0__SRC_REL_shift,
2051                      EG_TEX_WORD0__SRC_REL_bit);
2052         }
2053         else
2054         {
2055             tex_instruction_ptr->m_Word0.f.src_gpr    = texture_coordinate_source->reg;
2056             tex_instruction_ptr->m_Word0.f.src_rel    = SQ_ABSOLUTE;
2057         }
2058
2059         tex_instruction_ptr->m_Word1.f.dst_gpr    = pAsm->D.dst.reg;
2060         tex_instruction_ptr->m_Word1.f.dst_rel    = SQ_ABSOLUTE;
2061
2062         tex_instruction_ptr->m_Word1.f.dst_sel_x  = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
2063         tex_instruction_ptr->m_Word1.f.dst_sel_y  = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
2064         tex_instruction_ptr->m_Word1.f.dst_sel_z  = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
2065         tex_instruction_ptr->m_Word1.f.dst_sel_w  = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
2066
2067
2068         tex_instruction_ptr->m_Word2.f.src_sel_x  = texture_coordinate_source->swizzlex;
2069         tex_instruction_ptr->m_Word2.f.src_sel_y  = texture_coordinate_source->swizzley;
2070         tex_instruction_ptr->m_Word2.f.src_sel_z  = texture_coordinate_source->swizzlez;
2071         tex_instruction_ptr->m_Word2.f.src_sel_w  = texture_coordinate_source->swizzlew;
2072     }
2073     else 
2074     {
2075         radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2076         return GL_FALSE;
2077     }
2078
2079     if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
2080     {
2081         return GL_FALSE;
2082     }
2083
2084     return GL_TRUE;
2085 }
2086
2087 void initialize(r700_AssemblerBase *pAsm)
2088 {
2089     GLuint cycle, component;
2090
2091     for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++) 
2092     {
2093         for (component=0; component<NUMBER_OF_COMPONENTS; component++) 
2094         {
2095             pAsm->hw_gpr[cycle][component] = (-1);
2096         }
2097     }
2098     for (component=0; component<NUMBER_OF_COMPONENTS; component++) 
2099     {
2100         pAsm->hw_cfile_addr[component] = (-1);
2101         pAsm->hw_cfile_chan[component] = (-1);
2102     }
2103 }
2104
2105 GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
2106                            int                  source_index,
2107                            PVSSRC*              pSource,
2108                            BITS                 scalar_channel_index,
2109                            r700_AssemblerBase  *pAsm)
2110 {
2111     BITS src_sel;
2112     BITS src_rel;
2113     BITS src_chan;
2114     BITS src_neg;
2115
2116     //--------------------------------------------------------------------------
2117     // Source for operands src0, src1. 
2118     // Values [0,127] correspond to GPR[0..127]. 
2119     // Values [256,511] correspond to cfile constants c[0..255]. 
2120
2121     //--------------------------------------------------------------------------
2122     // Other special values are shown in the list below.
2123
2124     // 248      SQ_ALU_SRC_0: special constant 0.0.
2125     // 249      SQ_ALU_SRC_1: special constant 1.0 float.
2126
2127     // 250      SQ_ALU_SRC_1_INT: special constant 1 integer.
2128     // 251      SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2129
2130     // 252      SQ_ALU_SRC_0_5: special constant 0.5 float.
2131     // 253      SQ_ALU_SRC_LITERAL: literal constant.
2132
2133     // 254      SQ_ALU_SRC_PV: previous vector result.
2134     // 255      SQ_ALU_SRC_PS: previous scalar result.
2135     //--------------------------------------------------------------------------
2136
2137     BITS channel_swizzle;
2138     switch (scalar_channel_index) 
2139     {
2140         case 0: channel_swizzle = pSource->swizzlex; break;
2141         case 1: channel_swizzle = pSource->swizzley; break;
2142         case 2: channel_swizzle = pSource->swizzlez; break;
2143         case 3: channel_swizzle = pSource->swizzlew; break;
2144         default: channel_swizzle = SQ_SEL_MASK; break;
2145     }
2146
2147     if(channel_swizzle == SQ_SEL_0) 
2148     {
2149         src_sel = SQ_ALU_SRC_0; 
2150     }
2151     else if (channel_swizzle == SQ_SEL_1) 
2152     {
2153         src_sel = SQ_ALU_SRC_1; 
2154     }
2155     else 
2156     {
2157         if ( (pSource->rtype == SRC_REG_TEMPORARY) || 
2158              (pSource->rtype == SRC_REG_GPR)
2159         ) 
2160         {
2161             src_sel = pSource->reg;
2162         }
2163         else if (pSource->rtype == SRC_REG_CONSTANT)
2164         {
2165             /* TODO : 4 const buffers */
2166             if(GL_TRUE == pAsm->bUseMemConstant) 
2167             {
2168                 src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE;
2169                 pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE;
2170             }
2171             else
2172             {
2173                 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;   
2174             }
2175         }
2176         else if (pSource->rtype == SRC_REC_LITERAL)
2177         {
2178             src_sel = SQ_ALU_SRC_LITERAL;            
2179         }
2180         else
2181         {
2182             radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2183                      source_index, pSource->rtype);
2184             return GL_FALSE;
2185         }
2186     }
2187
2188     if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) ) 
2189     {
2190         src_rel = SQ_ABSOLUTE;
2191     }
2192     else 
2193     {
2194         src_rel = SQ_RELATIVE;
2195     }
2196
2197     switch (channel_swizzle) 
2198     {
2199         case SQ_SEL_X: 
2200             src_chan = SQ_CHAN_X; 
2201             break;
2202         case SQ_SEL_Y: 
2203             src_chan = SQ_CHAN_Y; 
2204             break;
2205         case SQ_SEL_Z: 
2206             src_chan = SQ_CHAN_Z; 
2207             break;
2208         case SQ_SEL_W: 
2209             src_chan = SQ_CHAN_W; 
2210             break;
2211         case SQ_SEL_0:
2212         case SQ_SEL_1:
2213             // Does not matter since src_sel controls
2214             src_chan = SQ_CHAN_X; 
2215             break;
2216         default:
2217             radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
2218             return GL_FALSE;
2219             break;
2220     }
2221
2222     switch (scalar_channel_index) 
2223     {
2224         case 0: src_neg = pSource->negx; break;
2225         case 1: src_neg = pSource->negy; break;
2226         case 2: src_neg = pSource->negz; break;
2227         case 3: src_neg = pSource->negw; break;
2228         default: src_neg = 0; break;
2229     }
2230
2231     switch (source_index) 
2232     {
2233         case 0:
2234             assert(alu_instruction_ptr);
2235             alu_instruction_ptr->m_Word0.f.src0_sel  = src_sel;
2236             alu_instruction_ptr->m_Word0.f.src0_rel  = src_rel;
2237             alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
2238             alu_instruction_ptr->m_Word0.f.src0_neg  = src_neg;
2239             break;
2240         case 1:
2241             assert(alu_instruction_ptr);
2242             alu_instruction_ptr->m_Word0.f.src1_sel  = src_sel;
2243             alu_instruction_ptr->m_Word0.f.src1_rel  = src_rel;
2244             alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
2245             alu_instruction_ptr->m_Word0.f.src1_neg  = src_neg;
2246             break;
2247         case 2:
2248             assert(alu_instruction_ptr);
2249             alu_instruction_ptr->m_Word1_OP3.f.src2_sel  = src_sel;
2250             alu_instruction_ptr->m_Word1_OP3.f.src2_rel  = src_rel;
2251             alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
2252             alu_instruction_ptr->m_Word1_OP3.f.src2_neg  = src_neg;
2253             break;
2254         default:
2255             radeon_error("Only three sources allowed in ALU opcodes.\n");
2256           return GL_FALSE;
2257           break;
2258     }
2259
2260     return GL_TRUE;
2261 }
2262
2263 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
2264                               R700ALUInstruction* alu_instruction_ptr,
2265                               GLuint              contiguous_slots_needed)
2266 {
2267     if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
2268     {
2269         return GL_FALSE;
2270     }
2271
2272     if ( pAsm->alu_x_opcode != 0 ||
2273          pAsm->cf_current_alu_clause_ptr == NULL ||
2274          ( (pAsm->cf_current_alu_clause_ptr != NULL) && 
2275            (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
2276          ) ) 
2277     {
2278
2279         //new cf inst for this clause
2280         pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
2281             
2282         // link the new cf to cf segment    
2283         if(NULL != pAsm->cf_current_alu_clause_ptr) 
2284         {
2285             Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
2286                         AddCFInstruction( pAsm->pR700Shader, 
2287                               (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );            
2288         }
2289         else 
2290         {
2291             radeon_error("Could not allocate a new ALU CF instruction.\n");
2292             return GL_FALSE;
2293         }
2294
2295         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2296         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2297         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
2298
2299         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2300         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2301         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2302
2303         pAsm->cf_current_alu_clause_ptr->m_Word1.f.count           = 0x0;
2304
2305         if(pAsm->alu_x_opcode != 0)
2306         {
2307             pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
2308             pAsm->alu_x_opcode = 0;
2309         }
2310         else
2311         {
2312             pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
2313         }
2314
2315         pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
2316
2317         pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier         = 0x1;
2318     }
2319     else 
2320     {
2321         pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
2322     }
2323
2324     /* TODO : handle 4 bufs */
2325     if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) )
2326     {
2327         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2328         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2329         pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2;
2330         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2331         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2332         pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2333     }
2334
2335     // If this clause constains any instruction that is forward dependent on a TEX instruction, 
2336     // set the whole_quad_mode for this clause
2337     if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) 
2338     {
2339         pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;   
2340     }
2341
2342     if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) 
2343     {
2344         alu_instruction_ptr->m_Word0.f.last = 1;
2345     }
2346
2347     if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
2348     {
2349         pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
2350         alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
2351     }
2352     
2353     AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
2354
2355     return GL_TRUE;
2356 }
2357
2358 GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm)
2359 {
2360     R700ALUInstruction * alu_instruction_ptr = NULL;
2361
2362     int          ui;
2363     unsigned int uj;
2364     unsigned int unWord0Temp = 0x380C00;
2365     unsigned int unWord1Temp = 0x146B10;    //SQ_SEL_X
2366
2367     if(pAsm->uIIns > 0)
2368     {                
2369         for(ui=(pAsm->uIIns-1); ui>=0; ui--)
2370         {                        
2371             for(uj=0; uj<8; uj++)
2372             {
2373                 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2374                 Init_R700ALUInstruction(alu_instruction_ptr);
2375                 alu_instruction_ptr->m_Word0.val = unWord0Temp;  
2376                 alu_instruction_ptr->m_Word1.val = unWord1Temp;
2377
2378                 if(uj < 4)
2379                 {
2380                     SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW,
2381                              EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2382                 }
2383                 else
2384                 {
2385                     SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY,
2386                              EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2387                 }
2388                 if( (uj > 1) && (uj < 6) )
2389                 {
2390                     SETfield(alu_instruction_ptr->m_Word1.val, 1,
2391                              EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2392                 }
2393                 else
2394                 {
2395                     SETfield(alu_instruction_ptr->m_Word1.val, 0,
2396                              EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2397                 }
2398                 if( (uj > 1) && (uj < 6) )
2399                 {
2400                     SETfield(alu_instruction_ptr->m_Word1.val, ui,
2401                              EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2402                 }  
2403                 else
2404                 {
2405                     SETfield(alu_instruction_ptr->m_Word1.val, 111,
2406                              EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2407                 }                
2408
2409                 SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4),
2410                          EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask);
2411                 SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)),
2412                          EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask);
2413                 SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui),
2414                          EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask);
2415                 if(3 == (uj % 4))
2416                 {
2417                     SETfield(alu_instruction_ptr->m_Word0.val, 1,
2418                              EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit);
2419                 }
2420
2421                 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) )
2422                 {            
2423                     return GL_FALSE;
2424                 }
2425             }            
2426         }
2427     }
2428
2429     return GL_TRUE;
2430 }
2431
2432 void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
2433                         int                  source_index,
2434                         BITS*                psrc_sel,
2435                         BITS*                psrc_rel,
2436                         BITS*                psrc_chan,
2437                         BITS*                psrc_neg)
2438 {
2439     switch (source_index) 
2440     {
2441         case 0:
2442             *psrc_sel  = alu_instruction_ptr->m_Word0.f.src0_sel ;
2443             *psrc_rel  = alu_instruction_ptr->m_Word0.f.src0_rel ;
2444             *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
2445             *psrc_neg  = alu_instruction_ptr->m_Word0.f.src0_neg ;
2446             break;
2447
2448         case 1:
2449             *psrc_sel  = alu_instruction_ptr->m_Word0.f.src1_sel ;
2450             *psrc_rel  = alu_instruction_ptr->m_Word0.f.src1_rel ;
2451             *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
2452             *psrc_neg  = alu_instruction_ptr->m_Word0.f.src1_neg ;
2453             break;
2454
2455         case 2:
2456             *psrc_sel  = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
2457             *psrc_rel  = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
2458             *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
2459             *psrc_neg  = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
2460             break;
2461     }
2462 }
2463
2464 int is_cfile(BITS sel) 
2465 {
2466     if (sel > 255 && sel < 512) 
2467     {
2468         return 1;
2469     }
2470     return 0;
2471 }
2472
2473 int is_const(BITS sel) 
2474 {
2475     if (is_cfile(sel)) 
2476     {
2477         return 1;
2478     }
2479     else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) 
2480     {
2481         return 1;
2482     }
2483     return 0;
2484 }
2485
2486 int is_gpr(BITS sel) 
2487 {
2488     if (sel >= 0 && sel < 128) 
2489     {
2490         return 1;
2491     }
2492     return 0;
2493 }
2494
2495 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210,  //000
2496                                     SQ_ALU_VEC_120,  //001
2497                                     SQ_ALU_VEC_102,  //010
2498
2499                                     SQ_ALU_VEC_201,  //011
2500                                     SQ_ALU_VEC_012,  //100
2501                                     SQ_ALU_VEC_021,  //101
2502
2503                                     SQ_ALU_VEC_012,  //110
2504                                     SQ_ALU_VEC_012}; //111
2505
2506 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210,  //000
2507                                     SQ_ALU_SCL_122,  //001 
2508                                     SQ_ALU_SCL_122,  //010
2509
2510                                     SQ_ALU_SCL_221,  //011
2511                                     SQ_ALU_SCL_212,  //100
2512                                     SQ_ALU_SCL_122,  //101
2513
2514                                     SQ_ALU_SCL_122,  //110
2515                                     SQ_ALU_SCL_122}; //111
2516
2517 GLboolean reserve_cfile(r700_AssemblerBase* pAsm, 
2518                         GLuint sel, 
2519                         GLuint chan)
2520 {
2521     int res_match = (-1);
2522     int res_empty = (-1);
2523
2524     GLint res;
2525
2526     for (res=3; res>=0; res--) 
2527     {
2528         if(pAsm->hw_cfile_addr[ res] < 0)  
2529         {
2530             res_empty = res;
2531         }
2532         else if( (pAsm->hw_cfile_addr[res] == (int)sel)
2533                  &&
2534                  (pAsm->hw_cfile_chan[ res ] == (int) chan) ) 
2535         {
2536             res_match = res;
2537         }
2538     }
2539
2540     if(res_match >= 0) 
2541     {
2542         // Read for this scalar component already reserved, nothing to do here.
2543         ;
2544     }
2545     else if(res_empty >= 0) 
2546     {
2547         pAsm->hw_cfile_addr[ res_empty ] = sel;
2548         pAsm->hw_cfile_chan[ res_empty ] = chan;
2549     }
2550     else 
2551     {
2552         radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2553         return GL_FALSE;
2554     }
2555     return GL_TRUE;
2556 }
2557
2558 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
2559 {
2560     if(pAsm->hw_gpr[cycle][chan] < 0) 
2561     {
2562         pAsm->hw_gpr[cycle][chan] = sel;
2563     }
2564     else if(pAsm->hw_gpr[cycle][chan] != (int)sel) 
2565     {
2566         radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2567         return GL_FALSE;
2568     }
2569
2570     return GL_TRUE;
2571 }
2572
2573 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2574 {
2575     switch (swiz) 
2576     {
2577         case SQ_ALU_SCL_210:
2578             {
2579                 int table[3] = {2,      1,      0};
2580                 *pCycle = table[sel];
2581                 return GL_TRUE;
2582             }
2583             break;
2584         case SQ_ALU_SCL_122:
2585             {
2586                 int table[3] = {1,      2,      2};
2587                 *pCycle = table[sel];
2588                 return GL_TRUE;
2589             }
2590             break;
2591         case SQ_ALU_SCL_212:
2592             {   
2593                 int table[3] = {2,      1,      2};
2594                 *pCycle = table[sel];
2595                 return GL_TRUE;
2596             }
2597             break;
2598         case SQ_ALU_SCL_221:
2599             {
2600                 int table[3] = {2, 2, 1};
2601                 *pCycle = table[sel];
2602                 return GL_TRUE;
2603             }
2604             break;
2605         default:
2606             radeon_error("Bad Scalar bank swizzle value\n");
2607             break;
2608     }
2609
2610     return GL_FALSE;
2611 }
2612
2613 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2614 {
2615     switch (swiz) 
2616     {
2617         case SQ_ALU_VEC_012:
2618             {
2619                 int table[3] = {0, 1, 2};
2620                 *pCycle = table[sel];
2621             }
2622             break;
2623         case SQ_ALU_VEC_021:
2624             {
2625                 int table[3] = {0, 2,   1};
2626                 *pCycle = table[sel];
2627             }
2628             break;        
2629         case SQ_ALU_VEC_120:
2630             {
2631                 int table[3] = {1, 2,   0};
2632                 *pCycle = table[sel];
2633             }
2634             break;
2635         case SQ_ALU_VEC_102:
2636             {
2637                 int table[3] = {1, 0,   2};
2638                 *pCycle = table[sel];
2639             }
2640             break;
2641         case SQ_ALU_VEC_201:
2642             {
2643                 int table[3] = {2, 0,   1};
2644                 *pCycle = table[sel];
2645             }
2646             break;
2647         case SQ_ALU_VEC_210:
2648             {
2649                 int table[3] = {2, 1,   0};
2650                 *pCycle = table[sel];
2651             }
2652             break;
2653         default:
2654             radeon_error("Bad Vec bank swizzle value\n");
2655             return GL_FALSE;
2656             break;
2657     }
2658
2659     return GL_TRUE;
2660 }
2661
2662 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2663                        R700ALUInstruction* alu_instruction_ptr)
2664 {
2665     GLuint cycle;
2666     GLuint bank_swizzle;
2667     GLuint const_count = 0;
2668
2669     BITS sel;
2670     BITS chan;
2671     BITS rel;
2672     BITS neg;
2673
2674     GLuint src;
2675
2676     BITS src_sel [3] = {0,0,0};
2677     BITS src_chan[3] = {0,0,0};
2678     BITS src_rel [3] = {0,0,0};
2679     BITS src_neg [3] = {0,0,0};
2680
2681     GLuint swizzle_key;
2682     GLuint number_of_operands;
2683
2684     if(8 == pAsm->unAsic)
2685     {
2686         number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2687     }
2688     else
2689     {
2690         number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2691     }
2692
2693     for (src=0; src<number_of_operands; src++) 
2694     {
2695         get_src_properties(alu_instruction_ptr,
2696                            src,
2697                            &(src_sel[src]), 
2698                            &(src_rel[src]), 
2699                            &(src_chan[src]), 
2700                            &(src_neg[src]) );
2701     }
2702
2703
2704     swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
2705                     (is_const( src_sel[1] ) ? 2 : 0) + 
2706                     (is_const( src_sel[2] ) ? 1 : 0) );
2707   
2708     alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2709
2710     for (src=0; src<number_of_operands; src++) 
2711     {
2712         sel  = src_sel [src];
2713         chan = src_chan[src];
2714         rel  = src_rel [src];
2715         neg  = src_neg [src];
2716
2717         if (is_const( sel )) 
2718         {
2719             // Any constant, including literal and inline constants
2720             const_count++;
2721
2722             if (is_cfile( sel )) 
2723             {
2724                 reserve_cfile(pAsm, sel, chan);
2725             }
2726
2727         }
2728     }
2729
2730     for (src=0; src<number_of_operands; src++) 
2731     {
2732         sel  = src_sel [src];
2733         chan = src_chan[src];
2734         rel  = src_rel [src];
2735         neg  = src_neg [src];
2736
2737         if( is_gpr(sel) ) 
2738         {
2739             bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2740
2741             if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2742             {
2743                 return GL_FALSE;
2744             }
2745
2746             if(cycle < const_count) 
2747             {
2748                 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2749                 {
2750                     return GL_FALSE;
2751                 }
2752             }
2753         }
2754     }
2755
2756     return GL_TRUE;
2757 }
2758
2759 GLboolean check_vector(r700_AssemblerBase* pAsm,
2760                        R700ALUInstruction* alu_instruction_ptr)
2761 {
2762     GLuint cycle;
2763     GLuint bank_swizzle;
2764     GLuint const_count = 0;
2765
2766     GLuint src;
2767
2768     BITS sel;
2769     BITS chan;
2770     BITS rel;
2771     BITS neg;
2772
2773     BITS src_sel [3] = {0,0,0};
2774     BITS src_chan[3] = {0,0,0};
2775     BITS src_rel [3] = {0,0,0};
2776     BITS src_neg [3] = {0,0,0};
2777
2778     GLuint swizzle_key;
2779     GLuint number_of_operands;
2780
2781     if(8 == pAsm->unAsic)
2782     {
2783         number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2784     }
2785     else
2786     {
2787         number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2788     }
2789
2790     for (src=0; src<number_of_operands; src++) 
2791     {
2792         get_src_properties(alu_instruction_ptr,
2793                            src,
2794                            &(src_sel[src]), 
2795                            &(src_rel[src]), 
2796                            &(src_chan[src]), 
2797                            &(src_neg[src]) );
2798     }
2799
2800
2801     swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
2802                            (is_const( src_sel[1] ) ? 2 : 0) + 
2803                            (is_const( src_sel[2] ) ? 1 : 0) 
2804                          );
2805
2806     alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2807
2808     for (src=0; src<number_of_operands; src++) 
2809     {
2810         sel  = src_sel [src];
2811         chan = src_chan[src];
2812         rel  = src_rel [src];
2813         neg  = src_neg [src];
2814
2815
2816         bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2817
2818         if( is_gpr(sel) ) 
2819         {
2820             if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2821             {             
2822                 return GL_FALSE;
2823             }
2824
2825             if ( (src  == 1)          && 
2826                  (sel  == src_sel[0]) &&
2827                  (chan == src_chan[0]) ) 
2828             {        
2829             }
2830             else 
2831             {
2832                 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2833                 {                    
2834                     return GL_FALSE;
2835                 }
2836             }
2837         }
2838         else if( is_const(sel) ) 
2839         {                  
2840             const_count++;
2841
2842             if( is_cfile(sel) ) 
2843             {        
2844                 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2845                 {                    
2846                     return GL_FALSE;
2847                 }
2848             }
2849         }
2850     }
2851
2852     return GL_TRUE;
2853 }
2854
2855 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2856 {
2857     R700ALUInstruction            * alu_instruction_ptr = NULL;
2858     R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2859     R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2860
2861     GLuint    number_of_scalar_operations;
2862     GLboolean is_single_scalar_operation;
2863     GLuint    scalar_channel_index;
2864
2865     PVSSRC * pcurrent_source;
2866     int    current_source_index;
2867     GLuint contiguous_slots_needed;
2868     GLuint uNumSrc;
2869     GLboolean bSplitInst;
2870     
2871     if(8 == pAsm->unAsic)
2872     {
2873         uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2874     }
2875     else
2876     {
2877         uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2878     }
2879
2880     //GLuint    channel_swizzle, j;
2881     //GLuint    chan_counter[4] = {0, 0, 0, 0};
2882     //PVSSRC *  pSource[3];
2883     bSplitInst       = GL_FALSE;
2884     pAsm->kcacheUsed = 0;
2885
2886     if (1 == pAsm->D.dst.math) 
2887     {
2888         is_single_scalar_operation = GL_TRUE;
2889         number_of_scalar_operations = 1;
2890     }
2891     else 
2892     {
2893         is_single_scalar_operation = GL_FALSE;
2894         number_of_scalar_operations = 4;
2895
2896 /* current assembler doesn't do more than 1 register per source */
2897 #if 0
2898         /* check read port, only very preliminary algorithm, not count in 
2899            src0/1 same comp case and prev slot repeat case; also not count relative
2900            addressing. TODO: improve performance. */
2901         for(j=0; j<uNumSrc; j++)
2902         {
2903             pSource[j] = &(pAsm->S[j].src);
2904         }
2905         for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) 
2906         {
2907             for(j=0; j<uNumSrc; j++) 
2908             {
2909                 switch (scalar_channel_index) 
2910                 {
2911                     case 0: channel_swizzle = pSource[j]->swizzlex; break;
2912                     case 1: channel_swizzle = pSource[j]->swizzley; break;
2913                     case 2: channel_swizzle = pSource[j]->swizzlez; break;
2914                     case 3: channel_swizzle = pSource[j]->swizzlew; break;
2915                     default: channel_swizzle = SQ_SEL_MASK; break;
2916                 }
2917                 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || 
2918                      (pSource[j]->rtype == SRC_REG_GPR))
2919                      && (channel_swizzle <= SQ_SEL_W) )
2920                 {                    
2921                     chan_counter[channel_swizzle]++;                        
2922                 }
2923             }
2924         }
2925         if(   (chan_counter[SQ_SEL_X] > 3)
2926            || (chan_counter[SQ_SEL_Y] > 3)
2927            || (chan_counter[SQ_SEL_Z] > 3)
2928            || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2929         {
2930             bSplitInst = GL_TRUE;
2931         }
2932 #endif
2933     }
2934
2935     contiguous_slots_needed = 0;
2936
2937     if(!is_single_scalar_operation) 
2938     {
2939         contiguous_slots_needed = 4;
2940     }
2941
2942     contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2943
2944     initialize(pAsm);    
2945
2946     for (scalar_channel_index=0;
2947             scalar_channel_index < number_of_scalar_operations; 
2948                 scalar_channel_index++) 
2949     {
2950         if(scalar_channel_index == (number_of_scalar_operations-1))
2951         {
2952             switch(pAsm->D2.dst2.literal_slots)
2953             {
2954             case 0:
2955                 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2956                 Init_R700ALUInstruction(alu_instruction_ptr);
2957                 break;
2958             case 1:
2959                 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2960                 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2961                 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2962                 break;
2963             case 2:
2964                 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2965                 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2966                 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2967             break;
2968             };
2969         }
2970         else
2971         {
2972             alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2973             Init_R700ALUInstruction(alu_instruction_ptr);
2974         }
2975         
2976         //src 0
2977         current_source_index = 0;
2978         pcurrent_source = &(pAsm->S[0].src);
2979
2980         if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2981                                          current_source_index,
2982                                          pcurrent_source, 
2983                                          scalar_channel_index,
2984                                          pAsm) )     
2985         {            
2986             return GL_FALSE;
2987         }
2988    
2989         if (uNumSrc > 1) 
2990         {            
2991             // Process source 1            
2992             current_source_index = 1;
2993             pcurrent_source = &(pAsm->S[current_source_index].src);
2994
2995             if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2996                                              current_source_index,
2997                                              pcurrent_source, 
2998                                              scalar_channel_index,
2999                                              pAsm) ) 
3000             {                
3001                 return GL_FALSE;
3002             }
3003         }
3004
3005         //other bits
3006         alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
3007
3008         if(   (is_single_scalar_operation == GL_TRUE) 
3009            || (GL_TRUE == bSplitInst) )
3010         {
3011             alu_instruction_ptr->m_Word0.f.last = 1;
3012         }
3013         else 
3014         {
3015             alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ?  1 : 0;
3016         }
3017
3018         alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
3019         if(1 == pAsm->D.dst.predicated)
3020         {
3021             alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x1;
3022             alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
3023         }
3024         else
3025         {
3026             alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
3027             alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3028         }
3029
3030         // dst
3031         if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
3032             (pAsm->D.dst.rtype == DST_REG_OUT) ) 
3033         {
3034             alu_instruction_ptr->m_Word1.f.dst_gpr  = pAsm->D.dst.reg;
3035         }
3036         else 
3037         {            
3038             radeon_error("Only temp destination registers supported for ALU dest regs.\n");
3039             return GL_FALSE;
3040         }
3041
3042         if ( ADDR_RELATIVE_A0 == addrmode_PVSDST(&(pAsm->D.dst)) )
3043         {
3044             alu_instruction_ptr->m_Word1.f.dst_rel = SQ_RELATIVE;
3045         }
3046         else
3047         {
3048             alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
3049         }
3050
3051         if ( is_single_scalar_operation == GL_TRUE ) 
3052         {
3053             // Override scalar_channel_index since only one scalar value will be written
3054             if(pAsm->D.dst.writex) 
3055             {
3056                 scalar_channel_index = 0;
3057             }
3058             else if(pAsm->D.dst.writey) 
3059             {
3060                 scalar_channel_index = 1;
3061             }
3062             else if(pAsm->D.dst.writez) 
3063             {
3064                 scalar_channel_index = 2;
3065             }
3066             else if(pAsm->D.dst.writew) 
3067             {
3068                 scalar_channel_index = 3;
3069             }
3070         }
3071
3072         alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
3073
3074         alu_instruction_ptr->m_Word1.f.clamp    = pAsm->D2.dst2.SaturateMode;
3075
3076         if (pAsm->D.dst.op3) 
3077         {            
3078             //op3
3079
3080             alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
3081
3082             //There's 3rd src for op3
3083             current_source_index = 2;
3084             pcurrent_source = &(pAsm->S[current_source_index].src);
3085
3086             if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
3087                                               current_source_index,
3088                                               pcurrent_source, 
3089                                               scalar_channel_index,
3090                                               pAsm) ) 
3091             {
3092                 return GL_FALSE;
3093             }
3094         }
3095         else 
3096         {
3097             //op2
3098             if (pAsm->bR6xx)
3099             {
3100                 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst           = pAsm->D.dst.opcode;
3101
3102                 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs           = pAsm->S[0].src.abs;
3103                 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs           = pAsm->S[1].src.abs;
3104
3105                 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3106                 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred         = 0x0;
3107                 switch (scalar_channel_index) 
3108                 {
3109                     case 0: 
3110                         alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; 
3111                         break;
3112                     case 1: 
3113                         alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; 
3114                         break;
3115                     case 2: 
3116                         alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; 
3117                         break;
3118                     case 3: 
3119                         alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; 
3120                         break;
3121                     default: 
3122                         alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
3123                         break;
3124                 }            
3125                 alu_instruction_ptr->m_Word1_OP2.f6.omod               = SQ_ALU_OMOD_OFF;
3126             }
3127             else
3128             {
3129                 alu_instruction_ptr->m_Word1_OP2.f.alu_inst           = pAsm->D.dst.opcode;
3130
3131                 alu_instruction_ptr->m_Word1_OP2.f.src0_abs           = pAsm->S[0].src.abs;
3132                 alu_instruction_ptr->m_Word1_OP2.f.src1_abs           = pAsm->S[1].src.abs;
3133
3134                 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3135                 //alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
3136                 switch (scalar_channel_index) 
3137                 {
3138                     case 0: 
3139                         alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; 
3140                         break;
3141                     case 1: 
3142                         alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; 
3143                         break;
3144                     case 2: 
3145                         alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; 
3146                         break;
3147                     case 3: 
3148                         alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; 
3149                         break;
3150                     default: 
3151                         alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
3152                         break;
3153                 }            
3154                 alu_instruction_ptr->m_Word1_OP2.f.omod               = SQ_ALU_OMOD_OFF;
3155             }
3156         }
3157
3158         if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
3159         {            
3160             return GL_FALSE;
3161         }
3162
3163         /*
3164          * Judge the type of current instruction, is it vector or scalar 
3165          * instruction.
3166          */        
3167         if (is_single_scalar_operation) 
3168         {
3169             if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
3170             {                
3171                 return GL_FALSE;
3172             }
3173         }
3174         else 
3175         {
3176             if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
3177             {                
3178                 return GL_FALSE; 
3179             }
3180         }
3181
3182         contiguous_slots_needed -= 1;
3183     }
3184
3185     return GL_TRUE;
3186 }
3187
3188 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3189 {
3190     BITS tmp;
3191
3192     checkop1(pAsm);
3193
3194     tmp = gethelpr(pAsm);
3195
3196     // opcode  tmp.x,    a.x
3197     // MOV     dst,      tmp.x
3198
3199     pAsm->D.dst.opcode = opcode;
3200     pAsm->D.dst.math = 1;
3201
3202     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3203     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
3204     pAsm->D.dst.reg    = tmp;
3205     pAsm->D.dst.writex = 1;
3206
3207     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3208     {
3209         return GL_FALSE;
3210     }
3211
3212     if( pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_RSQ )
3213         pAsm->S[0].src.abs = 1;
3214
3215     if ( GL_FALSE == next_ins(pAsm) ) 
3216     {
3217         return GL_FALSE;
3218     }
3219
3220     // Now replicate result to all necessary channels in destination
3221     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3222
3223     if( GL_FALSE == assemble_dst(pAsm) )
3224     {
3225         return GL_FALSE;
3226     }
3227
3228     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3229     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3230     pAsm->S[0].src.reg   = tmp;
3231
3232     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3233     noneg_PVSSRC(&(pAsm->S[0].src));
3234
3235     if( GL_FALSE == next_ins(pAsm) )
3236     {
3237         return GL_FALSE;
3238     }
3239
3240     return GL_TRUE;
3241 }
3242
3243 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3244 {
3245     checkop1(pAsm);
3246
3247     pAsm->D.dst.opcode = SQ_OP2_INST_MAX;  
3248
3249     if( GL_FALSE == assemble_dst(pAsm) )
3250     {
3251         return GL_FALSE;
3252     }
3253     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3254     {
3255         return GL_FALSE;
3256     }
3257  
3258     pAsm->S[1].bits = pAsm->S[0].bits;
3259     flipneg_PVSSRC(&(pAsm->S[1].src));
3260
3261     if ( GL_FALSE == next_ins(pAsm) ) 
3262     {
3263         return GL_FALSE;
3264     }
3265
3266     return GL_TRUE;
3267 }
3268
3269 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3270 {
3271     if( GL_FALSE == checkop2(pAsm) )
3272     {
3273         return GL_FALSE;
3274     }
3275
3276     pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3277  
3278     if( GL_FALSE == assemble_dst(pAsm) )
3279     {
3280         return GL_FALSE;
3281     }
3282  
3283     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3284     {
3285         return GL_FALSE;
3286     }
3287
3288     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3289     {
3290         return GL_FALSE;
3291     }
3292
3293     if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3294     {
3295         flipneg_PVSSRC(&(pAsm->S[1].src));
3296     }
3297
3298     if( GL_FALSE == next_ins(pAsm) ) 
3299     {
3300         return GL_FALSE;
3301     }
3302
3303     return GL_TRUE;
3304 }
3305
3306 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3307 { /* TODO: ar values dont' persist between clauses */
3308     if( GL_FALSE == checkop1(pAsm) )
3309     {
3310         return GL_FALSE;
3311     }
3312
3313     if(8 == pAsm->unAsic)
3314     {
3315         /* Evergreen */
3316
3317         /* Float to Signed Integer Using FLOOR */
3318         pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR;
3319         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3320         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3321         pAsm->D.dst.reg = 0;
3322         pAsm->D.dst.writex = 0;
3323         pAsm->D.dst.writey = 0;
3324         pAsm->D.dst.writez = 0;
3325         pAsm->D.dst.writew = 0;
3326
3327         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3328         {
3329             return GL_FALSE;
3330         }
3331
3332         if( GL_FALSE == next_ins(pAsm) )
3333         {
3334             return GL_FALSE;
3335         }
3336
3337         /* Copy Signed Integer To Integer in AR and GPR */
3338         pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT;
3339         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3340         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3341         pAsm->D.dst.reg = 0;
3342         pAsm->D.dst.writex = 0;
3343         pAsm->D.dst.writey = 0;
3344         pAsm->D.dst.writez = 0;
3345         pAsm->D.dst.writew = 0;
3346
3347         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3348         {
3349             return GL_FALSE;
3350         }
3351
3352         if( GL_FALSE == next_ins(pAsm) )
3353         {
3354             return GL_FALSE;
3355         }
3356     }
3357     else
3358     {
3359         /* r6xx/r7xx */
3360
3361         /* Truncate floating-point to the nearest integer
3362            in the range [-256, +255], and copy to AR and
3363            to a GPR.
3364         */
3365         pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3366         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3367         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3368         pAsm->D.dst.reg = 0;
3369         pAsm->D.dst.writex = 0;
3370         pAsm->D.dst.writey = 0;
3371         pAsm->D.dst.writez = 0;
3372         pAsm->D.dst.writew = 0;
3373
3374         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3375         {
3376             return GL_FALSE;
3377         }
3378
3379         if( GL_FALSE == next_ins(pAsm) )
3380         {
3381             return GL_FALSE;
3382         }
3383     }
3384
3385     return GL_TRUE;
3386 }
3387
3388 GLboolean assemble_BAD(char *opcode_str) 
3389 {
3390     radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3391     return GL_FALSE;
3392 }
3393
3394 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3395 {
3396     int tmp;
3397
3398     if( GL_FALSE == checkop3(pAsm) )
3399     {
3400         return GL_FALSE;
3401     }
3402
3403     if(8 == pAsm->unAsic)
3404     {
3405         pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
3406     }
3407     else
3408     {
3409         pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3410     }
3411     pAsm->D.dst.op3     = 1;  
3412
3413     tmp = (-1);
3414
3415     if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3416     {
3417         //OP3 has no support for write mask
3418         tmp = gethelpr(pAsm);
3419
3420         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3421         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3422         pAsm->D.dst.reg   = tmp;
3423
3424         nomask_PVSDST(&(pAsm->D.dst));
3425     }
3426     else 
3427     {
3428         if( GL_FALSE == assemble_dst(pAsm) )
3429         {
3430             return GL_FALSE;
3431         }
3432     }
3433
3434     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3435     {
3436         return GL_FALSE;
3437     }
3438               
3439     if( GL_FALSE == assemble_src(pAsm, 2, 1) )  
3440     {
3441         return GL_FALSE;
3442     }
3443
3444     if( GL_FALSE == assemble_src(pAsm, 1, 2) ) 
3445     {
3446         return GL_FALSE;
3447     }
3448
3449     if ( GL_FALSE == next_ins(pAsm) )
3450     {
3451         return GL_FALSE;
3452     }
3453
3454     if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
3455     {
3456         if( GL_FALSE == assemble_dst(pAsm) )
3457         {
3458             return GL_FALSE;
3459         }
3460
3461         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3462
3463         //tmp for source
3464         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3465         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3466         pAsm->S[0].src.reg   = tmp;
3467
3468         noneg_PVSSRC(&(pAsm->S[0].src));
3469         noswizzle_PVSSRC(&(pAsm->S[0].src));
3470
3471         if( GL_FALSE == next_ins(pAsm) )
3472         {
3473             return GL_FALSE;
3474         }
3475     }
3476
3477     return GL_TRUE;
3478 }
3479
3480 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
3481 {
3482     /* 
3483      * r600 - trunc to -PI..PI range
3484      * r700 - normalize by dividing by 2PI
3485      * see fdo bug 27901
3486      */
3487   
3488     int tmp;
3489     checkop1(pAsm);
3490
3491     tmp = gethelpr(pAsm);
3492     if(8 == pAsm->unAsic)
3493     {
3494         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3495     }
3496     else
3497     {
3498         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3499     }
3500     pAsm->D.dst.op3    = 1;
3501
3502     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3503     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
3504     pAsm->D.dst.reg    = tmp;
3505
3506     assemble_src(pAsm, 0, -1);
3507
3508     pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3509     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3510     
3511     pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3512     setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3513
3514     pAsm->D2.dst2.literal_slots = 1;
3515     pAsm->C[0].f = 1/(3.1415926535 * 2);
3516     pAsm->C[1].f = 0.5f;
3517     
3518     if ( GL_FALSE == next_ins(pAsm) )
3519     {
3520         return GL_FALSE;
3521     }
3522
3523     pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3524
3525     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3526     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
3527     pAsm->D.dst.reg    = tmp;
3528     pAsm->D.dst.writex = 1;
3529
3530     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3531     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3532     pAsm->S[0].src.reg   = tmp;
3533     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3534
3535     if(( GL_FALSE == next_ins(pAsm) ))
3536     {
3537         return GL_FALSE;
3538     }
3539     if(8 == pAsm->unAsic)
3540     {
3541         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3542     }
3543     else
3544     {
3545         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3546     }
3547     pAsm->D.dst.op3    = 1;
3548
3549     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3550     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
3551     pAsm->D.dst.reg    = tmp;
3552
3553     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3554     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3555     pAsm->S[0].src.reg   = tmp;
3556     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3557
3558     pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3559     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3560
3561     pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3562     setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3563
3564     pAsm->D2.dst2.literal_slots = 1;
3565
3566     if (pAsm->bR6xx)
3567     {
3568        pAsm->C[0].f = 3.1415926535897f * 2.0f;
3569        pAsm->C[1].f = -3.1415926535897f;
3570     }
3571     else 
3572     {
3573        pAsm->C[0].f = 1.0f;
3574        pAsm->C[1].f = -0.5f;
3575     }
3576
3577     if(( GL_FALSE == next_ins(pAsm) ))
3578     {
3579         return GL_FALSE;
3580     }
3581
3582     pAsm->D.dst.opcode = opcode;
3583     pAsm->D.dst.math = 1;
3584
3585     assemble_dst(pAsm);
3586
3587     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3588     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3589     pAsm->S[0].src.reg   = tmp;
3590     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3591     noneg_PVSSRC(&(pAsm->S[0].src));
3592
3593     next_ins(pAsm);
3594
3595     //TODO - replicate if more channels set in WriteMask
3596     return GL_TRUE;
3597
3598 }
3599  
3600 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3601 {
3602     if( GL_FALSE == checkop2(pAsm) )
3603     {
3604         return GL_FALSE;
3605     }
3606  
3607     if(8 == pAsm->unAsic)
3608     {
3609         pAsm->D.dst.opcode = EG_OP2_INST_DOT4;
3610     }
3611     else
3612     {
3613         pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;  
3614     }
3615
3616     if( GL_FALSE == assemble_dst(pAsm) )
3617     {
3618         return GL_FALSE;
3619     }
3620
3621     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3622     {
3623         return GL_FALSE;
3624     }
3625
3626     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3627     {
3628         return GL_FALSE;
3629     }
3630
3631     if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3632     {
3633        zerocomp_PVSSRC(&(pAsm->S[0].src),2);
3634        zerocomp_PVSSRC(&(pAsm->S[0].src),3);
3635        zerocomp_PVSSRC(&(pAsm->S[1].src),2);
3636        zerocomp_PVSSRC(&(pAsm->S[1].src),3);
3637     }
3638     else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3639     {
3640         zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3641         zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3642     }
3643     else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) 
3644     {
3645         onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3646     } 
3647
3648     if ( GL_FALSE == next_ins(pAsm) ) 
3649     {
3650         return GL_FALSE;
3651     }
3652
3653     return GL_TRUE;
3654 }
3655  
3656 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3657 {
3658     if( GL_FALSE == checkop2(pAsm) )
3659     {
3660         return GL_FALSE;
3661     }
3662
3663     pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3664
3665     if( GL_FALSE == assemble_dst(pAsm) )
3666     {
3667         return GL_FALSE;
3668     }
3669
3670     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3671     {
3672         return GL_FALSE;
3673     }
3674
3675     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3676     {
3677         return GL_FALSE;
3678     }
3679
3680     onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3681     onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3682
3683     onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3684     onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3685
3686     if ( GL_FALSE == next_ins(pAsm) ) 
3687     {
3688         return GL_FALSE;
3689     }
3690
3691     return GL_TRUE;
3692 }
3693
3694 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3695 {
3696     if(8 == pAsm->unAsic)
3697     {
3698         return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE);
3699     }
3700
3701     return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3702 }
3703
3704 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3705 {
3706     BITS tmp;
3707
3708     checkop1(pAsm);
3709
3710     tmp = gethelpr(pAsm);
3711
3712     // FLOOR   tmp.x,    a.x
3713     // EX2     dst.x     tmp.x
3714
3715     if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3716         pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3717
3718         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3719         pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
3720         pAsm->D.dst.reg    = tmp;
3721         pAsm->D.dst.writex = 1;
3722
3723         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3724         {
3725             return GL_FALSE;
3726         }
3727
3728         if( GL_FALSE == next_ins(pAsm) )
3729         {
3730             return GL_FALSE;
3731         }
3732
3733         if(8 == pAsm->unAsic)
3734         {
3735             pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3736         }
3737         else
3738         {
3739             pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3740         }
3741         pAsm->D.dst.math = 1;
3742
3743         if( GL_FALSE == assemble_dst(pAsm) )
3744         {
3745             return GL_FALSE;
3746         }
3747
3748         pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3749
3750         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3751         pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3752         pAsm->S[0].src.reg   = tmp;
3753
3754         setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3755         noneg_PVSSRC(&(pAsm->S[0].src));
3756
3757         if( GL_FALSE == next_ins(pAsm) )
3758         {
3759             return GL_FALSE;
3760         }
3761     }
3762
3763     // FRACT   dst.y     a.x
3764
3765     if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3766         pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3767
3768         if( GL_FALSE == assemble_dst(pAsm) )
3769         {
3770             return GL_FALSE;
3771         }
3772
3773         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3774         {
3775             return GL_FALSE;
3776         }
3777
3778         pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3779
3780         if( GL_FALSE == next_ins(pAsm) )
3781         {
3782             return GL_FALSE;
3783         }
3784     }
3785
3786     // EX2     dst.z,    a.x
3787
3788     if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3789         if(8 == pAsm->unAsic)
3790         {
3791             pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3792         }
3793         else
3794         {
3795             pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3796         }
3797         pAsm->D.dst.math = 1;
3798
3799         if( GL_FALSE == assemble_dst(pAsm) )
3800         {
3801             return GL_FALSE;
3802         }
3803
3804         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3805         {
3806             return GL_FALSE;
3807         }
3808
3809         pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3810
3811         if( GL_FALSE == next_ins(pAsm) )
3812         {
3813             return GL_FALSE;
3814         }
3815     }
3816
3817     // MOV     dst.w     1.0
3818
3819     if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3820         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3821
3822         if( GL_FALSE == assemble_dst(pAsm) )
3823         {
3824             return GL_FALSE;
3825         }
3826
3827         pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3828
3829         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3830         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3831         pAsm->S[0].src.reg   = tmp;
3832
3833         setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3834         noneg_PVSSRC(&(pAsm->S[0].src));
3835
3836         if( GL_FALSE == next_ins(pAsm) )
3837         {
3838             return GL_FALSE;
3839         }
3840     }
3841
3842     return GL_TRUE;
3843 }
3844  
3845 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3846 {
3847     checkop1(pAsm);
3848
3849     pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;  
3850
3851     if ( GL_FALSE == assemble_dst(pAsm) )
3852     {
3853         return GL_FALSE;
3854     }
3855
3856     if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3857     {
3858         return GL_FALSE;
3859     }
3860
3861     if ( GL_FALSE == next_ins(pAsm) ) 
3862     {
3863         return GL_FALSE;
3864     }
3865
3866     return GL_TRUE;
3867 }
3868
3869 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3870 {
3871     if(8 == pAsm->unAsic)
3872     {
3873         return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT);
3874     }
3875
3876     return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3877 }
3878
3879 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3880 {
3881     checkop1(pAsm);
3882
3883     pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; 
3884
3885     if ( GL_FALSE == assemble_dst(pAsm) )
3886     {
3887         return GL_FALSE;
3888     }
3889
3890     if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3891     {
3892         return GL_FALSE;
3893     }
3894
3895     if ( GL_FALSE == next_ins(pAsm) )
3896     {
3897         return GL_FALSE;
3898     }
3899
3900     return GL_TRUE;
3901 }
3902  
3903 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3904 {  
3905     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3906
3907     if(pILInst->Opcode == OPCODE_KIL)
3908         checkop1(pAsm);
3909
3910     pAsm->D.dst.opcode = opcode;  
3911     //pAsm->D.dst.math = 1;
3912
3913     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3914     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3915     pAsm->D.dst.reg   = 0;
3916     pAsm->D.dst.writex = 0;
3917     pAsm->D.dst.writey = 0;
3918     pAsm->D.dst.writez = 0;
3919     pAsm->D.dst.writew = 0;
3920
3921     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3922     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3923     pAsm->S[0].src.reg = 0;
3924     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3925     noneg_PVSSRC(&(pAsm->S[0].src));
3926
3927     if(pILInst->Opcode == OPCODE_KIL_NV)
3928     {
3929         setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3930         pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3931         pAsm->S[1].src.reg = 0;
3932         setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3933         neg_PVSSRC(&(pAsm->S[1].src));
3934     }
3935     else
3936     {
3937         if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3938         {
3939             return GL_FALSE;
3940         }
3941
3942     }
3943
3944     if ( GL_FALSE == next_ins(pAsm) )
3945     {
3946         return GL_FALSE;
3947     }
3948
3949     /* Doc says KILL has to be last(end) ALU clause */
3950     pAsm->pR700Shader->killIsUsed = GL_TRUE;
3951     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3952     
3953     return GL_TRUE;
3954 }
3955
3956 GLboolean assemble_LG2(r700_AssemblerBase *pAsm) 
3957
3958     if(8 == pAsm->unAsic)
3959     {
3960         return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE);
3961     }
3962
3963     return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3964 }
3965
3966 GLboolean assemble_LRP(r700_AssemblerBase *pAsm) 
3967 {
3968     BITS tmp;
3969
3970     if( GL_FALSE == checkop3(pAsm) )
3971     {
3972         return GL_FALSE;
3973     }
3974
3975     tmp = gethelpr(pAsm);
3976
3977     pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3978
3979     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3980     pAsm->D.dst.reg   = tmp;
3981     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3982     nomask_PVSDST(&(pAsm->D.dst));
3983
3984           
3985     if( GL_FALSE == assemble_src(pAsm, 1, 0) ) 
3986     {
3987             return GL_FALSE;
3988     }
3989
3990     if ( GL_FALSE == assemble_src(pAsm, 2, 1) )   
3991     {
3992             return GL_FALSE;
3993     }
3994
3995     neg_PVSSRC(&(pAsm->S[1].src));
3996
3997     if( GL_FALSE == next_ins(pAsm) ) 
3998     {
3999             return GL_FALSE;
4000     }
4001
4002     if(8 == pAsm->unAsic)
4003     {
4004         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4005     }
4006     else
4007     {
4008         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4009     }
4010     pAsm->D.dst.op3    = 1;
4011
4012     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4013     pAsm->D.dst.reg = tmp;
4014     nomask_PVSDST(&(pAsm->D.dst));
4015     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4016
4017     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4018     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4019     pAsm->S[0].src.reg = tmp;
4020     noswizzle_PVSSRC(&(pAsm->S[0].src));
4021
4022
4023     if( GL_FALSE == assemble_src(pAsm, 0, 1) ) 
4024     {
4025         return GL_FALSE;
4026     }
4027
4028     if( GL_FALSE == assemble_src(pAsm, 2, -1) ) 
4029     {
4030         return GL_FALSE;
4031     }
4032
4033     if( GL_FALSE == next_ins(pAsm) ) 
4034     {
4035         return GL_FALSE;
4036     }
4037
4038     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4039
4040     if( GL_FALSE == assemble_dst(pAsm) )
4041     {
4042         return GL_FALSE;
4043     }
4044
4045     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4046     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4047     pAsm->S[0].src.reg = tmp;
4048     noswizzle_PVSSRC(&(pAsm->S[0].src));
4049
4050     if( GL_FALSE == next_ins(pAsm) ) 
4051     {
4052         return GL_FALSE;
4053     }
4054
4055     return GL_TRUE;
4056 }
4057
4058 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
4059 {
4060     BITS tmp1, tmp2, tmp3;
4061
4062     checkop1(pAsm);
4063
4064     tmp1 = gethelpr(pAsm);
4065     tmp2 = gethelpr(pAsm);
4066     tmp3 = gethelpr(pAsm);
4067
4068     // FIXME: The hardware can do fabs() directly on input
4069     //        elements, but the compiler doesn't have the
4070     //        capability to use that.
4071
4072     // MAX     tmp1.x,   a.x,    -a.x   (fabs(a.x))
4073
4074     pAsm->D.dst.opcode = SQ_OP2_INST_MAX;  
4075
4076     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4077     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4078     pAsm->D.dst.reg    = tmp1;
4079     pAsm->D.dst.writex = 1;
4080
4081     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4082     {
4083         return GL_FALSE;
4084     }
4085  
4086     pAsm->S[1].bits = pAsm->S[0].bits;
4087     flipneg_PVSSRC(&(pAsm->S[1].src));
4088
4089     if ( GL_FALSE == next_ins(pAsm) ) 
4090     {
4091         return GL_FALSE;
4092     }
4093
4094     // Entire algo:
4095     //
4096     // LG2     tmp2.x,   tmp1.x
4097     // FLOOR   tmp3.x,   tmp2.x
4098     // MOV     dst.x,    tmp3.x
4099     // ADD     tmp3.x,   tmp2.x,    -tmp3.x
4100     // EX2     dst.y,    tmp3.x
4101     // MOV     dst.z,    tmp2.x
4102     // MOV     dst.w,    1.0
4103
4104     // LG2     tmp2.x,   tmp1.x
4105     // FLOOR   tmp3.x,   tmp2.x
4106
4107     if(8 == pAsm->unAsic)
4108     {
4109         pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4110     }
4111     else
4112     {
4113         pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4114     }
4115     pAsm->D.dst.math = 1;
4116
4117     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4118     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4119     pAsm->D.dst.reg    = tmp2;
4120     pAsm->D.dst.writex = 1;
4121
4122     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4123     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4124     pAsm->S[0].src.reg   = tmp1;
4125
4126     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4127     noneg_PVSSRC(&(pAsm->S[0].src));
4128
4129     if( GL_FALSE == next_ins(pAsm) )
4130     {
4131         return GL_FALSE;
4132     }
4133
4134     pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
4135
4136     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4137     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4138     pAsm->D.dst.reg    = tmp3;
4139     pAsm->D.dst.writex = 1;
4140
4141     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4142     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4143     pAsm->S[0].src.reg   = tmp2;
4144
4145     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4146     noneg_PVSSRC(&(pAsm->S[0].src));
4147
4148     if( GL_FALSE == next_ins(pAsm) )
4149     {
4150         return GL_FALSE;
4151     }
4152
4153     // MOV     dst.x,    tmp3.x
4154
4155     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4156
4157     if( GL_FALSE == assemble_dst(pAsm) )
4158     {
4159         return GL_FALSE;
4160     }
4161
4162     pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4163
4164     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4165     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4166     pAsm->S[0].src.reg   = tmp3;
4167
4168     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4169     noneg_PVSSRC(&(pAsm->S[0].src));
4170
4171     if( GL_FALSE == next_ins(pAsm) )
4172     {
4173         return GL_FALSE;
4174     }
4175
4176     // ADD     tmp3.x,   tmp2.x,    -tmp3.x
4177     // EX2     dst.y,    tmp3.x
4178
4179     pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4180
4181     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4182     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4183     pAsm->D.dst.reg    = tmp3;
4184     pAsm->D.dst.writex = 1;
4185
4186     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4187     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4188     pAsm->S[0].src.reg   = tmp2;
4189
4190     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4191     noneg_PVSSRC(&(pAsm->S[0].src));
4192
4193     setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4194     pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
4195     pAsm->S[1].src.reg   = tmp3;
4196
4197     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4198     neg_PVSSRC(&(pAsm->S[1].src));
4199
4200     if( GL_FALSE == next_ins(pAsm) )
4201     {
4202         return GL_FALSE;
4203     }
4204
4205     if(8 == pAsm->unAsic)
4206     {
4207         pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4208     }
4209     else
4210     {
4211         pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4212     }
4213     pAsm->D.dst.math = 1;
4214
4215     if( GL_FALSE == assemble_dst(pAsm) )
4216     {
4217         return GL_FALSE;
4218     }
4219
4220     pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4221
4222     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4223     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4224     pAsm->S[0].src.reg   = tmp3;
4225
4226     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4227     noneg_PVSSRC(&(pAsm->S[0].src));
4228
4229     if( GL_FALSE == next_ins(pAsm) )
4230     {
4231         return GL_FALSE;
4232     }
4233
4234     // MOV     dst.z,    tmp2.x
4235
4236     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4237
4238     if( GL_FALSE == assemble_dst(pAsm) )
4239     {
4240         return GL_FALSE;
4241     }
4242
4243     pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
4244
4245     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4246     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4247     pAsm->S[0].src.reg   = tmp2;
4248
4249     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4250     noneg_PVSSRC(&(pAsm->S[0].src));
4251
4252     if( GL_FALSE == next_ins(pAsm) )
4253     {
4254         return GL_FALSE;
4255     }
4256
4257     // MOV     dst.w     1.0
4258
4259     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4260
4261     if( GL_FALSE == assemble_dst(pAsm) )
4262     {
4263         return GL_FALSE;
4264     }
4265
4266     pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
4267
4268     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4269     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4270     pAsm->S[0].src.reg   = tmp1;
4271
4272     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
4273     noneg_PVSSRC(&(pAsm->S[0].src));
4274
4275     if( GL_FALSE == next_ins(pAsm) )
4276     {
4277         return GL_FALSE;
4278     }
4279
4280     return GL_TRUE;
4281 }
4282
4283 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) 
4284 {
4285     int tmp, ii;
4286     GLboolean bReplaceDst = GL_FALSE;
4287     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4288
4289         if( GL_FALSE == checkop3(pAsm) )
4290     {
4291         return GL_FALSE;
4292     }
4293
4294     if(8 == pAsm->unAsic)
4295     {
4296         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4297     }
4298     else
4299     {
4300             pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;  
4301     }
4302         pAsm->D.dst.op3     = 1; 
4303
4304         tmp = (-1);
4305
4306     if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
4307     {   /* TODO : more investigation on MAD src and dst using same register */
4308         for(ii=0; ii<3; ii++)
4309         {
4310             if(   (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
4311                && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
4312             {
4313                 bReplaceDst = GL_TRUE;
4314                 break;
4315             }
4316         }
4317     }
4318     if(0xF != pILInst->DstReg.WriteMask)
4319     {   /* OP3 has no support for write mask */
4320         bReplaceDst = GL_TRUE;
4321     }
4322
4323         if(GL_TRUE == bReplaceDst)
4324     {
4325         tmp = gethelpr(pAsm);
4326
4327         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4328         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4329         pAsm->D.dst.reg   = tmp;
4330
4331         nomask_PVSDST(&(pAsm->D.dst));
4332     }
4333     else 
4334     {
4335         if( GL_FALSE == assemble_dst(pAsm) )
4336         {
4337             return GL_FALSE;
4338         }
4339     }
4340
4341         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4342     {
4343         return GL_FALSE;
4344     }
4345               
4346     if( GL_FALSE == assemble_src(pAsm, 1, -1) )  
4347     {
4348         return GL_FALSE;
4349     }
4350
4351     if( GL_FALSE == assemble_src(pAsm, 2, -1) ) 
4352     {
4353         return GL_FALSE;
4354     }
4355
4356     if ( GL_FALSE == next_ins(pAsm) )
4357     {
4358         return GL_FALSE;
4359     }
4360
4361         if (GL_TRUE == bReplaceDst) 
4362     {
4363         if( GL_FALSE == assemble_dst(pAsm) )
4364         {
4365             return GL_FALSE;
4366         }
4367
4368         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4369
4370         //tmp for source
4371         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4372         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4373         pAsm->S[0].src.reg   = tmp;
4374
4375         noneg_PVSSRC(&(pAsm->S[0].src));
4376         noswizzle_PVSSRC(&(pAsm->S[0].src));
4377
4378         if( GL_FALSE == next_ins(pAsm) )
4379         {
4380             return GL_FALSE;
4381         }
4382     }
4383
4384     return GL_TRUE;
4385 }
4386
4387 /* LIT dst, src */
4388 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
4389 {
4390     unsigned int dstReg;
4391     unsigned int dstType;
4392     checkop1(pAsm);
4393     int tmp = gethelpr(pAsm);
4394
4395     if( GL_FALSE == assemble_dst(pAsm) )
4396     {
4397         return GL_FALSE;
4398     }
4399     dstReg  = pAsm->D.dst.reg;
4400     dstType = pAsm->D.dst.rtype;
4401
4402     /* dst.xw, <- 1.0  */
4403     if( pAsm->D.dst.writex || pAsm->D.dst.writew )
4404     {
4405         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4406         {
4407             return GL_FALSE;
4408         }
4409
4410         pAsm->D.dst.opcode   = SQ_OP2_INST_MOV;
4411         pAsm->D.dst.writey   = 0;
4412         pAsm->D.dst.writez   = 0;
4413         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4414         pAsm->S[0].src.reg   = tmp;
4415         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4416         noneg_PVSSRC(&(pAsm->S[0].src));
4417         pAsm->S[0].src.swizzlex = SQ_SEL_1;
4418         pAsm->S[0].src.swizzley = SQ_SEL_1;
4419         pAsm->S[0].src.swizzlez = SQ_SEL_1;
4420         pAsm->S[0].src.swizzlew = SQ_SEL_1;
4421         if( GL_FALSE == next_ins(pAsm) )
4422         {
4423             return GL_FALSE;
4424         }
4425     }
4426
4427     if( GL_FALSE == assemble_dst(pAsm) )
4428     {
4429         return GL_FALSE;
4430     }
4431
4432     if( pAsm->D.dst.writey ) { 
4433
4434         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4435         {
4436             return GL_FALSE;
4437         }
4438
4439         /* dst.y = max(src.x, 0.0) */
4440         pAsm->D.dst.opcode   = SQ_OP2_INST_MAX;
4441         pAsm->D.dst.writex   = 0;
4442         pAsm->D.dst.writey   = 1;
4443         pAsm->D.dst.writez   = 0;
4444         pAsm->D.dst.writew   = 0;
4445         swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4446         pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4447         pAsm->S[1].src.reg   = tmp;
4448         setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4449         noneg_PVSSRC(&(pAsm->S[1].src));
4450         pAsm->S[1].src.swizzlex = SQ_SEL_0;
4451         pAsm->S[1].src.swizzley = SQ_SEL_0;
4452         pAsm->S[1].src.swizzlez = SQ_SEL_0;
4453         pAsm->S[1].src.swizzlew = SQ_SEL_0;
4454         if( GL_FALSE == next_ins(pAsm) )
4455         {
4456             return GL_FALSE;
4457         }
4458     }
4459
4460     if( GL_FALSE == assemble_dst(pAsm) )
4461     {
4462         return GL_FALSE;
4463     }
4464     if ( pAsm->D.dst.writez) {
4465
4466         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4467         {
4468             return GL_FALSE;
4469         }
4470
4471         /* dst.z = log(src.y) */
4472         if(8 == pAsm->unAsic)
4473         {
4474             pAsm->D.dst.opcode   = EG_OP2_INST_LOG_CLAMPED;
4475         }
4476         else
4477         {
4478             pAsm->D.dst.opcode   = SQ_OP2_INST_LOG_CLAMPED;
4479         }
4480         pAsm->D.dst.math     = 1;
4481         pAsm->D.dst.writex   = 0;
4482         pAsm->D.dst.writey   = 0;
4483         pAsm->D.dst.writez   = 1;
4484         pAsm->D.dst.writew   = 0;
4485         swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4486         if( GL_FALSE == next_ins(pAsm) )
4487         {
4488             return GL_FALSE;
4489         }
4490
4491         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4492         {
4493             return GL_FALSE;
4494         }
4495
4496         if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4497         {
4498             return GL_FALSE;
4499         }
4500
4501         swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4502
4503         swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4504
4505         /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4506         if(8 == pAsm->unAsic)
4507         {
4508             pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT;
4509         }
4510         else
4511         {
4512             pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4513         }
4514         pAsm->D.dst.math     = 1;
4515         pAsm->D.dst.op3      = 1;
4516         pAsm->D.dst.rtype    = DST_REG_TEMPORARY;
4517         pAsm->D.dst.reg      = tmp;
4518         pAsm->D.dst.writex   = 1;
4519         pAsm->D.dst.writey   = 0;
4520         pAsm->D.dst.writez   = 0;
4521         pAsm->D.dst.writew   = 0;
4522
4523
4524         pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4525         pAsm->S[1].src.reg   = dstReg;
4526         setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4527         noneg_PVSSRC(&(pAsm->S[1].src));
4528         pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4529         pAsm->S[1].src.swizzley = SQ_SEL_Z;
4530         pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4531         pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4532
4533         if( GL_FALSE == next_ins(pAsm) )
4534         {
4535             return GL_FALSE;
4536         }
4537
4538         /* dst.z = exp(tmp.x) */
4539         if( GL_FALSE == assemble_dst(pAsm) )
4540         {
4541             return GL_FALSE;
4542         }
4543         if(8 == pAsm->unAsic)
4544         {
4545             pAsm->D.dst.opcode   = EG_OP2_INST_EXP_IEEE;
4546         }
4547         else
4548         {
4549             pAsm->D.dst.opcode   = SQ_OP2_INST_EXP_IEEE;
4550         }
4551         pAsm->D.dst.math     = 1;
4552         pAsm->D.dst.writex   = 0;
4553         pAsm->D.dst.writey   = 0;
4554         pAsm->D.dst.writez   = 1;
4555         pAsm->D.dst.writew   = 0;
4556
4557         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4558         pAsm->S[0].src.reg   = tmp;
4559         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4560         noneg_PVSSRC(&(pAsm->S[0].src));
4561         pAsm->S[0].src.swizzlex = SQ_SEL_X;
4562         pAsm->S[0].src.swizzley = SQ_SEL_X;
4563         pAsm->S[0].src.swizzlez = SQ_SEL_X;
4564         pAsm->S[0].src.swizzlew = SQ_SEL_X;
4565
4566         if( GL_FALSE == next_ins(pAsm) )
4567         {
4568             return GL_FALSE;
4569         }
4570     }
4571     return GL_TRUE;
4572 }
4573  
4574 GLboolean assemble_MAX(r700_AssemblerBase *pAsm) 
4575 {
4576         if( GL_FALSE == checkop2(pAsm) )
4577         {
4578                 return GL_FALSE;
4579         }
4580
4581         pAsm->D.dst.opcode = SQ_OP2_INST_MAX; 
4582         
4583         if( GL_FALSE == assemble_dst(pAsm) )
4584         {
4585                 return GL_FALSE;
4586         }
4587
4588         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4589         {
4590                 return GL_FALSE;
4591         }
4592
4593         if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4594         {
4595                 return GL_FALSE;
4596         }
4597
4598         if( GL_FALSE == next_ins(pAsm) )
4599         {
4600                 return GL_FALSE;
4601         }
4602
4603     return GL_TRUE;
4604 }
4605  
4606 GLboolean assemble_MIN(r700_AssemblerBase *pAsm) 
4607 {
4608         if( GL_FALSE == checkop2(pAsm) )
4609         {
4610                 return GL_FALSE;
4611         }
4612
4613         pAsm->D.dst.opcode = SQ_OP2_INST_MIN;  
4614
4615         if( GL_FALSE == assemble_dst(pAsm) )
4616         {
4617                 return GL_FALSE;
4618         }
4619
4620         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4621         {
4622                 return GL_FALSE;
4623         }
4624
4625         if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4626         {
4627                 return GL_FALSE;
4628         }
4629  
4630         if( GL_FALSE == next_ins(pAsm) )
4631         {
4632                 return GL_FALSE;
4633         }
4634
4635     return GL_TRUE;
4636 }
4637  
4638 GLboolean assemble_MOV(r700_AssemblerBase *pAsm) 
4639 {
4640     checkop1(pAsm);
4641
4642     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4643
4644     if (GL_FALSE == assemble_dst(pAsm))
4645     {
4646         return GL_FALSE;
4647     }
4648
4649     if (GL_FALSE == assemble_src(pAsm, 0, -1))
4650     {
4651         return GL_FALSE;
4652     }
4653
4654     if ( GL_FALSE == next_ins(pAsm) ) 
4655     {
4656         return GL_FALSE;
4657     }
4658
4659     return GL_TRUE;
4660 }
4661  
4662 GLboolean assemble_MUL(r700_AssemblerBase *pAsm) 
4663 {
4664         if( GL_FALSE == checkop2(pAsm) )
4665         {
4666                 return GL_FALSE;
4667         }
4668
4669         pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4670
4671         if( GL_FALSE == assemble_dst(pAsm) )
4672         {
4673                 return GL_FALSE;
4674         }
4675
4676         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4677         {
4678                 return GL_FALSE;
4679         }
4680
4681         if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4682         {
4683                 return GL_FALSE;
4684         }
4685
4686         if( GL_FALSE == next_ins(pAsm) ) 
4687         {
4688                 return GL_FALSE;
4689         }
4690
4691     return GL_TRUE;
4692 }
4693  
4694 GLboolean assemble_POW(r700_AssemblerBase *pAsm) 
4695 {
4696     BITS tmp;
4697
4698     checkop1(pAsm);
4699
4700     tmp = gethelpr(pAsm);
4701
4702     // LG2 tmp.x,     a.swizzle
4703     if(8 == pAsm->unAsic)
4704     {
4705         pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4706     }
4707     else
4708     {
4709         pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;  
4710     }
4711     pAsm->D.dst.math = 1;
4712
4713     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4714     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4715     pAsm->D.dst.reg   = tmp;
4716     nomask_PVSDST(&(pAsm->D.dst));
4717
4718     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4719     {
4720         return GL_FALSE;
4721     }
4722
4723     if( GL_FALSE == next_ins(pAsm) ) 
4724     {
4725         return GL_FALSE;
4726     }
4727
4728     // MUL tmp.x,     tmp.x, b.swizzle
4729     pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4730
4731     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4732     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4733     pAsm->D.dst.reg = tmp;
4734     nomask_PVSDST(&(pAsm->D.dst));
4735
4736     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4737     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4738     pAsm->S[0].src.reg = tmp;
4739     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4740     noneg_PVSSRC(&(pAsm->S[0].src));
4741
4742     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4743     {
4744         return GL_FALSE;
4745     }
4746
4747     if( GL_FALSE == next_ins(pAsm) ) 
4748     {
4749         return GL_FALSE;
4750     }
4751
4752     // EX2 dst.mask,          tmp.x
4753     // EX2 tmp.x,             tmp.x
4754     if(8 == pAsm->unAsic)
4755     {
4756         pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4757     }
4758     else
4759     {
4760         pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4761     }
4762     pAsm->D.dst.math = 1;
4763
4764     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4765     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4766     pAsm->D.dst.reg = tmp;
4767     nomask_PVSDST(&(pAsm->D.dst));
4768
4769     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4770     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4771     pAsm->S[0].src.reg = tmp;
4772     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4773     noneg_PVSSRC(&(pAsm->S[0].src));
4774
4775     if( GL_FALSE == next_ins(pAsm) ) 
4776     {
4777         return GL_FALSE;
4778     }
4779
4780     // Now replicate result to all necessary channels in destination
4781     pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4782
4783     if( GL_FALSE == assemble_dst(pAsm) )
4784     {
4785         return GL_FALSE;
4786     }
4787
4788     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4789     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4790     pAsm->S[0].src.reg   = tmp;
4791
4792     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4793     noneg_PVSSRC(&(pAsm->S[0].src));
4794
4795     if( GL_FALSE == next_ins(pAsm) )
4796     {
4797         return GL_FALSE;
4798     }
4799
4800     return GL_TRUE;
4801 }
4802  
4803 GLboolean assemble_RCP(r700_AssemblerBase *pAsm) 
4804 {
4805     if(8 == pAsm->unAsic)
4806     {
4807         return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE);
4808     }
4809
4810     return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4811 }
4812  
4813 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) 
4814 {
4815     if(8 == pAsm->unAsic)
4816     {
4817         return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE);
4818     }
4819
4820     return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4821 }
4822  
4823 GLboolean assemble_SCS(r700_AssemblerBase *pAsm) 
4824 {
4825     BITS tmp;
4826
4827     checkop1(pAsm);
4828
4829     tmp = gethelpr(pAsm);
4830
4831     if(8 == pAsm->unAsic)
4832     {
4833         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4834     }
4835     else
4836     {
4837         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4838     }
4839     pAsm->D.dst.op3    = 1;
4840
4841     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4842     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4843     pAsm->D.dst.reg    = tmp;
4844
4845     assemble_src(pAsm, 0, -1);
4846
4847     pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4848     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4849
4850     pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4851     setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4852
4853     pAsm->D2.dst2.literal_slots = 1;
4854     pAsm->C[0].f = 1/(3.1415926535 * 2);
4855     pAsm->C[1].f = 0.5F;
4856
4857     if ( GL_FALSE == next_ins(pAsm) )
4858     {
4859         return GL_FALSE;
4860     }
4861
4862     pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
4863
4864     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4865     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4866     pAsm->D.dst.reg    = tmp;
4867     pAsm->D.dst.writex = 1;
4868
4869     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4870     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4871     pAsm->S[0].src.reg   = tmp;
4872     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4873
4874     if(( GL_FALSE == next_ins(pAsm) ))
4875     {
4876         return GL_FALSE;
4877     }
4878     if(8 == pAsm->unAsic)
4879     {
4880         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4881     }
4882     else
4883     {
4884         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4885     }
4886     pAsm->D.dst.op3    = 1;
4887
4888     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4889     pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
4890     pAsm->D.dst.reg    = tmp;
4891
4892     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4893     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4894     pAsm->S[0].src.reg   = tmp;
4895     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4896
4897     pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4898     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4899
4900     pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4901     setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4902
4903     pAsm->D2.dst2.literal_slots = 1;
4904
4905     if(pAsm->bR6xx) {
4906        pAsm->C[0].f = 3.1415926535897f * 2.0f;
4907        pAsm->C[1].f = -3.1415926535897f;
4908     } else {
4909        pAsm->C[0].f = 1.0f;
4910        pAsm->C[1].f = -0.5f;
4911     }
4912
4913     if(( GL_FALSE == next_ins(pAsm) ))
4914     {
4915         return GL_FALSE;
4916     }
4917
4918     // COS dst.x,    a.x
4919     if(8 == pAsm->unAsic)
4920     {
4921         pAsm->D.dst.opcode = EG_OP2_INST_COS;
4922     }
4923     else
4924     {
4925         pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4926     }
4927     pAsm->D.dst.math = 1;
4928
4929     assemble_dst(pAsm);
4930     /* mask y */
4931     pAsm->D.dst.writey = 0;
4932
4933     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4934     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4935     pAsm->S[0].src.reg   = tmp;
4936     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4937     noneg_PVSSRC(&(pAsm->S[0].src));
4938
4939     if ( GL_FALSE == next_ins(pAsm) )
4940     {
4941         return GL_FALSE;
4942     }
4943
4944     // SIN dst.y,    a.x
4945     if(8 == pAsm->unAsic)
4946     {
4947         pAsm->D.dst.opcode = EG_OP2_INST_SIN;
4948     }
4949     else
4950     {
4951         pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4952     }
4953     pAsm->D.dst.math = 1;
4954
4955     assemble_dst(pAsm);
4956     /* mask x */
4957     pAsm->D.dst.writex = 0;
4958
4959     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4960     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4961     pAsm->S[0].src.reg   = tmp;
4962     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4963     noneg_PVSSRC(&(pAsm->S[0].src));
4964
4965     if( GL_FALSE == next_ins(pAsm) )
4966     {
4967         return GL_FALSE;
4968     }
4969
4970     return GL_TRUE;
4971 }
4972
4973 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) 
4974 {
4975     if( GL_FALSE == checkop2(pAsm) )
4976     {
4977             return GL_FALSE;
4978     }
4979
4980     pAsm->D.dst.opcode = opcode;
4981     //pAsm->D.dst.math   = 1;
4982
4983     if( GL_FALSE == assemble_dst(pAsm) )
4984     {
4985             return GL_FALSE;
4986     }
4987
4988     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4989     {
4990             return GL_FALSE;
4991     }
4992
4993     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4994     {
4995             return GL_FALSE;
4996     }
4997
4998     if( GL_FALSE == next_ins(pAsm) ) 
4999     {
5000             return GL_FALSE;
5001     }
5002
5003     return GL_TRUE;
5004 }
5005
5006 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) 
5007 {
5008     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
5009
5010     pAsm->D.dst.opcode = opcode;
5011     pAsm->D.dst.math   = 1;
5012     pAsm->D.dst.predicated = 1;
5013
5014     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5015     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5016     pAsm->D.dst.reg = pAsm->uHelpReg;
5017     pAsm->D.dst.writex = 1;
5018     pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
5019
5020     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5021     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5022     pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
5023     pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
5024     noneg_PVSSRC(&(pAsm->S[0].src));
5025
5026     pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5027     pAsm->S[1].src.reg   = pAsm->uHelpReg;
5028     setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5029     noneg_PVSSRC(&(pAsm->S[1].src));
5030     pAsm->S[1].src.swizzlex = SQ_SEL_0;
5031     pAsm->S[1].src.swizzley = SQ_SEL_0;
5032     pAsm->S[1].src.swizzlez = SQ_SEL_0;
5033     pAsm->S[1].src.swizzlew = SQ_SEL_0;
5034
5035     if( GL_FALSE == next_ins(pAsm) ) 
5036     {
5037             return GL_FALSE;
5038     }
5039
5040     return GL_TRUE;
5041 }
5042  
5043 GLboolean assemble_SGE(r700_AssemblerBase *pAsm) 
5044 {
5045     if( GL_FALSE == checkop2(pAsm) )
5046     {
5047             return GL_FALSE;
5048     }
5049
5050     pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;  
5051
5052     if( GL_FALSE == assemble_dst(pAsm) )
5053     {
5054             return GL_FALSE;
5055     }
5056
5057     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5058     {
5059             return GL_FALSE;
5060     }
5061
5062     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5063     {
5064             return GL_FALSE;
5065     }
5066
5067     if( GL_FALSE == next_ins(pAsm) ) 
5068     {
5069             return GL_FALSE;
5070     }
5071
5072     return GL_TRUE;
5073 }
5074  
5075 GLboolean assemble_SLT(r700_AssemblerBase *pAsm) 
5076 {
5077     if( GL_FALSE == checkop2(pAsm) )
5078     {
5079             return GL_FALSE;
5080     }
5081
5082     pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;  
5083
5084     if( GL_FALSE == assemble_dst(pAsm) )
5085     {
5086         return GL_FALSE;
5087     }
5088                 
5089     if( GL_FALSE == assemble_src(pAsm, 0, 1) )  
5090     {
5091         return GL_FALSE;
5092     }
5093
5094     if( GL_FALSE == assemble_src(pAsm, 1, 0) )  
5095     {
5096         return GL_FALSE;
5097     }
5098
5099     if( GL_FALSE == next_ins(pAsm) )
5100     {
5101         return GL_FALSE;
5102     }
5103
5104     return GL_TRUE;
5105 }
5106  
5107 GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
5108 {
5109     checkop1(pAsm);
5110     
5111     GLuint tmp = gethelpr(pAsm);
5112     /* tmp = (src > 0 ? 1 : src) */
5113     if(8 == pAsm->unAsic)
5114     {
5115         pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5116     }
5117     else
5118     {
5119         pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5120     }
5121     pAsm->D.dst.op3    = 1;
5122     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5123     pAsm->D.dst.reg   = tmp;
5124
5125     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5126     {
5127         return GL_FALSE;
5128     }
5129
5130     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5131
5132     if( GL_FALSE == assemble_src(pAsm, 0, 2) )
5133     {
5134         return GL_FALSE;
5135     }
5136
5137     if( GL_FALSE == next_ins(pAsm) )
5138     {
5139         return GL_FALSE;
5140     }
5141
5142     /* dst = (-tmp > 0 ? -1 : tmp) */
5143     if(8 == pAsm->unAsic)
5144     {
5145         pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5146     }
5147     else
5148     {
5149         pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5150     }
5151     pAsm->D.dst.op3    = 1;
5152
5153     if( GL_FALSE == assemble_dst(pAsm) )
5154     {
5155         return GL_FALSE;
5156     }
5157
5158     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5159     pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5160     pAsm->S[0].src.reg   = tmp;
5161     noswizzle_PVSSRC(&(pAsm->S[0].src));
5162     neg_PVSSRC(&(pAsm->S[0].src));
5163
5164     setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5165     neg_PVSSRC(&(pAsm->S[1].src));
5166
5167     setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5168     pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5169     pAsm->S[2].src.reg   = tmp;
5170     noswizzle_PVSSRC(&(pAsm->S[2].src));
5171
5172     if( GL_FALSE == next_ins(pAsm) )
5173     {
5174         return GL_FALSE;
5175     }
5176
5177     return GL_TRUE;
5178 }
5179
5180 GLboolean assemble_STP(r700_AssemblerBase *pAsm) 
5181 {
5182     return GL_TRUE;
5183 }
5184  
5185 GLboolean assemble_TEX(r700_AssemblerBase *pAsm) 
5186 {
5187     GLboolean src_const;
5188     GLboolean need_barrier = GL_FALSE; 
5189
5190     checkop1(pAsm);
5191     
5192     switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
5193     {
5194     case PROGRAM_UNIFORM: 
5195     case PROGRAM_CONSTANT:
5196     case PROGRAM_LOCAL_PARAM:
5197     case PROGRAM_ENV_PARAM:
5198     case PROGRAM_STATE_VAR:
5199         src_const = GL_TRUE;
5200         break;
5201     case PROGRAM_TEMPORARY:
5202     case PROGRAM_INPUT:
5203     default:
5204         src_const = GL_FALSE;
5205         break;
5206     }
5207
5208     if (GL_TRUE == src_const)
5209     {
5210             if ( GL_FALSE == mov_temp(pAsm, 0) )
5211                     return GL_FALSE;
5212             need_barrier = GL_TRUE;
5213     }
5214
5215     if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5216     {
5217         GLuint tmp = gethelpr(pAsm);
5218         if(8 == pAsm->unAsic)
5219         {
5220             pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5221         }
5222         else
5223         {
5224             pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5225         }
5226         pAsm->D.dst.math = 1;
5227         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5228         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5229         pAsm->D.dst.reg   = tmp;
5230         pAsm->D.dst.writew = 1;
5231
5232         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5233         {
5234             return GL_FALSE;
5235         }
5236         swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
5237         if( GL_FALSE == next_ins(pAsm) )
5238         {
5239             return GL_FALSE;
5240         }
5241
5242         pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5243         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5244         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5245         pAsm->D.dst.reg   = tmp;
5246         pAsm->D.dst.writex = 1;
5247         pAsm->D.dst.writey = 1;
5248         pAsm->D.dst.writez = 1;
5249         pAsm->D.dst.writew = 0;
5250
5251         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5252         {
5253             return GL_FALSE;
5254         }
5255         setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5256         pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5257         pAsm->S[1].src.reg   = tmp;
5258         setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
5259
5260         if( GL_FALSE == next_ins(pAsm) )
5261         {
5262             return GL_FALSE;
5263         }
5264         
5265         pAsm->aArgSubst[1] = tmp;
5266         need_barrier = GL_TRUE;
5267     }
5268
5269     if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
5270     {
5271         GLuint tmp1 = gethelpr(pAsm);
5272         GLuint tmp2 = gethelpr(pAsm);
5273         
5274         /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5275         if(8 == pAsm->unAsic)
5276         {
5277             pAsm->D.dst.opcode = EG_OP2_INST_CUBE;
5278         }
5279         else
5280         {
5281             pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
5282         }
5283         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5284         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5285         pAsm->D.dst.reg   = tmp1;
5286         nomask_PVSDST(&(pAsm->D.dst));
5287         
5288         if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5289         {
5290             return GL_FALSE;
5291         }
5292
5293         if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5294         {
5295             return GL_FALSE;
5296         }
5297
5298         swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
5299         swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); 
5300
5301         if( GL_FALSE == next_ins(pAsm) )
5302         {
5303             return GL_FALSE;
5304         }
5305  
5306         /* tmp1.z = RCP_e(|tmp1.z|) */
5307         if(8 == pAsm->unAsic)
5308         {
5309             pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5310         }
5311         else
5312         {
5313             pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5314         }
5315         pAsm->D.dst.math = 1;
5316         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5317         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5318         pAsm->D.dst.reg   = tmp1;
5319         pAsm->D.dst.writez = 1;
5320
5321         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5322         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5323         pAsm->S[0].src.reg = tmp1;
5324         pAsm->S[0].src.swizzlex = SQ_SEL_Z;
5325         pAsm->S[0].src.abs = 1;
5326
5327         next_ins(pAsm);
5328
5329         /* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
5330          * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
5331          * muladd has no writemask, have to use another temp 
5332          */
5333         if(8 == pAsm->unAsic)
5334         {
5335             pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5336         }
5337         else
5338         {
5339             pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5340         }
5341         pAsm->D.dst.op3    = 1;
5342         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5343         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5344         pAsm->D.dst.reg   = tmp2;
5345
5346         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5347         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5348         pAsm->S[0].src.reg   = tmp1;
5349         noswizzle_PVSSRC(&(pAsm->S[0].src));
5350         setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5351         pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5352         pAsm->S[1].src.reg   = tmp1;
5353         setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
5354         setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5355         /* immediate c 1.5 */
5356         pAsm->D2.dst2.literal_slots = 1;
5357         pAsm->C[0].f = 1.5F;
5358         pAsm->S[2].src.rtype = SRC_REC_LITERAL;
5359         pAsm->S[2].src.reg   = tmp1;
5360         setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
5361
5362         next_ins(pAsm);
5363
5364         /* tmp1.xy = temp2.xy */
5365         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5366         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5367         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5368         pAsm->D.dst.reg   = tmp1;
5369         pAsm->D.dst.writex = 1;
5370         pAsm->D.dst.writey = 1;
5371         pAsm->D.dst.writez = 0;
5372         pAsm->D.dst.writew = 0;
5373
5374         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5375         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5376         pAsm->S[0].src.reg   = tmp2;
5377         noswizzle_PVSSRC(&(pAsm->S[0].src));
5378
5379         next_ins(pAsm);
5380         pAsm->aArgSubst[1] = tmp1;
5381         need_barrier = GL_TRUE;
5382
5383     }
5384
5385     switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
5386     {
5387         case OPCODE_DDX:
5388             /* will these need WQM(1) on CF inst ? */
5389             pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
5390             break;
5391         case OPCODE_DDY:
5392             pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
5393             break;
5394         case OPCODE_TXB:
5395             /* this should actually be SAMPLE_LB but that needs bias to be 
5396              * embedded in the instruction - cant do here */ 
5397             pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5398             break;
5399         case OPCODE_TXL:
5400             pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5401             break;
5402         default:
5403             if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5404                 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
5405             else
5406                 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
5407     }
5408
5409     pAsm->is_tex = GL_TRUE;
5410     if ( GL_TRUE == need_barrier )
5411
5412     pAsm->is_tex = GL_TRUE;
5413     if ( GL_TRUE == need_barrier )
5414     {
5415         pAsm->need_tex_barrier = GL_TRUE;
5416     }
5417     // Set src1 to tex unit id
5418     pAsm->S[1].src.reg   = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5419     pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5420
5421     //No sw info from mesa compiler, so hard code here.
5422     pAsm->S[1].src.swizzlex = SQ_SEL_X;
5423     pAsm->S[1].src.swizzley = SQ_SEL_Y;
5424     pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5425     pAsm->S[1].src.swizzlew = SQ_SEL_W;
5426
5427     if( GL_FALSE == tex_dst(pAsm) )
5428     {
5429         return GL_FALSE;
5430     }
5431
5432     if( GL_FALSE == tex_src(pAsm) )
5433     {
5434         return GL_FALSE;
5435     }
5436
5437     if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5438     {
5439         /* hopefully did swizzles before */
5440         noswizzle_PVSSRC(&(pAsm->S[0].src));
5441     }
5442    
5443     if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
5444     {
5445         /* SAMPLE dst, tmp.yxwy, CUBE */
5446         pAsm->S[0].src.swizzlex = SQ_SEL_Y;
5447         pAsm->S[0].src.swizzley = SQ_SEL_X;
5448         pAsm->S[0].src.swizzlez = SQ_SEL_W;
5449         pAsm->S[0].src.swizzlew = SQ_SEL_Y;
5450     }
5451  
5452     if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5453     {
5454         /* compare value goes to w chan ? */
5455         pAsm->S[0].src.swizzlew = SQ_SEL_Z;
5456     }
5457
5458     if ( GL_FALSE == next_ins(pAsm) )
5459         {
5460             return GL_FALSE;
5461         }
5462
5463     /* add ARB shadow ambient but clamp to 0..1 */
5464     if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5465     {
5466         /* ADD_SAT dst,  dst,  ambient[texunit] */
5467         pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
5468
5469         if( GL_FALSE == assemble_dst(pAsm) )
5470         {
5471             return GL_FALSE;
5472         }
5473         pAsm->D2.dst2.SaturateMode = 1;
5474
5475         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5476         pAsm->S[0].src.reg = pAsm->D.dst.reg;
5477         noswizzle_PVSSRC(&(pAsm->S[0].src));
5478         noneg_PVSSRC(&(pAsm->S[0].src));
5479
5480         pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
5481         pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5482         noswizzle_PVSSRC(&(pAsm->S[1].src));
5483         noneg_PVSSRC(&(pAsm->S[1].src));
5484
5485         if( GL_FALSE == next_ins(pAsm) )
5486         {
5487             return GL_FALSE;
5488         }
5489
5490     }
5491
5492     return GL_TRUE;
5493 }
5494
5495 GLboolean assemble_XPD(r700_AssemblerBase *pAsm) 
5496 {
5497     BITS tmp1;
5498     BITS tmp2 = 0;
5499
5500     if( GL_FALSE == checkop2(pAsm) )
5501     {
5502             return GL_FALSE;
5503     }
5504
5505     tmp1 = gethelpr(pAsm);
5506
5507     pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5508
5509     setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5510     pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5511     pAsm->D.dst.reg   = tmp1;
5512     nomask_PVSDST(&(pAsm->D.dst));
5513   
5514     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5515     {
5516         return GL_FALSE;
5517     }
5518
5519     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5520     {
5521         return GL_FALSE;
5522     }
5523  
5524     swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5525     swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5526
5527     if( GL_FALSE == next_ins(pAsm) ) 
5528     {
5529         return GL_FALSE;
5530     }
5531
5532     if(8 == pAsm->unAsic)
5533     {
5534         pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5535     }
5536     else
5537     {
5538         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5539     }
5540     pAsm->D.dst.op3    = 1;
5541
5542     if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5543     {
5544         tmp2 = gethelpr(pAsm);
5545
5546         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5547         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5548         pAsm->D.dst.reg   = tmp2;
5549
5550         nomask_PVSDST(&(pAsm->D.dst));
5551     }
5552     else 
5553     {
5554         if( GL_FALSE == assemble_dst(pAsm) )
5555         {
5556             return GL_FALSE;
5557         }
5558     }
5559
5560     if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5561     {
5562         return GL_FALSE;
5563     }
5564
5565     if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5566     {
5567         return GL_FALSE;
5568     }
5569  
5570     swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5571     swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5572
5573     // result1 + (neg) result0
5574     setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
5575     pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5576     pAsm->S[2].src.reg   = tmp1;
5577
5578     neg_PVSSRC(&(pAsm->S[2].src));
5579     noswizzle_PVSSRC(&(pAsm->S[2].src));
5580
5581     if( GL_FALSE == next_ins(pAsm) ) 
5582     {
5583         return GL_FALSE;
5584     }
5585
5586
5587     if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
5588     {
5589         if( GL_FALSE == assemble_dst(pAsm) )
5590         {
5591             return GL_FALSE;
5592         }
5593
5594         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5595
5596         // Use tmp as source
5597         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5598         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5599         pAsm->S[0].src.reg   = tmp2;
5600
5601         noneg_PVSSRC(&(pAsm->S[0].src));
5602         noswizzle_PVSSRC(&(pAsm->S[0].src));
5603
5604         if( GL_FALSE == next_ins(pAsm) )
5605         {
5606             return GL_FALSE;
5607         }
5608     }
5609
5610     return GL_TRUE;
5611 }
5612
5613 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
5614 {
5615     return GL_TRUE;
5616 }
5617
5618 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
5619 {
5620     switch (uReason)
5621     {
5622     case FC_PUSH_VPM:
5623         pAsm->CALLSTACK[pAsm->CALLSP].current--;
5624         break;
5625     case FC_PUSH_WQM:
5626         pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5627         break;
5628     case FC_LOOP:
5629         pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5630         break;
5631     case FC_REP:
5632         /* TODO : for 16 vp asic, should -= 2; */
5633         pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
5634         break;
5635     };
5636 }
5637
5638 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
5639 {
5640     if(GL_TRUE == bCheckMaxOnly)
5641     {
5642         switch (uReason)
5643         {
5644         case FC_PUSH_VPM:
5645             if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
5646                     > pAsm->CALLSTACK[pAsm->CALLSP].max)
5647             {
5648                 pAsm->CALLSTACK[pAsm->CALLSP].max =
5649                     pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
5650             }
5651             break;
5652         case FC_PUSH_WQM:
5653             if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
5654                     > pAsm->CALLSTACK[pAsm->CALLSP].max)
5655             {
5656                 pAsm->CALLSTACK[pAsm->CALLSP].max =
5657                     pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
5658             }
5659             break;
5660         }
5661         return;
5662     }
5663
5664     switch (uReason)
5665     {
5666     case FC_PUSH_VPM:
5667         pAsm->CALLSTACK[pAsm->CALLSP].current++;
5668         break;
5669     case FC_PUSH_WQM:
5670         pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5671         break;
5672     case FC_LOOP:
5673         pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5674         break;
5675     case FC_REP:
5676         /* TODO : for 16 vp asic, should += 2; */
5677         pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
5678         break;
5679     };
5680
5681     if(pAsm->CALLSTACK[pAsm->CALLSP].current
5682          > pAsm->CALLSTACK[pAsm->CALLSP].max)
5683     {
5684         pAsm->CALLSTACK[pAsm->CALLSP].max =
5685             pAsm->CALLSTACK[pAsm->CALLSP].current;
5686     }
5687 }
5688
5689 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5690 {
5691     if(GL_FALSE == add_cf_instruction(pAsm) )
5692     {
5693         return GL_FALSE;
5694     }
5695
5696     if(8 == pAsm->unAsic)
5697     {
5698         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5699                  EG_CF_INST_JUMP,
5700                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5701         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5702                  pops,
5703                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); 
5704         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5705                  0,
5706                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5707         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5708                  SQ_CF_COND_ACTIVE,
5709                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5710         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5711                  0,
5712                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5713         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5714                  0,
5715                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5716         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5717                  0,
5718                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
5719         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5720                  0,
5721                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5722         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5723                  1,
5724                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5725     }
5726     else
5727     {
5728         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = pops;
5729         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
5730         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
5731
5732         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
5733         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5734         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
5735         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
5736
5737         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
5738     }
5739
5740     pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5741
5742     return GL_TRUE;
5743 }
5744
5745 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5746 {
5747     if(GL_FALSE == add_cf_instruction(pAsm) )
5748     {
5749         return GL_FALSE;
5750     }
5751
5752     if(8 == pAsm->unAsic)
5753     {
5754         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5755                  EG_CF_INST_POP,
5756                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5757         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5758                  pops,
5759                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask); 
5760         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5761                  0,
5762                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5763         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5764                  SQ_CF_COND_ACTIVE,
5765                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5766         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5767                  0,
5768                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
5769         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5770                  0,
5771                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
5772         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5773                  0,
5774                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
5775         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5776                  1,
5777                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5778         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5779                  0,
5780                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5781     }
5782     else
5783     {
5784         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = pops;
5785         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
5786         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
5787
5788         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
5789         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
5790         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
5791  
5792         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
5793
5794         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
5795     }
5796     pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr             = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5797
5798     return GL_TRUE;
5799 }
5800
5801 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5802 {
5803     pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5804
5805     assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5806
5807
5808     if(GL_FALSE == add_cf_instruction(pAsm) )
5809     {
5810         return GL_FALSE;
5811     }
5812
5813     if(8 == pAsm->unAsic)
5814      { 
5815         if(GL_TRUE != bHasElse)
5816         {            
5817             SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5818                      1,
5819                      EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5820         }
5821         else
5822         {            
5823             SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5824                      0,
5825                      EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5826         }
5827
5828         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5829                  EG_CF_INST_JUMP,
5830                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
5831         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5832                  0,
5833                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5834         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5835                  SQ_CF_COND_ACTIVE,
5836                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5837         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5838                  0,
5839                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
5840         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5841                  0,
5842                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
5843         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5844                  0,
5845                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
5846         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5847                  1,
5848                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5849         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5850                  0,
5851                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5852     }
5853     else
5854     {
5855         if(GL_TRUE != bHasElse)
5856         {
5857             pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; 
5858         }
5859         else
5860         {
5861             pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5862         }
5863         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
5864         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
5865
5866         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
5867         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5868         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
5869         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
5870
5871         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
5872     }
5873
5874     pAsm->FCSP++;
5875         pAsm->fc_stack[pAsm->FCSP].type  = FC_IF;
5876     pAsm->fc_stack[pAsm->FCSP].mid   = NULL;
5877     pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5878     pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5879
5880 #ifndef USE_CF_FOR_POP_AFTER
5881     if(GL_TRUE != bHasElse)
5882     {
5883         pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5884     }
5885 #endif /* USE_CF_FOR_POP_AFTER */
5886
5887     checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); 
5888
5889     return GL_TRUE;
5890 }
5891
5892 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5893 {
5894     if(GL_FALSE == add_cf_instruction(pAsm) )
5895     {
5896         return GL_FALSE;
5897     }
5898
5899     if(8 == pAsm->unAsic)
5900     {                  
5901         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5902                  1,
5903                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
5904         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5905                  EG_CF_INST_ELSE,
5906                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
5907         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5908                  0,
5909                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5910         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5911                  SQ_CF_COND_ACTIVE,
5912                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5913         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5914                  0,
5915                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
5916         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5917                  0,
5918                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
5919         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5920                  0,
5921                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
5922         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5923                  1,
5924                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5925         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
5926                  0,
5927                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5928     }
5929     else
5930     {
5931         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1; ///
5932         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
5933         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
5934
5935         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
5936         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5937         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_ELSE;
5938         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
5939
5940         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
5941     }
5942
5943     pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5944                                                                                      0,
5945                                                                                      sizeof(R700ControlFlowGenericClause *) );
5946     pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5947     //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5948
5949 #ifndef USE_CF_FOR_POP_AFTER
5950     pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5951 #endif /* USE_CF_FOR_POP_AFTER */
5952
5953     pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; 
5954
5955     return GL_TRUE;
5956 }
5957
5958 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5959 {
5960 #ifdef USE_CF_FOR_POP_AFTER
5961     pops(pAsm, 1); 
5962 #endif /* USE_CF_FOR_POP_AFTER */
5963
5964     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5965
5966     if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5967     {
5968         /* no else in between */
5969         pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5970     }
5971     else
5972     {
5973         pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5974     }
5975
5976     if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5977     {
5978         FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5979     }
5980
5981     if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5982     {
5983         radeon_error("if/endif in shader code are not paired. \n");
5984         return GL_FALSE;
5985     }
5986     
5987     pAsm->FCSP--;
5988
5989     decreaseCurrent(pAsm, FC_PUSH_VPM);
5990
5991     return GL_TRUE;
5992 }
5993
5994 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5995 {
5996     if(GL_FALSE == add_cf_instruction(pAsm) )
5997     {
5998         return GL_FALSE;
5999     }
6000
6001     if(8 == pAsm->unAsic)
6002     {                  
6003         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6004                  0,
6005                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6006         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6007                  EG_CF_INST_LOOP_START_NO_AL,
6008                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6009         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6010                  0,
6011                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6012         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6013                  SQ_CF_COND_ACTIVE,
6014                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6015         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6016                  0,
6017                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6018         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6019                  0,
6020                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6021         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6022                  0,
6023                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6024         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6025                  1,
6026                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6027         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6028                  0,
6029                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6030     }
6031     else
6032     {
6033         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
6034         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6035         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6036
6037         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6038         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6039         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_START_NO_AL;
6040         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6041
6042         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6043     }
6044
6045     pAsm->FCSP++;
6046         pAsm->fc_stack[pAsm->FCSP].type  = FC_LOOP;
6047     pAsm->fc_stack[pAsm->FCSP].mid   = NULL;
6048     pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
6049     pAsm->fc_stack[pAsm->FCSP].midLen   = 0;
6050     pAsm->fc_stack[pAsm->FCSP].first    = pAsm->cf_current_cf_clause_ptr;
6051
6052     checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
6053
6054     return GL_TRUE;
6055 }
6056
6057 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
6058 {
6059 #ifdef USE_CF_FOR_CONTINUE_BREAK
6060
6061     pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6062
6063     assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
6064     
6065     unsigned int unFCSP;
6066     for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6067     {
6068         if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6069         {
6070             break;
6071         }
6072     }
6073     if(0 == FC_LOOP)
6074     {
6075         radeon_error("Break is not inside loop/endloop pair.\n");
6076         return GL_FALSE;
6077     }
6078
6079     if(GL_FALSE == add_cf_instruction(pAsm) )
6080     {
6081         return GL_FALSE;
6082     }
6083
6084     if(8 == pAsm->unAsic)
6085     {                  
6086         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6087                  1,
6088                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6089         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6090                  EG_CF_INST_LOOP_BREAK,
6091                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6092         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6093                  0,
6094                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6095         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6096                  SQ_CF_COND_ACTIVE,
6097                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6098         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6099                  0,
6100                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6101         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6102                  0,
6103                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6104         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6105                  0,
6106                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6107         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6108                  1,
6109                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6110         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6111                  0,
6112                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6113     }
6114     else
6115     {
6116         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6117         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6118         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6119
6120         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6121         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6122         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
6123  
6124         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6125
6126         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6127     }
6128
6129     pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( 
6130                                               (void *)pAsm->fc_stack[unFCSP].mid,
6131                                               sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6132                                               sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6133     pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6134     pAsm->fc_stack[unFCSP].unNumMid++;
6135
6136     if(GL_FALSE == add_cf_instruction(pAsm) )
6137     {
6138         return GL_FALSE;
6139     }
6140
6141     if(8 == pAsm->unAsic)
6142     {                  
6143         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6144                  1,
6145                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6146         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6147                  EG_CF_INST_POP,
6148                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6149         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6150                  0,
6151                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6152         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6153                  SQ_CF_COND_ACTIVE,
6154                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6155         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6156                  0,
6157                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6158         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6159                  0,
6160                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6161         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6162                  0,
6163                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6164         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6165                  1,
6166                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6167         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6168                  0,
6169                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6170     }
6171     else
6172     {
6173         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6174         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6175         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6176
6177         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6178         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6179         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
6180  
6181         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6182
6183         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6184     }
6185
6186     pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6187
6188     checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6189
6190 #endif //USE_CF_FOR_CONTINUE_BREAK
6191     return GL_TRUE;
6192 }
6193
6194 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
6195 {
6196 #ifdef USE_CF_FOR_CONTINUE_BREAK
6197     pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6198
6199     assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
6200
6201     unsigned int unFCSP;
6202     for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6203     {
6204         if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6205         {
6206             break;
6207         }
6208     }
6209     if(0 == FC_LOOP)
6210     {
6211         radeon_error("Continue is not inside loop/endloop pair.\n");
6212         return GL_FALSE;
6213     }
6214
6215     if(GL_FALSE == add_cf_instruction(pAsm) )
6216     {
6217         return GL_FALSE;
6218     }
6219
6220     if(8 == pAsm->unAsic)
6221     {                  
6222         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6223                  1,
6224                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6225         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6226                  EG_CF_INST_LOOP_CONTINUE,
6227                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6228         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6229                  0,
6230                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6231         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6232                  SQ_CF_COND_ACTIVE,
6233                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6234         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6235                  0,
6236                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6237         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6238                  0,
6239                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6240         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6241                  0,
6242                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6243         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6244                  1,
6245                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6246         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6247                  0,
6248                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6249     }
6250     else
6251     {
6252         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6253         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6254         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6255
6256         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6257         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6258         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_CONTINUE;
6259  
6260         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6261
6262         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6263     }
6264
6265     pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( 
6266                                               (void *)pAsm->fc_stack[unFCSP].mid,
6267                                               sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6268                                               sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6269     pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6270     pAsm->fc_stack[unFCSP].unNumMid++;
6271
6272     if(GL_FALSE == add_cf_instruction(pAsm) )
6273     {
6274         return GL_FALSE;
6275     }
6276
6277     if(8 == pAsm->unAsic)
6278     {                  
6279         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6280                  1,
6281                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6282         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6283                  EG_CF_INST_POP,
6284                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6285         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6286                  0,
6287                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6288         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6289                  SQ_CF_COND_ACTIVE,
6290                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6291         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6292                  0,
6293                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6294         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6295                  0,
6296                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6297         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6298                  0,
6299                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6300         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6301                  1,
6302                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6303         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6304                  0,
6305                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6306     }
6307     else
6308     {
6309         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6310         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6311         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6312
6313         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6314         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6315         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
6316  
6317         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6318
6319         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6320     }
6321
6322     pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr             = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6323
6324     checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6325
6326 #endif /* USE_CF_FOR_CONTINUE_BREAK */
6327
6328     return GL_TRUE;
6329 }
6330
6331 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
6332 {
6333     GLuint i;
6334
6335     if(GL_FALSE == add_cf_instruction(pAsm) )
6336     {
6337         return GL_FALSE;
6338     }
6339
6340     if(8 == pAsm->unAsic)
6341     {                  
6342         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6343                  0,
6344                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6345         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6346                  EG_CF_INST_LOOP_END,
6347                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6348         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6349                  0,
6350                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6351         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6352                  SQ_CF_COND_ACTIVE,
6353                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6354         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6355                  0,
6356                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6357         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6358                  0,
6359                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6360         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6361                  0,
6362                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6363         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6364                  1,
6365                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6366         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6367                  0,
6368                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6369     }
6370     else
6371     {
6372         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
6373         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6374         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6375
6376         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6377         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6378         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_END;
6379         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6380
6381         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6382     }
6383
6384     pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr   = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
6385     pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6386
6387 #ifdef USE_CF_FOR_CONTINUE_BREAK
6388     for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
6389     {
6390         pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
6391     }
6392     if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
6393     {
6394         FREE(pAsm->fc_stack[pAsm->FCSP].mid);
6395     }
6396 #endif
6397
6398     if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
6399     {
6400         radeon_error("loop/endloop in shader code are not paired. \n");
6401         return GL_FALSE;
6402     }
6403
6404     GLuint unFCSP;
6405     GLuint unIF = 0;
6406     if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
6407     {        
6408         for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6409         {
6410             if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6411             {
6412                 breakLoopOnFlag(pAsm, unFCSP);
6413                 break;
6414             }
6415             else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6416             {
6417                 unIF++;
6418             }
6419         }
6420         if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
6421         {            
6422 #ifdef USE_CF_FOR_POP_AFTER
6423             returnOnFlag(pAsm, unIF); 
6424 #else
6425             returnOnFlag(pAsm, 0);
6426 #endif /* USE_CF_FOR_POP_AFTER */
6427             pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
6428         }
6429     }
6430
6431     pAsm->FCSP--;
6432
6433     decreaseCurrent(pAsm, FC_LOOP);
6434     
6435     return GL_TRUE;
6436 }
6437
6438 void add_return_inst(r700_AssemblerBase *pAsm)
6439 {
6440     if(GL_FALSE == add_cf_instruction(pAsm) )
6441     {
6442         return;
6443     }
6444
6445     if(8 == pAsm->unAsic)
6446     {                  
6447         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6448                  0,
6449                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6450         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6451                  EG_CF_INST_RETURN,
6452                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6453         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6454                  0,
6455                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6456         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6457                  SQ_CF_COND_ACTIVE,
6458                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6459         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6460                  0,
6461                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6462         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6463                  0,
6464                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6465         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6466                  0,
6467                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6468         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6469                  1,
6470                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6471         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6472                  0,
6473                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6474     }
6475     else
6476     {
6477         //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6478         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
6479         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
6480         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6481
6482         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6483         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6484         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_RETURN;
6485         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6486
6487         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6488     }
6489 }
6490
6491 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
6492 {
6493     /* Put in sub */
6494     if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
6495     {
6496         pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
6497                                   sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
6498                                   sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
6499         if(NULL == pAsm->subs)
6500         {
6501             return GL_FALSE;
6502         }
6503         pAsm->unSubArraySize += 10;
6504     }
6505
6506     pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
6507     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;  
6508     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;  
6509     pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
6510
6511     pAsm->CALLSP++;
6512     pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
6513     pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
6514     pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
6515                    = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
6516     pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
6517     pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
6518     SetActiveCFlist(pAsm->pR700Shader, 
6519                     pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6520
6521     pAsm->unSubArrayPointer++;
6522
6523     /* start sub */
6524     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6525
6526     pAsm->FCSP++;
6527     pAsm->fc_stack[pAsm->FCSP].type  = FC_REP;
6528
6529     checkStackDepth(pAsm, FC_REP, GL_FALSE);
6530
6531     return GL_TRUE;
6532 }
6533
6534 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
6535 {
6536     if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
6537     {
6538         radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6539         return GL_FALSE;
6540     }
6541
6542     /* copy max to sub structure */
6543     pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
6544         = pAsm->CALLSTACK[pAsm->CALLSP].max;
6545
6546     decreaseCurrent(pAsm, FC_REP);
6547
6548     pAsm->CALLSP--;
6549     SetActiveCFlist(pAsm->pR700Shader, 
6550                     pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6551     
6552     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6553
6554     pAsm->FCSP--;
6555
6556     return GL_TRUE;
6557 }
6558
6559 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
6560 {
6561     GLuint unIF = 0;
6562
6563     if(pAsm->CALLSP > 0)
6564     {   /* in sub */
6565         GLuint unFCSP;        
6566         for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6567         {
6568             if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6569             {
6570                 setRetInLoopFlag(pAsm, SQ_SEL_1);
6571                 breakLoopOnFlag(pAsm, unFCSP);
6572                 pAsm->unCFflags |= LOOPRET_FLAGS;
6573
6574                 return GL_TRUE;
6575             }
6576             else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6577             {
6578                 unIF++;
6579             }
6580         }
6581     }
6582
6583 #ifdef USE_CF_FOR_POP_AFTER    
6584     if(unIF > 0)
6585     {
6586         pops(pAsm, unIF);
6587     }
6588 #endif /* USE_CF_FOR_POP_AFTER */
6589
6590     add_return_inst(pAsm);
6591
6592     return GL_TRUE;
6593 }
6594
6595 GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
6596                        GLint nILindex,
6597                        GLuint uiIL_Shift,
6598                        GLuint uiNumberInsts,
6599                        struct prog_instruction *pILInst,
6600                        PRESUB_DESC * pPresubDesc)
6601 {
6602     GLint uiIL_Offset;
6603
6604     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6605
6606     if(GL_FALSE == add_cf_instruction(pAsm) )
6607     {
6608         return GL_FALSE;
6609     }
6610
6611     if(8 == pAsm->unAsic)
6612     {                  
6613         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6614                  0,
6615                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6616         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6617                  EG_CF_INST_CALL,
6618                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6619         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6620                  0,
6621                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6622         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6623                  SQ_CF_COND_ACTIVE,
6624                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6625         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6626                  0,
6627                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6628         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6629                  0,
6630                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6631         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6632                  0,
6633                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6634         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6635                  1,
6636                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6637         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6638                  1,
6639                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6640     }
6641     else
6642     {
6643         pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count       = 1;
6644         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
6645         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6646         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6647
6648         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6649         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6650         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_CALL;
6651         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6652
6653         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6654     }
6655
6656     /* Put in caller */
6657     if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
6658     {
6659         pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, 
6660                        sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, 
6661                        sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
6662         if(NULL == pAsm->callers)
6663         {
6664             return GL_FALSE;
6665         }
6666         pAsm->unCallerArraySize += 10;
6667     }
6668     
6669     uiIL_Offset = nILindex + uiIL_Shift;
6670     pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; 
6671     pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr       = pAsm->cf_current_cf_clause_ptr;
6672     
6673     pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr  = NULL; 
6674     pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; 
6675
6676     pAsm->unCallerArrayPointer++;
6677
6678     int j;
6679     GLuint max;
6680     GLuint unSubID;
6681     GLboolean bRet;
6682     for(j=0; j<pAsm->unSubArrayPointer; j++)
6683     {
6684         if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
6685         {   /* compiled before */
6686
6687             max = pAsm->subs[j].unStackDepthMax 
6688                 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6689             if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6690             {
6691                 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6692             }
6693             
6694             pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; 
6695             return GL_TRUE;
6696         }
6697     }
6698
6699     pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
6700     unSubID = pAsm->unSubArrayPointer;
6701
6702     bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
6703
6704     if(GL_TRUE == bRet)
6705     {
6706         max = pAsm->subs[unSubID].unStackDepthMax 
6707             + pAsm->CALLSTACK[pAsm->CALLSP].current;
6708         if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6709         {
6710             pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6711         }
6712
6713         pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
6714     }
6715
6716     return bRet;
6717 }
6718
6719 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
6720 {
6721     /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6722
6723     pAsm->D.dst.opcode   = SQ_OP2_INST_MOV;
6724     pAsm->D.dst.op3      = 0;
6725     pAsm->D.dst.rtype    = DST_REG_TEMPORARY;
6726     pAsm->D.dst.reg      = pAsm->flag_reg_index;
6727     pAsm->D.dst.writex   = 1;
6728     pAsm->D.dst.writey   = 0;
6729     pAsm->D.dst.writez   = 0;
6730     pAsm->D.dst.writew   = 0;
6731     pAsm->D2.dst2.literal_slots      = 1;
6732     pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6733     pAsm->D.dst.predicated     = 0;
6734     /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6735     pAsm->D.dst.math           = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6736     pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6737 #if 0
6738     pAsm->S[0].src.rtype = SRC_REC_LITERAL;
6739     //pAsm->S[0].src.reg   = 0;
6740     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6741     noneg_PVSSRC(&(pAsm->S[0].src));
6742     pAsm->S[0].src.swizzlex = SQ_SEL_X;
6743     pAsm->S[0].src.swizzley = SQ_SEL_Y;
6744     pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6745     pAsm->S[0].src.swizzlew = SQ_SEL_W;
6746
6747     if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6748     {
6749         return GL_FALSE;
6750     }
6751 #else
6752     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6753     pAsm->S[0].src.reg   = 0;
6754     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6755     noneg_PVSSRC(&(pAsm->S[0].src));
6756     pAsm->S[0].src.swizzlex = flagValue;
6757     pAsm->S[0].src.swizzley = flagValue;
6758     pAsm->S[0].src.swizzlez = flagValue;
6759     pAsm->S[0].src.swizzlew = flagValue;
6760
6761     if( GL_FALSE == next_ins(pAsm) )
6762     {
6763         return GL_FALSE;
6764     }
6765 #endif
6766
6767     return GL_TRUE;
6768 }
6769
6770 GLboolean testFlag(r700_AssemblerBase *pAsm)
6771 {
6772     /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6773
6774     //Test flag
6775     GLuint tmp = gethelpr(pAsm);
6776     pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6777
6778     pAsm->D.dst.opcode   = SQ_OP2_INST_PRED_SETE;
6779     pAsm->D.dst.math     = 1;
6780     pAsm->D.dst.rtype    = DST_REG_TEMPORARY;
6781     pAsm->D.dst.reg      = tmp;
6782     pAsm->D.dst.writex   = 1;
6783     pAsm->D.dst.writey   = 0;
6784     pAsm->D.dst.writez   = 0;
6785     pAsm->D.dst.writew   = 0;
6786     pAsm->D2.dst2.literal_slots      = 1;
6787     pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6788     pAsm->D.dst.predicated     = 1;
6789     pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6790
6791     pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6792     pAsm->S[0].src.reg   = pAsm->flag_reg_index;
6793     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6794     noneg_PVSSRC(&(pAsm->S[0].src));
6795     pAsm->S[0].src.swizzlex = SQ_SEL_X;
6796     pAsm->S[0].src.swizzley = SQ_SEL_Y;
6797     pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6798     pAsm->S[0].src.swizzlew = SQ_SEL_W;
6799 #if 0
6800     pAsm->S[1].src.rtype = SRC_REC_LITERAL;
6801     //pAsm->S[1].src.reg   = 0;
6802     setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6803     noneg_PVSSRC(&(pAsm->S[1].src));
6804     pAsm->S[1].src.swizzlex = SQ_SEL_X;
6805     pAsm->S[1].src.swizzley = SQ_SEL_Y;
6806     pAsm->S[1].src.swizzlez = SQ_SEL_Z;
6807     pAsm->S[1].src.swizzlew = SQ_SEL_W;
6808
6809     if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6810     {
6811         return GL_FALSE;
6812     }
6813 #else
6814     pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
6815     pAsm->S[1].src.reg   = 0;
6816     setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6817     noneg_PVSSRC(&(pAsm->S[1].src));
6818     pAsm->S[1].src.swizzlex = SQ_SEL_1;
6819     pAsm->S[1].src.swizzley = SQ_SEL_1;
6820     pAsm->S[1].src.swizzlez = SQ_SEL_1;
6821     pAsm->S[1].src.swizzlew = SQ_SEL_1;
6822
6823     if( GL_FALSE == next_ins(pAsm) )
6824     {
6825         return GL_FALSE;
6826     }
6827 #endif
6828
6829     checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6830
6831     return GL_TRUE;
6832 }
6833
6834 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
6835 {
6836     testFlag(pAsm);
6837     jumpToOffest(pAsm, 1, 4);
6838     setRetInLoopFlag(pAsm, SQ_SEL_0);
6839     pops(pAsm, unIF + 1);
6840     add_return_inst(pAsm);
6841
6842     return GL_TRUE;
6843 }
6844
6845 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
6846 {
6847     testFlag(pAsm);
6848  
6849     //break
6850     if(GL_FALSE == add_cf_instruction(pAsm) )
6851     {
6852         return GL_FALSE;
6853     }
6854     
6855     if(8 == pAsm->unAsic)
6856     {                  
6857         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6858                  1,
6859                  EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);        
6860         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6861                  EG_CF_INST_LOOP_BREAK,
6862                  EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);         
6863         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6864                  0,
6865                  EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6866         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6867                  SQ_CF_COND_ACTIVE,
6868                  EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6869         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6870                  0,
6871                  EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit); 
6872         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6873                  0,
6874                  EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit); 
6875         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6876                  0,
6877                  EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);                               
6878         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6879                  1,
6880                  EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6881         SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val, 
6882                  1,
6883                  EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6884     }
6885     else
6886     {
6887         pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
6888         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
6889         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
6890
6891         pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
6892         pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
6893         pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
6894         pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
6895
6896         pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
6897     }
6898
6899     pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( 
6900                                               (void *)pAsm->fc_stack[unFCSP].mid,
6901                                               sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6902                                               sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6903     pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6904     pAsm->fc_stack[unFCSP].unNumMid++;
6905
6906     pops(pAsm, 1);
6907                
6908     return GL_TRUE;
6909 }
6910
6911 GLboolean AssembleInstr(GLuint uiFirstInst,
6912                         GLuint uiIL_Shift,
6913                         GLuint uiNumberInsts,
6914                         struct prog_instruction *pILInst, 
6915                                                 r700_AssemblerBase *pR700AsmCode)
6916 {
6917     GLuint i;
6918
6919     pR700AsmCode->pILInst = pILInst;
6920         for(i=uiFirstInst; i<uiNumberInsts; i++)
6921     {
6922         pR700AsmCode->uiCurInst = i;
6923
6924 #ifndef USE_CF_FOR_CONTINUE_BREAK
6925         if(OPCODE_BRK == pILInst[i+1].Opcode)
6926         {
6927             switch(pILInst[i].Opcode)            
6928             {
6929             case OPCODE_SLE:
6930                 pILInst[i].Opcode = OPCODE_SGT;
6931                 break;
6932             case OPCODE_SLT:
6933                 pILInst[i].Opcode = OPCODE_SGE;
6934                 break;
6935             case OPCODE_SGE:
6936                 pILInst[i].Opcode = OPCODE_SLT;
6937                 break;
6938             case OPCODE_SGT:
6939                 pILInst[i].Opcode = OPCODE_SLE;
6940                 break;
6941             case OPCODE_SEQ:
6942                 pILInst[i].Opcode = OPCODE_SNE;
6943                 break;
6944             case OPCODE_SNE:
6945                 pILInst[i].Opcode = OPCODE_SEQ;
6946                 break;
6947             default:
6948                 break;
6949             }
6950         }
6951 #endif
6952         if(pILInst[i].CondUpdate == 1)
6953         {
6954             /* remember dest register used for cond evaluation */
6955             /* XXX also handle PROGRAM_OUTPUT registers here? */
6956             pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; 
6957         }
6958
6959         switch (pILInst[i].Opcode)
6960         {
6961         case OPCODE_ABS: 
6962             if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) 
6963                 return GL_FALSE;
6964             break;  
6965         case OPCODE_ADD: 
6966         case OPCODE_SUB: 
6967             if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) 
6968                 return GL_FALSE;
6969             break;  
6970
6971         case OPCODE_ARL: 
6972             if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) 
6973                 return GL_FALSE;
6974             break;
6975         case OPCODE_ARR: 
6976             radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6977             //if ( GL_FALSE == assemble_BAD("ARR") ) 
6978                 return GL_FALSE;
6979             break;
6980
6981         case OPCODE_CMP: 
6982             if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) 
6983                 return GL_FALSE;
6984             break;  
6985         case OPCODE_COS: 
6986             if(8 == pR700AsmCode->unAsic)
6987             {
6988                 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) ) 
6989                     return GL_FALSE;
6990             }
6991             else
6992             {
6993                 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) 
6994                     return GL_FALSE;
6995             }
6996             break;  
6997
6998         case OPCODE_DP2:
6999         case OPCODE_DP3: 
7000         case OPCODE_DP4: 
7001         case OPCODE_DPH: 
7002             if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) 
7003                 return GL_FALSE;
7004             break;  
7005
7006         case OPCODE_DST: 
7007             if ( GL_FALSE == assemble_DST(pR700AsmCode) ) 
7008                 return GL_FALSE;
7009             break;  
7010
7011         case OPCODE_EX2: 
7012             if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) 
7013                 return GL_FALSE;
7014             break;  
7015         case OPCODE_EXP: 
7016             if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) 
7017                 return GL_FALSE;
7018             break;
7019
7020         case OPCODE_FLR:     
7021             if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) 
7022                 return GL_FALSE;
7023             break;  
7024         //case OP_FLR_INT: ;
7025
7026         //    if ( GL_FALSE == assemble_FLR_INT() ) 
7027         //        return GL_FALSE;
7028         //    break;  
7029
7030         case OPCODE_FRC: 
7031             if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) 
7032                 return GL_FALSE;
7033             break;  
7034
7035         case OPCODE_KIL: 
7036         case OPCODE_KIL_NV: 
7037             if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) 
7038                 return GL_FALSE;
7039             break;
7040         case OPCODE_LG2: 
7041             if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) 
7042                 return GL_FALSE;
7043             break;  
7044         case OPCODE_LIT:
7045             if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) 
7046                 return GL_FALSE;
7047             break;
7048         case OPCODE_LRP: 
7049             if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) 
7050                 return GL_FALSE;
7051             break;  
7052         case OPCODE_LOG: 
7053             if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) 
7054                 return GL_FALSE;
7055             break;
7056
7057         case OPCODE_MAD: 
7058             if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) 
7059                 return GL_FALSE;
7060             break;  
7061         case OPCODE_MAX: 
7062             if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) 
7063                 return GL_FALSE;
7064             break;  
7065         case OPCODE_MIN: 
7066             if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) 
7067                 return GL_FALSE;
7068             break;  
7069
7070         case OPCODE_MOV: 
7071             if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
7072                 return GL_FALSE;
7073             break;  
7074         case OPCODE_MUL: 
7075             if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) 
7076                 return GL_FALSE;
7077             break;
7078             
7079         case OPCODE_NOISE1:
7080             {                                               
7081                 callPreSub(pR700AsmCode, 
7082                            GLSL_NOISE1,                         
7083                            &noise1_presub,                                                  
7084                            pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, 
7085                            1); 
7086                 radeon_error("noise1: not yet supported shader instruction\n");
7087             };
7088             break; 
7089         case OPCODE_NOISE2: 
7090             radeon_error("noise2: not yet supported shader instruction\n");
7091             break; 
7092         case OPCODE_NOISE3: 
7093             radeon_error("noise3: not yet supported shader instruction\n");
7094             break; 
7095         case OPCODE_NOISE4: 
7096             radeon_error("noise4: not yet supported shader instruction\n");
7097             break; 
7098
7099         case OPCODE_POW: 
7100             if ( GL_FALSE == assemble_POW(pR700AsmCode) ) 
7101                 return GL_FALSE;
7102             break;  
7103         case OPCODE_RCP: 
7104             if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) 
7105                 return GL_FALSE;
7106             break;  
7107         case OPCODE_RSQ: 
7108             if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) 
7109                 return GL_FALSE;
7110             break;  
7111         case OPCODE_SIN: 
7112             if(8 == pR700AsmCode->unAsic)
7113             {
7114                 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) ) 
7115                     return GL_FALSE;
7116             }
7117             else
7118             {
7119                 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) 
7120                     return GL_FALSE;
7121             }
7122             break;  
7123         case OPCODE_SCS: 
7124             if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) 
7125                 return GL_FALSE;
7126             break; 
7127             
7128         case OPCODE_SEQ:
7129             if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) 
7130             {
7131                 return GL_FALSE;
7132             }
7133             break;
7134
7135         case OPCODE_SGT: 
7136             if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) 
7137             {
7138                 return GL_FALSE;
7139             }
7140             break;
7141
7142         case OPCODE_SGE: 
7143             if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) 
7144             { 
7145                 return GL_FALSE;
7146             }
7147             break;
7148         
7149         /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
7150         case OPCODE_SLT: 
7151             {
7152                 struct prog_src_register SrcRegSave[2];
7153                 SrcRegSave[0] = pILInst[i].SrcReg[0];
7154                 SrcRegSave[1] = pILInst[i].SrcReg[1];
7155                 pILInst[i].SrcReg[0] = SrcRegSave[1];
7156                 pILInst[i].SrcReg[1] = SrcRegSave[0];
7157                 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) 
7158                 {
7159                     pILInst[i].SrcReg[0] = SrcRegSave[0];
7160                     pILInst[i].SrcReg[1] = SrcRegSave[1];
7161                     return GL_FALSE;
7162                 }
7163                 pILInst[i].SrcReg[0] = SrcRegSave[0];
7164                 pILInst[i].SrcReg[1] = SrcRegSave[1];
7165             }
7166             break;
7167
7168         case OPCODE_SLE: 
7169             {
7170                 struct prog_src_register SrcRegSave[2];
7171                 SrcRegSave[0] = pILInst[i].SrcReg[0];
7172                 SrcRegSave[1] = pILInst[i].SrcReg[1];
7173                 pILInst[i].SrcReg[0] = SrcRegSave[1];
7174                 pILInst[i].SrcReg[1] = SrcRegSave[0];
7175                 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) 
7176                 {
7177                     pILInst[i].SrcReg[0] = SrcRegSave[0];
7178                     pILInst[i].SrcReg[1] = SrcRegSave[1];
7179                     return GL_FALSE;
7180                 }
7181                 pILInst[i].SrcReg[0] = SrcRegSave[0];
7182                 pILInst[i].SrcReg[1] = SrcRegSave[1];
7183             }
7184             break;
7185
7186         case OPCODE_SNE: 
7187             if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) 
7188             {
7189                 return GL_FALSE;
7190             }
7191             break;
7192
7193         //case OP_STP: 
7194         //    if ( GL_FALSE == assemble_STP(pR700AsmCode) ) 
7195         //        return GL_FALSE;
7196         //    break;
7197
7198         case OPCODE_SSG:
7199             if ( GL_FALSE == assemble_SSG(pR700AsmCode) )
7200             {
7201                 return GL_FALSE;
7202             }
7203             break;
7204
7205         case OPCODE_SWZ: 
7206             if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
7207             {
7208                 return GL_FALSE; 
7209             }
7210             else
7211             {
7212                 if( (i+1)<uiNumberInsts )
7213                 {
7214                     if(OPCODE_END != pILInst[i+1].Opcode)
7215                     {
7216                         if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
7217                         {
7218                             pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
7219                         }
7220                     }
7221                 }
7222             }
7223             break;
7224         case OPCODE_DDX:
7225         case OPCODE_DDY:
7226         case OPCODE_TEX: 
7227         case OPCODE_TXB:
7228         case OPCODE_TXL:
7229         case OPCODE_TXP: 
7230             if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) 
7231                 return GL_FALSE;
7232             break;
7233
7234         case OPCODE_TRUNC:
7235             if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
7236                 return GL_FALSE;
7237             break;
7238
7239         case OPCODE_XPD: 
7240             if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) 
7241                 return GL_FALSE;
7242             break;  
7243
7244         case OPCODE_IF:
7245             {                
7246                 GLboolean bHasElse = GL_FALSE;
7247
7248                 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
7249                 {
7250                     bHasElse = GL_TRUE;
7251                 }
7252
7253                 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) 
7254                 {
7255                     return GL_FALSE;
7256                 }
7257             }
7258             break;
7259
7260         case OPCODE_ELSE : 
7261             if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) 
7262                 return GL_FALSE;
7263             break;
7264
7265         case OPCODE_ENDIF: 
7266             if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) 
7267                 return GL_FALSE;
7268             break;
7269
7270         case OPCODE_BGNLOOP:
7271             if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
7272             {
7273                 return GL_FALSE;
7274             }
7275             break;
7276
7277         case OPCODE_BRK:
7278             if( GL_FALSE == assemble_BRK(pR700AsmCode) )
7279             {
7280                 return GL_FALSE;
7281             }
7282             break;
7283
7284         case OPCODE_CONT:
7285             if( GL_FALSE == assemble_CONT(pR700AsmCode) )
7286             {
7287                 return GL_FALSE;
7288             }
7289             break;
7290
7291         case OPCODE_ENDLOOP:
7292             if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
7293             {
7294                 return GL_FALSE;
7295             }
7296             break;
7297
7298         case OPCODE_BGNSUB:
7299             if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
7300             {
7301                 return GL_FALSE;
7302             }
7303             break;
7304         
7305         case OPCODE_RET:
7306             if( GL_FALSE == assemble_RET(pR700AsmCode) )
7307             {
7308                 return GL_FALSE;
7309             }
7310             break;
7311         
7312         case OPCODE_CAL:
7313             if( GL_FALSE == assemble_CAL(pR700AsmCode, 
7314                                          pILInst[i].BranchTarget,
7315                                          uiIL_Shift,
7316                                          uiNumberInsts,
7317                                          pILInst,
7318                                          NULL) )
7319             {
7320                 return GL_FALSE;
7321             }
7322             break;
7323
7324         //case OPCODE_EXPORT: 
7325         //    if ( GL_FALSE == assemble_EXPORT() ) 
7326         //        return GL_FALSE;
7327         //    break;
7328
7329         case OPCODE_ENDSUB:
7330             return assemble_ENDSUB(pR700AsmCode);
7331
7332         case OPCODE_END: 
7333                         //pR700AsmCode->uiCurInst = i;
7334                         //This is to remaind that if in later exoort there is depth/stencil
7335                         //export, we need a mov to re-arrange DST channel, where using a
7336                         //psuedo inst, we will use this end inst to do it.
7337             return GL_TRUE;
7338
7339         default:
7340             radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode);
7341             return GL_FALSE;
7342         }
7343     }
7344
7345     return GL_TRUE;
7346 }
7347
7348 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
7349 {
7350 #ifndef GENERATE_SHADER_FOR_2D
7351     setRetInLoopFlag(pAsm, SQ_SEL_0);
7352 #endif
7353
7354     if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic))
7355     {
7356         EG_add_ps_interp(pAsm);
7357     }
7358
7359     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7360     return GL_TRUE;
7361 }
7362
7363 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
7364 {
7365     GLuint i;
7366     GLuint unCFoffset;
7367     TypedShaderList * plstCFmain;
7368     TypedShaderList * plstCFsub;
7369
7370     R700ShaderInstruction *        pInst;
7371     R700ControlFlowGenericClause * pCFInst;
7372
7373     R700ControlFlowALUClause * pCF_ALU;
7374     R700ALUInstruction       * pALU;
7375     GLuint                     unConstOffset = 0;
7376     GLuint                     unRegOffset;
7377     GLuint                     unMinRegIndex;
7378
7379     plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
7380
7381 #ifndef GENERATE_SHADER_FOR_2D
7382     /* remove flags init if they are not used */
7383     if((pAsm->unCFflags & HAS_LOOPRET) == 0)
7384     {
7385         R700ControlFlowALUClause * pCF_ALU;
7386         pInst = plstCFmain->pHead;
7387         while(pInst)
7388         {
7389             if(SIT_CF_ALU == pInst->m_ShaderInstType)
7390             {
7391                 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7392                 if(0 == pCF_ALU->m_Word1.f.count)
7393                 {
7394                     pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
7395                 }
7396                 else
7397                 {
7398                     R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
7399                     
7400                     pALU->m_pLinkedALUClause = NULL;
7401                     pALU = (R700ALUInstruction *)(pALU->pNextInst);
7402                     pALU->m_pLinkedALUClause = pCF_ALU;
7403                     pCF_ALU->m_pLinkedALUInstruction = pALU;
7404
7405                     pCF_ALU->m_Word1.f.count--;
7406                 }
7407                 break;
7408             }
7409             pInst = pInst->pNextInst;
7410         };
7411     }
7412 #endif /* GENERATE_SHADER_FOR_2D */
7413
7414     if(pAsm->CALLSTACK[0].max > 0)
7415     {
7416         pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
7417     }
7418
7419     if(0 == pAsm->unSubArrayPointer)
7420     {
7421         return GL_TRUE;
7422     }
7423
7424     unCFoffset = plstCFmain->uNumOfNode;
7425
7426     if(NULL != pILProg->Parameters)
7427     {        
7428         unConstOffset = pILProg->Parameters->NumParameters;
7429     }
7430
7431     /* Reloc subs */
7432     for(i=0; i<pAsm->unSubArrayPointer; i++)
7433     {
7434         pAsm->subs[i].unCFoffset = unCFoffset;
7435         plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
7436
7437         pInst = plstCFsub->pHead;
7438
7439         /* reloc instructions */
7440         while(pInst)
7441         {
7442             if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
7443             {
7444                 pCFInst = (R700ControlFlowGenericClause *)pInst;
7445
7446                 switch (pCFInst->m_Word1.f.cf_inst)
7447                 {
7448                 case SQ_CF_INST_POP:
7449                 case SQ_CF_INST_JUMP:
7450                 case SQ_CF_INST_ELSE:
7451                 case SQ_CF_INST_LOOP_END:
7452                 case SQ_CF_INST_LOOP_START:
7453                 case SQ_CF_INST_LOOP_START_NO_AL:
7454                 case SQ_CF_INST_LOOP_CONTINUE:
7455                 case SQ_CF_INST_LOOP_BREAK:
7456                     pCFInst->m_Word0.f.addr += unCFoffset;
7457                     break;
7458                 default:
7459                     break;
7460                 }
7461             }  
7462             
7463             pInst->m_uIndex += unCFoffset;
7464
7465             pInst = pInst->pNextInst;
7466         };
7467
7468         if(NULL != pAsm->subs[i].pPresubDesc)
7469         {
7470             GLuint                     uNumSrc;            
7471             
7472             unMinRegIndex  = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
7473             unRegOffset    = pAsm->subs[i].pPresubDesc->maxStartReg;            
7474             unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
7475
7476             pInst = plstCFsub->pHead;
7477             while(pInst)
7478             {
7479                 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7480                 {
7481                     pCF_ALU = (R700ControlFlowALUClause *)pInst;
7482
7483                     pALU = pCF_ALU->m_pLinkedALUInstruction;
7484                     for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7485                     {
7486                         pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7487
7488                         if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
7489                         {   
7490                             pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7491                         }
7492                         else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
7493                         {   
7494                             pALU->m_Word0.f.src0_sel += unConstOffset;
7495                         }
7496
7497                         if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) 
7498                             >= SQ_OP3_INST_MUL_LIT )
7499                         {   /* op3 : 3 srcs */
7500                             if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
7501                             {   
7502                                 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
7503                             }
7504                             else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
7505                             {   
7506                                 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
7507                             }    
7508                             if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7509                             {   
7510                                 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7511                             }
7512                             else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7513                             {   
7514                                 pALU->m_Word0.f.src1_sel += unConstOffset;
7515                             }                                 
7516                         }
7517                         else
7518                         {
7519                             if(8 == pAsm->unAsic)
7520                             {
7521                                  uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7522                             }
7523                             else
7524                             {
7525                                 if(pAsm->bR6xx)
7526                                 {
7527                                     uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
7528                                 }
7529                                 else
7530                                 {
7531                                     uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7532                                 }
7533                             }
7534                             if(2 == uNumSrc)
7535                             {   /* 2 srcs */
7536                                 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7537                                 {   
7538                                     pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7539                                 }
7540                                 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7541                                 {   
7542                                     pALU->m_Word0.f.src1_sel += unConstOffset;
7543                                 }                                  
7544                             }                            
7545                         }
7546                         pALU = (R700ALUInstruction*)(pALU->pNextInst);
7547                     }                    
7548                 }             
7549                 pInst = pInst->pNextInst;
7550             };
7551         }
7552
7553         /* Put sub into main */
7554         plstCFmain->pTail->pNextInst = plstCFsub->pHead;
7555         plstCFmain->pTail            = plstCFsub->pTail;
7556         plstCFmain->uNumOfNode      += plstCFsub->uNumOfNode;
7557
7558         unCFoffset += plstCFsub->uNumOfNode;
7559     }
7560
7561     /* reloc callers */
7562     for(i=0; i<pAsm->unCallerArrayPointer; i++)
7563     {
7564         pAsm->callers[i].cf_ptr->m_Word0.f.addr
7565             = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; 
7566
7567         if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
7568         {                 
7569             unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
7570             unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
7571
7572             if(NULL != pAsm->callers[i].prelude_cf_ptr)
7573             {                
7574                 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
7575                 pALU = pCF_ALU->m_pLinkedALUInstruction;
7576                 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7577                 {
7578                     pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7579                     pALU = (R700ALUInstruction*)(pALU->pNextInst);
7580                 }
7581             }
7582             if(NULL != pAsm->callers[i].finale_cf_ptr)
7583             {
7584                 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
7585                 pALU = pCF_ALU->m_pLinkedALUInstruction;
7586                 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7587                 {
7588                     pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7589                     pALU = (R700ALUInstruction*)(pALU->pNextInst);
7590                 }
7591             }
7592         }
7593     }
7594
7595     return GL_TRUE;
7596 }
7597
7598 GLboolean callPreSub(r700_AssemblerBase* pAsm, 
7599                          LOADABLE_SCRIPT_SIGNITURE scriptSigniture,                          
7600                          COMPILED_SUB * pCompiledSub,                                               
7601                          GLshort uOutReg,
7602                          GLshort uNumValidSrc)
7603 {
7604     /* save assemble context */
7605     GLuint starting_temp_register_number_save;
7606     GLuint number_used_registers_save;
7607     GLuint uFirstHelpReg_save;
7608     GLuint uHelpReg_save;
7609     GLuint uiCurInst_save;
7610     struct prog_instruction *pILInst_save;
7611     PRESUB_DESC * pPresubDesc;
7612     GLboolean     bRet;
7613     int i;
7614
7615     R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
7616
7617     /* copy srcs to presub inputs */
7618     pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7619     for(i=0; i<uNumValidSrc; i++)
7620     {
7621         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7622         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
7623         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
7624         pAsm->D.dst.reg   = pCompiledSub->srcRegIndex[i];
7625         pAsm->D.dst.writex = 1;
7626         pAsm->D.dst.writey = 1;
7627         pAsm->D.dst.writez = 1;
7628         pAsm->D.dst.writew = 1;
7629
7630         if( GL_FALSE == assemble_src(pAsm, i, 0) )
7631         {
7632             return GL_FALSE;
7633         }
7634
7635         next_ins(pAsm);
7636     }
7637     if(uNumValidSrc > 0)
7638     {
7639         prelude_cf_ptr     = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7640         pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7641     }
7642
7643     /* browse thro existing presubs. */
7644     for(i=0; i<pAsm->unNumPresub; i++)
7645     {
7646         if(pAsm->presubs[i].sptSigniture == scriptSigniture)
7647         {
7648             break;
7649         }
7650     }
7651
7652     if(i == pAsm->unNumPresub)
7653     {   /* not loaded yet */
7654         /* save assemble context */
7655         number_used_registers_save         = pAsm->number_used_registers;
7656         uFirstHelpReg_save                 = pAsm->uFirstHelpReg;
7657         uHelpReg_save                      = pAsm->uHelpReg;
7658         starting_temp_register_number_save = pAsm->starting_temp_register_number;
7659         pILInst_save                       = pAsm->pILInst;
7660         uiCurInst_save                     = pAsm->uiCurInst;
7661
7662         /* alloc in presub */
7663         if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
7664         {
7665             pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
7666                                       sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
7667                                       sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
7668             if(NULL == pAsm->presubs)
7669             {
7670                 radeon_error("No memeory to allocate built in shader function description structures. \n");
7671                 return GL_FALSE;
7672             }
7673             pAsm->unPresubArraySize += 4;
7674         }
7675         
7676         pPresubDesc = &(pAsm->presubs[i]);
7677         pPresubDesc->sptSigniture = scriptSigniture;
7678
7679         /* constants offsets need to be final resolved at reloc. */
7680         if(0 == pAsm->unNumPresub)
7681         {
7682             pPresubDesc->unConstantsStart = 0; 
7683         }
7684         else
7685         {
7686             pPresubDesc->unConstantsStart =  pAsm->presubs[i-1].unConstantsStart
7687                                            + pAsm->presubs[i-1].pCompiledSub->NumParameters;
7688         }
7689
7690         pPresubDesc->pCompiledSub = pCompiledSub;
7691
7692         pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
7693         pPresubDesc->maxStartReg  = uFirstHelpReg_save;
7694         pAsm->unCurNumILInsts    += pCompiledSub->NumInstructions;
7695
7696         pAsm->unNumPresub++;
7697
7698         /* setup new assemble context */
7699         pAsm->starting_temp_register_number = 0;
7700         pAsm->number_used_registers = pCompiledSub->NumTemporaries;
7701         pAsm->uFirstHelpReg         = pAsm->number_used_registers;
7702         pAsm->uHelpReg              = pAsm->uFirstHelpReg;
7703
7704         bRet = assemble_CAL(pAsm, 
7705                             0, 
7706                             pPresubDesc->subIL_Shift, 
7707                             pCompiledSub->NumInstructions,
7708                             pCompiledSub->Instructions,
7709                             pPresubDesc);
7710
7711         
7712         pPresubDesc->number_used_registers = pAsm->number_used_registers;        
7713
7714         /* restore assemble context */
7715         pAsm->number_used_registers         = number_used_registers_save; 
7716         pAsm->uFirstHelpReg                 = uFirstHelpReg_save;
7717         pAsm->uHelpReg                      = uHelpReg_save;
7718         pAsm->starting_temp_register_number = starting_temp_register_number_save;
7719         pAsm->pILInst                       = pILInst_save; 
7720         pAsm->uiCurInst                     = uiCurInst_save;
7721     }
7722     else
7723     {   /* was loaded */
7724         pPresubDesc = &(pAsm->presubs[i]);  
7725         
7726         bRet = assemble_CAL(pAsm, 
7727                             0, 
7728                             pPresubDesc->subIL_Shift, 
7729                             pCompiledSub->NumInstructions,
7730                             pCompiledSub->Instructions,
7731                             pPresubDesc);
7732     }
7733
7734     if(GL_FALSE == bRet)
7735     {
7736         radeon_error("Shader presub assemble failed. \n");
7737     }
7738     else
7739     {
7740         /* copy presub output to real dst */ 
7741         pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7742         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7743
7744         if( GL_FALSE == assemble_dst(pAsm) )
7745         {
7746             return GL_FALSE;
7747         }
7748
7749         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
7750         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
7751         pAsm->S[0].src.reg   = pCompiledSub->dstRegIndex;
7752         pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
7753         pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
7754         pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
7755         pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
7756
7757         next_ins(pAsm);        
7758
7759         pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr  = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7760         pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
7761         pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7762     }
7763
7764     if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
7765     {
7766         pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
7767     }
7768     if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
7769     {
7770         pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
7771     }
7772
7773     return bRet;
7774 }
7775
7776 GLboolean Process_Export(r700_AssemblerBase* pAsm,
7777                          GLuint type,
7778                          GLuint export_starting_index,
7779                          GLuint export_count, 
7780                          GLuint starting_register_number,
7781                          GLboolean is_depth_export)
7782 {
7783     check_current_clause(pAsm, CF_EMPTY_CLAUSE);
7784     check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
7785
7786     pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
7787
7788     switch (type) 
7789     {
7790         case SQ_EXPORT_PIXEL:
7791             if(GL_TRUE == is_depth_export) 
7792             {
7793                 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_PIXEL_Z;
7794             }
7795             else 
7796             {
7797                 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_PIXEL_MRT0 + export_starting_index;
7798             }
7799             break;
7800
7801         case SQ_EXPORT_POS:
7802             pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_POS_0 + export_starting_index; 
7803             break;
7804
7805         case SQ_EXPORT_PARAM:
7806             pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = 0x0 + export_starting_index; 
7807             break;
7808
7809         default:
7810             radeon_error("Unknown export type: %d\n", type);
7811             return GL_FALSE;
7812             break;
7813     }
7814
7815     pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr      = starting_register_number;
7816
7817     pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel      = SQ_ABSOLUTE;
7818     pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr   = 0x0;
7819     pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size   = 0x3; 
7820
7821     if(8 == pAsm->unAsic)
7822     {
7823         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7824                  (export_count - 1),
7825                  EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift,
7826                  EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask);
7827         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7828                  0,
7829                  EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7830                  EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7831         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7832                  0,
7833                  EG_CF_ALLOC_EXPORT_WORD1__VPM_shift,
7834                  EG_CF_ALLOC_EXPORT_WORD1__VPM_bit);
7835         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7836                  EG_CF_INST_EXPORT,
7837                  EG_CF_WORD1__CF_INST_shift, 
7838                  EG_CF_WORD1__CF_INST_mask);
7839         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7840                  0,
7841                  EG_CF_ALLOC_EXPORT_WORD1__MARK_shift,
7842                  EG_CF_ALLOC_EXPORT_WORD1__MARK_bit);
7843         SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val, 
7844                  1,
7845                  EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift,
7846                  EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit);
7847     }
7848     else
7849     {
7850         pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count      = (export_count - 1);
7851         pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program   = 0x0;
7852         pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
7853         pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_EXPORT;  // _DONE
7854         pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
7855         pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier          = 0x1;
7856     }
7857
7858     if (export_count == 1) 
7859     {
7860         assert(starting_register_number >= pAsm->starting_export_register_number);
7861
7862         /* exports Z as a float into Red channel */
7863         if (GL_TRUE == is_depth_export)
7864         {
7865             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_Z;
7866             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
7867             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
7868             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
7869         }
7870         else
7871         {
7872             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7873             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7874             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7875             pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7876         }
7877     }
7878     else 
7879     {
7880         // This should only be used if all components for all registers have been written
7881         pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7882         pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7883         pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7884         pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7885     }
7886
7887     pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
7888
7889     return GL_TRUE;
7890 }
7891
7892 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
7893                                    GLbitfield          OutputsWritten)  
7894
7895     unsigned int unBit;
7896     GLuint export_count = 0;
7897     unsigned int i;
7898
7899     for (i = 0; i < FRAG_RESULT_MAX; ++i)
7900     {
7901         unBit = 1 << i;
7902
7903         if (OutputsWritten & unBit)
7904         {
7905             GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE;
7906             if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
7907                 return GL_FALSE;
7908             ++export_count;
7909         }
7910     }
7911
7912     /* Need to export something, otherwise we'll hang
7913      * results are undefined anyway */
7914     if(export_count == 0)
7915     {
7916         Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
7917     }
7918     
7919     if(pR700AsmCode->cf_last_export_ptr != NULL) 
7920     {
7921         if(8 == pR700AsmCode->unAsic)
7922         {            
7923             SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, 
7924                      1,
7925                      EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7926                      EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);            
7927             SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, 
7928                      EG_CF_INST_EXPORT_DONE,
7929                      EG_CF_WORD1__CF_INST_shift, 
7930                      EG_CF_WORD1__CF_INST_mask);
7931         }
7932         else
7933         {
7934             pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst        = SQ_CF_INST_EXPORT_DONE;
7935             pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
7936         }
7937     }
7938
7939     return GL_TRUE;
7940 }
7941
7942 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
7943                                  GLbitfield          OutputsWritten)  
7944 {
7945     unsigned int unBit;
7946     unsigned int i;
7947
7948     GLuint export_starting_index  = 0;
7949     GLuint export_count           = pR700AsmCode->number_of_exports;
7950
7951     unBit = 1 << VERT_RESULT_HPOS;
7952         if(OutputsWritten & unBit)
7953         {
7954         if( GL_FALSE == Process_Export(pR700AsmCode, 
7955                                        SQ_EXPORT_POS, 
7956                                        export_starting_index, 
7957                                        1, 
7958                                        pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
7959                                        GL_FALSE) )
7960         {
7961             return GL_FALSE;
7962         }
7963         export_starting_index++;
7964         export_count--;
7965         }
7966
7967     unBit = 1 << VERT_RESULT_PSIZ;
7968     if(OutputsWritten & unBit)
7969     {
7970         if( GL_FALSE == Process_Export(pR700AsmCode,
7971                                        SQ_EXPORT_POS,
7972                                        export_starting_index,
7973                                        1,
7974                                        pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
7975                                        GL_FALSE) )
7976         {
7977             return GL_FALSE;
7978         }
7979         export_count--;
7980     }
7981
7982     if(8 == pR700AsmCode->unAsic)
7983     {                                   
7984         SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, 
7985                  EG_CF_INST_EXPORT_DONE,
7986                  EG_CF_WORD1__CF_INST_shift, 
7987                  EG_CF_WORD1__CF_INST_mask);
7988     }
7989     else
7990     {
7991         pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7992     }
7993
7994
7995     pR700AsmCode->number_of_exports = export_count;
7996     export_starting_index = 0;
7997
7998         unBit = 1 << VERT_RESULT_COL0;
7999         if(OutputsWritten & unBit)
8000         {
8001         if( GL_FALSE == Process_Export(pR700AsmCode, 
8002                                        SQ_EXPORT_PARAM, 
8003                                        export_starting_index, 
8004                                        1, 
8005                                        pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
8006                                        GL_FALSE) )
8007         {
8008             return GL_FALSE;
8009         }
8010
8011         export_starting_index++;
8012         }
8013
8014         unBit = 1 << VERT_RESULT_COL1;
8015         if(OutputsWritten & unBit)
8016         {
8017         if( GL_FALSE == Process_Export(pR700AsmCode, 
8018                                        SQ_EXPORT_PARAM, 
8019                                        export_starting_index, 
8020                                        1, 
8021                                        pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
8022                                        GL_FALSE) )
8023         {
8024             return GL_FALSE;
8025         }
8026
8027         export_starting_index++;
8028         }
8029
8030         unBit = 1 << VERT_RESULT_FOGC;
8031         if(OutputsWritten & unBit)
8032         {
8033         if( GL_FALSE == Process_Export(pR700AsmCode,
8034                                        SQ_EXPORT_PARAM,
8035                                        export_starting_index,
8036                                        1,
8037                                        pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
8038                                        GL_FALSE) )
8039         {
8040             return GL_FALSE;
8041         }
8042
8043         export_starting_index++;
8044         }
8045
8046         for(i=0; i<8; i++)
8047         {
8048                 unBit = 1 << (VERT_RESULT_TEX0 + i);
8049                 if(OutputsWritten & unBit)
8050                 {
8051             if( GL_FALSE == Process_Export(pR700AsmCode,
8052                                           SQ_EXPORT_PARAM, 
8053                                           export_starting_index, 
8054                                           1, 
8055                                           pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
8056                                           GL_FALSE) )
8057             {
8058                 return GL_FALSE;
8059             }
8060
8061             export_starting_index++;
8062                 }
8063         }
8064     
8065     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
8066         {
8067         unBit = 1 << i;
8068         if(OutputsWritten & unBit)
8069                 {
8070             if( GL_FALSE == Process_Export(pR700AsmCode,
8071                                           SQ_EXPORT_PARAM, 
8072                                           export_starting_index, 
8073                                           1, 
8074                                           pR700AsmCode->ucVP_OutputMap[i],
8075                                           GL_FALSE) )
8076             {                
8077                 return GL_FALSE;
8078             }
8079
8080             export_starting_index++;
8081                 }
8082     }
8083
8084     // At least one param should be exported
8085     if (export_count) 
8086     {
8087         if(8 == pR700AsmCode->unAsic)
8088         {                                   
8089             SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, 
8090                      EG_CF_INST_EXPORT_DONE,
8091                      EG_CF_WORD1__CF_INST_shift, 
8092                      EG_CF_WORD1__CF_INST_mask);
8093         }
8094         else
8095         {
8096             pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;    
8097         }
8098     }
8099     else
8100     {
8101         if( GL_FALSE == Process_Export(pR700AsmCode,
8102                                        SQ_EXPORT_PARAM, 
8103                                        0, 
8104                                        1, 
8105                                        pR700AsmCode->starting_export_register_number,
8106                                        GL_FALSE) )
8107         {
8108             return GL_FALSE;
8109         }
8110       
8111         pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
8112         pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
8113         pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
8114         pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
8115         if(8 == pR700AsmCode->unAsic)
8116         {                                   
8117             SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val, 
8118                      EG_CF_INST_EXPORT_DONE,
8119                      EG_CF_WORD1__CF_INST_shift, 
8120                      EG_CF_WORD1__CF_INST_mask);
8121         }
8122         else
8123         {
8124             pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
8125         }
8126     }
8127
8128     pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
8129
8130     return GL_TRUE;
8131 }
8132
8133 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
8134 {
8135     if(NULL != pR700AsmCode->pInstDeps)
8136     {
8137         FREE(pR700AsmCode->pInstDeps);
8138         pR700AsmCode->pInstDeps = NULL;
8139     }
8140
8141     if(NULL != pR700AsmCode->subs)
8142     {
8143         FREE(pR700AsmCode->subs);
8144         pR700AsmCode->subs = NULL;
8145     }
8146     if(NULL != pR700AsmCode->callers)
8147     {
8148         FREE(pR700AsmCode->callers);
8149         pR700AsmCode->callers = NULL;
8150     }
8151
8152     if(NULL != pR700AsmCode->presubs)
8153     {
8154         FREE(pR700AsmCode->presubs);
8155          pR700AsmCode->presubs = NULL;
8156     }
8157
8158     return GL_TRUE;
8159 }
8160