Tizen 2.1 base
[sdk/emulator/qemu.git] / gl / mesa / src / gallium / drivers / r300 / compiler / r500_fragprog_emit.c
1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  *
7  * All Rights Reserved.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the
18  * next paragraph) shall be included in all copies or substantial
19  * portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  *
29  */
30
31 /**
32  * \file
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  *
38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39  *
40  */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/* Declares a local `code` pointing at the R500 code of the compiler `c`,
 * which must already be in scope where the macro is used. */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500
50
/*
 * Report a compiler error through rc_error() on `c->Base`, where `c` is the
 * compiler variable in scope at the call site. The message is prefixed with
 * the source file and enclosing function name and a newline is appended.
 *
 * Uses the C99 __VA_ARGS__ / __func__ spellings; the `##` before __VA_ARGS__
 * drops the preceding comma when no extra arguments are given.
 */
#define error(fmt, ...) do {					\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __func__, ##__VA_ARGS__);	\
	} while(0)
55
56
/**
 * Instruction slots belonging to one IF / ELSE / ENDIF construct.
 * Else and Endif hold -1 until the corresponding instruction is emitted.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, -1 if none */
	int Endif; /* slot of the ENDIF instruction */
};
62
/**
 * Bookkeeping for one open loop: where it began and which BRK / CONT
 * instructions still need their jump address filled in at ENDLOOP.
 */
struct r500_loop_info {
	int BgnLoop;       /* slot of the BGNLOOP instruction */

	int BranchDepth;   /* branch depth at the time the loop was opened */
	int *Brks;         /* slots of the BRK instructions in this loop */
	int BrkCount;      /* number of valid entries in Brks */
	int BrkReserved;   /* reservation counter passed to memory_pool_array_reserve */

	int *Conts;        /* slots of the CONT instructions in this loop */
	int ContCount;     /* number of valid entries in Conts */
	int ContReserved;  /* reservation counter passed to memory_pool_array_reserve */
};
75
/**
 * State carried across flow-control emission: the compiler, the code being
 * built, and the stacks of currently open branches and loops.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Highest value CurrentBranchDepth has reached. */
	unsigned int MaxBranchDepth;

};
91
92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
93 {
94         switch(opcode) {
95         case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
96         case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
97         case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
98         case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
99         case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
100         case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
101         case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
102         default:
103                 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
104                 /* fall through */
105         case RC_OPCODE_NOP:
106                 /* fall through */
107         case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
108         case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
109         case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
110         case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
111         }
112 }
113
114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
115 {
116         switch(opcode) {
117         case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
118         case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
119         case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
120         case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
121         case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
122         case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
123         case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
124         case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
125         case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
126         case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
127         default:
128                 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
129                 /* fall through */
130         case RC_OPCODE_NOP:
131                 /* fall through */
132         case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
133         case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
134         case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
135         case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
136         case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
137         case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
138         }
139 }
140
141 static unsigned int fix_hw_swizzle(unsigned int swz)
142 {
143     switch (swz) {
144         case RC_SWIZZLE_ZERO:
145         case RC_SWIZZLE_UNUSED:
146             swz = 4;
147             break;
148         case RC_SWIZZLE_HALF:
149             swz = 5;
150             break;
151         case RC_SWIZZLE_ONE:
152             swz = 6;
153             break;
154     }
155
156         return swz;
157 }
158
159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
160 {
161         unsigned int t = inst->RGB.Arg[arg].Source;
162         int comp;
163         t |= inst->RGB.Arg[arg].Negate << 11;
164         t |= inst->RGB.Arg[arg].Abs << 12;
165
166         for(comp = 0; comp < 3; ++comp)
167                 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
168
169         return t;
170 }
171
172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
173 {
174         unsigned int t = inst->Alpha.Arg[i].Source;
175         t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
176         t |= inst->Alpha.Arg[i].Negate << 5;
177         t |= inst->Alpha.Arg[i].Abs << 6;
178         return t;
179 }
180
181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
182 {
183         switch(func) {
184         case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
185         case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
186         case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
187         case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
188         default:
189                 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
190                 return 0;
191         }
192 }
193
194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
195 {
196         if (index > code->max_temp_idx)
197                 code->max_temp_idx = index;
198 }
199
200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
201 {
202         /* From docs:
203          *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
204          * MSB = 1 << 7 */
205         if (!src.Used)
206                 return 1 << 7;
207
208         if (src.File == RC_FILE_CONSTANT) {
209                 return src.Index | R500_RGB_ADDR0_CONST;
210         } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
211                 use_temporary(code, src.Index);
212                 return src.Index;
213         }
214
215         return 0;
216 }
217
218 /**
219  * NOP the specified instruction if it is not a texture lookup.
220  */
221 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
222 {
223         PROG_CODE;
224
225         if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
226                 code->inst[ip].inst0 |= R500_INST_NOP;
227         }
228 }
229
230 /**
231  * Emit a paired ALU instruction.
232  */
233 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
234 {
235         int ip;
236         PROG_CODE;
237
238         if (code->inst_end >= c->Base.max_alu_insts-1) {
239                 error("emit_alu: Too many instructions");
240                 return;
241         }
242
243         ip = ++code->inst_end;
244
245         /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
246         if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
247                 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
248                 if (ip > 0) {
249                         alu_nop(c, ip - 1);
250                 }
251         }
252
253         code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
254         code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
255
256         if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
257                 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
258                 if (inst->WriteALUResult) {
259                         error("Cannot write output and ALU result at the same time");
260                         return;
261                 }
262         } else {
263                 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
264         }
265         code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
266
267         code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
268         code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
269         code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
270         if (inst->Nop) {
271                 code->inst[ip].inst0 |= R500_INST_NOP;
272         }
273         if (inst->Alpha.DepthWriteMask) {
274                 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
275                 c->code->writes_depth = 1;
276         }
277
278         code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
279         code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
280         use_temporary(code, inst->Alpha.DestIndex);
281         use_temporary(code, inst->RGB.DestIndex);
282
283         if (inst->RGB.Saturate)
284                 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
285         if (inst->Alpha.Saturate)
286                 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
287
288         /* Set the presubtract operation. */
289         switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
290                 case RC_PRESUB_BIAS:
291                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
292                         break;
293                 case RC_PRESUB_SUB:
294                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
295                         break;
296                 case RC_PRESUB_ADD:
297                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
298                         break;
299                 case RC_PRESUB_INV:
300                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
301                         break;
302                 default:
303                         break;
304         }
305         switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
306                 case RC_PRESUB_BIAS:
307                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
308                         break;
309                 case RC_PRESUB_SUB:
310                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
311                         break;
312                 case RC_PRESUB_ADD:
313                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
314                         break;
315                 case RC_PRESUB_INV:
316                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
317                         break;
318                 default:
319                         break;
320         }
321
322         /* Set the output modifier */
323         code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
324         code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
325
326         code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
327         code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
328         code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
329
330         code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
331         code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
332         code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
333
334         code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
335         code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
336         code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
337
338         code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
339         code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
340         code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
341
342         code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
343         code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
344
345         if (inst->WriteALUResult) {
346                 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
347
348                 if (inst->WriteALUResult == RC_ALURESULT_X)
349                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
350                 else
351                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
352
353                 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
354         }
355 }
356
/**
 * Pack the four component selectors of `swizzle` into two bits each,
 * component i ending up at bits [2i+1:2i]. Only the low two bits of each
 * selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	for (chan = 3; chan >= 0; chan--)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (2 * chan);

	return packed;
}
365
366 /**
367  * Emit a single TEX instruction
368  */
369 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
370 {
371         int ip;
372         PROG_CODE;
373
374         if (code->inst_end >= c->Base.max_alu_insts-1) {
375                 error("emit_tex: Too many instructions");
376                 return 0;
377         }
378
379         ip = ++code->inst_end;
380
381         code->inst[ip].inst0 = R500_INST_TYPE_TEX
382                 | (inst->DstReg.WriteMask << 11)
383                 | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
384         code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
385                 | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
386
387         if (inst->TexSrcTarget == RC_TEXTURE_RECT)
388                 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
389
390         switch (inst->Opcode) {
391         case RC_OPCODE_KIL:
392                 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
393                 break;
394         case RC_OPCODE_TEX:
395                 code->inst[ip].inst1 |= R500_TEX_INST_LD;
396                 break;
397         case RC_OPCODE_TXB:
398                 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
399                 break;
400         case RC_OPCODE_TXP:
401                 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
402                 break;
403         case RC_OPCODE_TXD:
404                 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
405                 break;
406         case RC_OPCODE_TXL:
407                 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
408                 break;
409         default:
410                 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
411         }
412
413         use_temporary(code, inst->SrcReg[0].Index);
414         if (inst->Opcode != RC_OPCODE_KIL)
415                 use_temporary(code, inst->DstReg.Index);
416
417         code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
418                 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
419                 | R500_TEX_DST_ADDR(inst->DstReg.Index)
420                 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
421                 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
422                 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
423                 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
424                 ;
425
426         if (inst->Opcode == RC_OPCODE_TXD) {
427                 use_temporary(code, inst->SrcReg[1].Index);
428                 use_temporary(code, inst->SrcReg[2].Index);
429
430                 /* DX and DY parameters are specified in a separate register. */
431                 code->inst[ip].inst3 =
432                         R500_DX_ADDR(inst->SrcReg[1].Index) |
433                         (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
434                         R500_DY_ADDR(inst->SrcReg[2].Index) |
435                         (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
436         }
437
438         return 1;
439 }
440
441 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
442 {
443         unsigned int newip;
444
445         if (s->Code->inst_end >= s->C->max_alu_insts-1) {
446                 rc_error(s->C, "emit_tex: Too many instructions");
447                 return;
448         }
449
450         newip = ++s->Code->inst_end;
451
452         /* Currently all loops use the same integer constant to intialize
453          * the loop variables. */
454         if(!s->Code->int_constants[0]) {
455                 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
456                 s->Code->int_constant_count = 1;
457         }
458         s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
459
460         switch(inst->U.I.Opcode){
461         struct branch_info * branch;
462         struct r500_loop_info * loop;
463         case RC_OPCODE_BGNLOOP:
464                 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
465                         s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
466
467                 loop = &s->Loops[s->CurrentLoopDepth++];
468                 memset(loop, 0, sizeof(struct r500_loop_info));
469                 loop->BranchDepth = s->CurrentBranchDepth;
470                 loop->BgnLoop = newip;
471
472                 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
473                         | R500_FC_JUMP_FUNC(0x00)
474                         | R500_FC_IGNORE_UNCOVERED
475                         ;
476                 break;
477         case RC_OPCODE_BRK:
478                 loop = &s->Loops[s->CurrentLoopDepth - 1];
479                 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
480                                         loop->BrkCount, loop->BrkReserved, 1);
481
482                 loop->Brks[loop->BrkCount++] = newip;
483                 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
484                         | R500_FC_JUMP_FUNC(0xff)
485                         | R500_FC_B_OP1_DECR
486                         | R500_FC_B_POP_CNT(
487                                 s->CurrentBranchDepth - loop->BranchDepth)
488                         | R500_FC_IGNORE_UNCOVERED
489                         ;
490                 break;
491
492         case RC_OPCODE_CONT:
493                 loop = &s->Loops[s->CurrentLoopDepth - 1];
494                 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
495                                         loop->ContCount, loop->ContReserved, 1);
496                 loop->Conts[loop->ContCount++] = newip;
497                 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
498                         | R500_FC_JUMP_FUNC(0xff)
499                         | R500_FC_B_OP1_DECR
500                         | R500_FC_B_POP_CNT(
501                                 s->CurrentBranchDepth - loop->BranchDepth)
502                         | R500_FC_IGNORE_UNCOVERED
503                         ;
504                 break;
505
506         case RC_OPCODE_ENDLOOP:
507         {
508                 loop = &s->Loops[s->CurrentLoopDepth - 1];
509                 /* Emit ENDLOOP */
510                 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
511                         | R500_FC_JUMP_FUNC(0xff)
512                         | R500_FC_JUMP_ANY
513                         | R500_FC_IGNORE_UNCOVERED
514                         ;
515                 /* The constant integer at index 0 is used by all loops. */
516                 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
517                         | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
518                         ;
519
520                 /* Set jump address and int constant for BGNLOOP */
521                 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
522                         | R500_FC_JUMP_ADDR(newip)
523                         ;
524
525                 /* Set jump address for the BRK instructions. */
526                 while(loop->BrkCount--) {
527                         s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
528                                                 R500_FC_JUMP_ADDR(newip + 1);
529                 }
530
531                 /* Set jump address for CONT instructions. */
532                 while(loop->ContCount--) {
533                         s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
534                                                 R500_FC_JUMP_ADDR(newip);
535                 }
536                 s->CurrentLoopDepth--;
537                 break;
538         }
539         case RC_OPCODE_IF:
540                 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
541                         rc_error(s->C, "Branch depth exceeds hardware limit");
542                         return;
543                 }
544                 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
545                                 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
546
547                 branch = &s->Branches[s->CurrentBranchDepth++];
548                 branch->If = newip;
549                 branch->Else = -1;
550                 branch->Endif = -1;
551
552                 if (s->CurrentBranchDepth > s->MaxBranchDepth)
553                         s->MaxBranchDepth = s->CurrentBranchDepth;
554
555                 /* actual instruction is filled in at ENDIF time */
556                 break;
557         
558         case RC_OPCODE_ELSE:
559                 if (!s->CurrentBranchDepth) {
560                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
561                         return;
562                 }
563
564                 branch = &s->Branches[s->CurrentBranchDepth - 1];
565                 branch->Else = newip;
566
567                 /* actual instruction is filled in at ENDIF time */
568                 break;
569
570         case RC_OPCODE_ENDIF:
571                 if (!s->CurrentBranchDepth) {
572                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
573                         return;
574                 }
575
576                 branch = &s->Branches[s->CurrentBranchDepth - 1];
577                 branch->Endif = newip;
578
579                 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
580                         | R500_FC_A_OP_NONE /* no address stack */
581                         | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
582                         | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
583                         | R500_FC_B_OP1_NONE /* no branch counter if stay */
584                         | R500_FC_B_POP_CNT(1)
585                         ;
586                 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
587                 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
588                         | R500_FC_A_OP_NONE /* no address stack */
589                         | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
590                         | R500_FC_B_OP0_INCR /* increment branch counter if stay */
591                         | R500_FC_IGNORE_UNCOVERED
592                 ;
593
594                 if (branch->Else >= 0) {
595                         /* increment branch counter also if jump */
596                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
597                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
598
599                         s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
600                                 | R500_FC_A_OP_NONE /* no address stack */
601                                 | R500_FC_B_ELSE /* all active pixels want to jump */
602                                 | R500_FC_B_OP0_NONE /* no counter op if stay */
603                                 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
604                                 | R500_FC_B_POP_CNT(1)
605                         ;
606                         s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
607                 } else {
608                         /* don't touch branch counter on jump */
609                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
610                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
611                 }
612
613
614                 s->CurrentBranchDepth--;
615                 break;
616         default:
617                 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
618         }
619 }
620
621 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
622 {
623         struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
624         struct emit_state s;
625         struct r500_fragment_program_code *code = &compiler->code->code.r500;
626
627         memset(&s, 0, sizeof(s));
628         s.C = &compiler->Base;
629         s.Code = code;
630
631         memset(code, 0, sizeof(*code));
632         code->max_temp_idx = 1;
633         code->inst_end = -1;
634
635         for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
636             inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
637             inst = inst->Next) {
638                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
639                         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
640
641                         if (opcode->IsFlowControl) {
642                                 emit_flowcontrol(&s, inst);
643                         } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
644                                 continue;
645                         } else {
646                                 emit_tex(compiler, &inst->U.I);
647                         }
648                 } else {
649                         emit_paired(compiler, &inst->U.P);
650                 }
651         }
652
653         if (code->max_temp_idx >= compiler->Base.max_temp_regs)
654                 rc_error(&compiler->Base, "Too many hardware temporaries used");
655
656         if (compiler->Base.Error)
657                 return;
658
659         if (code->inst_end == -1 ||
660             (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
661                 int ip;
662
663                 /* This may happen when dead-code elimination is disabled or
664                  * when most of the fragment program logic is leading to a KIL */
665                 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
666                         rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
667                         return;
668                 }
669
670                 ip = ++code->inst_end;
671                 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
672         }
673
674         /* Make sure TEX_SEM_WAIT is set on the last instruction */
675         code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
676
677         /* Enable full flow control mode if we are using loops or have if
678          * statements nested at least four deep. */
679         if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
680                 if (code->max_temp_idx < 1)
681                         code->max_temp_idx = 1;
682
683                 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
684         }
685 }