Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  *
7  * All Rights Reserved.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the
18  * next paragraph) shall be included in all copies or substantial
19  * portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  *
29  */
30
31 /**
32  * \file
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  *
38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39  *
40  */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/*
 * Declare a local `code` pointer to the R500 hardware program that is being
 * filled in. Expects a compiler pointer named `c` in the enclosing scope.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/*
 * Report a compile error on the compiler `c` of the enclosing scope. The
 * message is prefixed with the source file and function name and has a
 * newline appended.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
55
56
/**
 * Instruction indices of the flow-control slots belonging to one IF block.
 * Else and Endif are set to -1 until the matching opcode has been emitted.
 */
struct branch_info {
	int If;     /* index of the IF instruction */
	int Else;   /* index of the ELSE instruction, or -1 if there is none */
	int Endif;  /* index of the ENDIF instruction, or -1 if not yet seen */
};
62
/**
 * Bookkeeping for one open loop: where it started and which BRK / CONT
 * instructions still need their jump address patched at ENDLOOP time.
 */
struct r500_loop_info {
	/* index of the BGNLOOP instruction */
	int BgnLoop;

	/* branch depth that was current when the loop was opened */
	int BranchDepth;

	/* instruction indices of the BRK instructions inside this loop */
	int *Brks;
	int BrkCount;
	int BrkReserved;  /* presumably the allocated capacity of Brks */

	/* instruction indices of the CONT instructions inside this loop */
	int *Conts;
	int ContCount;
	int ContReserved; /* presumably the allocated capacity of Conts */
};
75
/**
 * State carried across flow-control instructions while emitting a program.
 */
struct emit_state {
	struct radeon_compiler *C;               /* compiler used for error reporting and pool allocation */
	struct r500_fragment_program_code *Code; /* hardware program being written */

	/* stack of currently open IF blocks */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* stack of currently open loops */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* deepest IF nesting reached so far */
	unsigned int MaxBranchDepth;
};
91
92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
93 {
94         switch(opcode) {
95         case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
96         case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
97         case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
98         case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
99         case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
100         case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
101         case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
102         default:
103                 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
104                 /* fall through */
105         case RC_OPCODE_NOP:
106                 /* fall through */
107         case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
108         case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
109         case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
110         case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
111         }
112 }
113
114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
115 {
116         switch(opcode) {
117         case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
118         case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
119         case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
120         case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
121         case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
122         case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
123         case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
124         case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
125         case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
126         case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
127         default:
128                 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
129                 /* fall through */
130         case RC_OPCODE_NOP:
131                 /* fall through */
132         case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
133         case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
134         case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
135         case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
136         case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
137         case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
138         }
139 }
140
141 static unsigned int fix_hw_swizzle(unsigned int swz)
142 {
143     switch (swz) {
144         case RC_SWIZZLE_ZERO:
145         case RC_SWIZZLE_UNUSED:
146             swz = 4;
147             break;
148         case RC_SWIZZLE_HALF:
149             swz = 5;
150             break;
151         case RC_SWIZZLE_ONE:
152             swz = 6;
153             break;
154     }
155
156         return swz;
157 }
158
159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
160 {
161         unsigned int t = inst->RGB.Arg[arg].Source;
162         int comp;
163         t |= inst->RGB.Arg[arg].Negate << 11;
164         t |= inst->RGB.Arg[arg].Abs << 12;
165
166         for(comp = 0; comp < 3; ++comp)
167                 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
168
169         return t;
170 }
171
172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
173 {
174         unsigned int t = inst->Alpha.Arg[i].Source;
175         t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
176         t |= inst->Alpha.Arg[i].Negate << 5;
177         t |= inst->Alpha.Arg[i].Abs << 6;
178         return t;
179 }
180
181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
182 {
183         switch(func) {
184         case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
185         case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
186         case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
187         case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
188         default:
189                 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
190                 return 0;
191         }
192 }
193
194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
195 {
196         if (index > code->max_temp_idx)
197                 code->max_temp_idx = index;
198 }
199
200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
201 {
202         /* From docs:
203          *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
204          * MSB = 1 << 7 */
205         if (!src.Used)
206                 return 1 << 7;
207
208         if (src.File == RC_FILE_CONSTANT) {
209                 return src.Index | R500_RGB_ADDR0_CONST;
210         } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
211                 use_temporary(code, src.Index);
212                 return src.Index;
213         }
214
215         return 0;
216 }
217
218 /**
219  * NOP the specified instruction if it is not a texture lookup.
220  */
221 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
222 {
223         PROG_CODE;
224
225         if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
226                 code->inst[ip].inst0 |= R500_INST_NOP;
227         }
228 }
229
230 /**
231  * Emit a paired ALU instruction.
232  */
233 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
234 {
235         int ip;
236         PROG_CODE;
237
238         if (code->inst_end >= c->Base.max_alu_insts-1) {
239                 error("emit_alu: Too many instructions");
240                 return;
241         }
242
243         ip = ++code->inst_end;
244
245         /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
246         if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
247                 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
248                 if (ip > 0) {
249                         alu_nop(c, ip - 1);
250                 }
251         }
252
253         code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
254         code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
255
256         if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
257                 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
258                 if (inst->WriteALUResult) {
259                         error("Cannot write output and ALU result at the same time");
260                         return;
261                 }
262         } else {
263                 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
264         }
265         code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
266
267         code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
268         code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
269         code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
270         if (inst->Nop) {
271                 code->inst[ip].inst0 |= R500_INST_NOP;
272         }
273         if (inst->Alpha.DepthWriteMask) {
274                 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
275                 c->code->writes_depth = 1;
276         }
277
278         code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
279         code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
280         use_temporary(code, inst->Alpha.DestIndex);
281         use_temporary(code, inst->RGB.DestIndex);
282
283         if (inst->RGB.Saturate)
284                 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
285         if (inst->Alpha.Saturate)
286                 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
287
288         /* Set the presubtract operation. */
289         switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
290                 case RC_PRESUB_BIAS:
291                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
292                         break;
293                 case RC_PRESUB_SUB:
294                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
295                         break;
296                 case RC_PRESUB_ADD:
297                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
298                         break;
299                 case RC_PRESUB_INV:
300                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
301                         break;
302                 default:
303                         break;
304         }
305         switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
306                 case RC_PRESUB_BIAS:
307                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
308                         break;
309                 case RC_PRESUB_SUB:
310                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
311                         break;
312                 case RC_PRESUB_ADD:
313                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
314                         break;
315                 case RC_PRESUB_INV:
316                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
317                         break;
318                 default:
319                         break;
320         }
321
322         code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
323         code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
324         code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
325
326         code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
327         code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
328         code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
329
330         code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
331         code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
332         code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
333
334         code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
335         code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
336         code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
337
338         code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
339         code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
340
341         if (inst->WriteALUResult) {
342                 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
343
344                 if (inst->WriteALUResult == RC_ALURESULT_X)
345                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
346                 else
347                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
348
349                 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
350         }
351 }
352
/**
 * Pack the four components of a compiler swizzle into a texture coordinate
 * swizzle: the low two bits of component i are placed at bits 2*i and 2*i+1.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	for (chan = 3; chan >= 0; --chan) {
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (chan * 2);
	}

	return packed;
}
361
362 /**
363  * Emit a single TEX instruction
364  */
365 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
366 {
367         int ip;
368         PROG_CODE;
369
370         if (code->inst_end >= c->Base.max_alu_insts-1) {
371                 error("emit_tex: Too many instructions");
372                 return 0;
373         }
374
375         ip = ++code->inst_end;
376
377         code->inst[ip].inst0 = R500_INST_TYPE_TEX
378                 | (inst->DstReg.WriteMask << 11)
379                 | R500_INST_TEX_SEM_WAIT;
380         code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
381                 | R500_TEX_SEM_ACQUIRE;
382
383         if (inst->TexSrcTarget == RC_TEXTURE_RECT)
384                 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
385
386         switch (inst->Opcode) {
387         case RC_OPCODE_KIL:
388                 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
389                 break;
390         case RC_OPCODE_TEX:
391                 code->inst[ip].inst1 |= R500_TEX_INST_LD;
392                 break;
393         case RC_OPCODE_TXB:
394                 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
395                 break;
396         case RC_OPCODE_TXP:
397                 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
398                 break;
399         case RC_OPCODE_TXD:
400                 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
401                 break;
402         case RC_OPCODE_TXL:
403                 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
404                 break;
405         default:
406                 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
407         }
408
409         use_temporary(code, inst->SrcReg[0].Index);
410         if (inst->Opcode != RC_OPCODE_KIL)
411                 use_temporary(code, inst->DstReg.Index);
412
413         code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
414                 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
415                 | R500_TEX_DST_ADDR(inst->DstReg.Index)
416                 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
417                 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
418                 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
419                 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
420                 ;
421
422         if (inst->Opcode == RC_OPCODE_TXD) {
423                 use_temporary(code, inst->SrcReg[1].Index);
424                 use_temporary(code, inst->SrcReg[2].Index);
425
426                 /* DX and DY parameters are specified in a separate register. */
427                 code->inst[ip].inst3 =
428                         R500_DX_ADDR(inst->SrcReg[1].Index) |
429                         (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
430                         R500_DY_ADDR(inst->SrcReg[2].Index) |
431                         (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
432         }
433
434         return 1;
435 }
436
437 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
438 {
439         unsigned int newip;
440
441         if (s->Code->inst_end >= s->C->max_alu_insts-1) {
442                 rc_error(s->C, "emit_tex: Too many instructions");
443                 return;
444         }
445
446         newip = ++s->Code->inst_end;
447
448         /* Currently all loops use the same integer constant to intialize
449          * the loop variables. */
450         if(!s->Code->int_constants[0]) {
451                 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
452                 s->Code->int_constant_count = 1;
453         }
454         s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
455
456         switch(inst->U.I.Opcode){
457         struct branch_info * branch;
458         struct r500_loop_info * loop;
459         case RC_OPCODE_BGNLOOP:
460                 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
461                         s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
462
463                 loop = &s->Loops[s->CurrentLoopDepth++];
464                 memset(loop, 0, sizeof(struct r500_loop_info));
465                 loop->BranchDepth = s->CurrentBranchDepth;
466                 loop->BgnLoop = newip;
467
468                 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
469                         | R500_FC_JUMP_FUNC(0x00)
470                         | R500_FC_IGNORE_UNCOVERED
471                         ;
472                 break;
473         case RC_OPCODE_BRK:
474                 loop = &s->Loops[s->CurrentLoopDepth - 1];
475                 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
476                                         loop->BrkCount, loop->BrkReserved, 1);
477
478                 loop->Brks[loop->BrkCount++] = newip;
479                 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
480                         | R500_FC_JUMP_FUNC(0xff)
481                         | R500_FC_B_OP1_DECR
482                         | R500_FC_B_POP_CNT(
483                                 s->CurrentBranchDepth - loop->BranchDepth)
484                         | R500_FC_IGNORE_UNCOVERED
485                         ;
486                 break;
487
488         case RC_OPCODE_CONT:
489                 loop = &s->Loops[s->CurrentLoopDepth - 1];
490                 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
491                                         loop->ContCount, loop->ContReserved, 1);
492                 loop->Conts[loop->ContCount++] = newip;
493                 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
494                         | R500_FC_JUMP_FUNC(0xff)
495                         | R500_FC_B_OP1_DECR
496                         | R500_FC_B_POP_CNT(
497                                 s->CurrentBranchDepth - loop->BranchDepth)
498                         | R500_FC_IGNORE_UNCOVERED
499                         ;
500                 break;
501
502         case RC_OPCODE_ENDLOOP:
503         {
504                 loop = &s->Loops[s->CurrentLoopDepth - 1];
505                 /* Emit ENDLOOP */
506                 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
507                         | R500_FC_JUMP_FUNC(0xff)
508                         | R500_FC_JUMP_ANY
509                         | R500_FC_IGNORE_UNCOVERED
510                         ;
511                 /* The constant integer at index 0 is used by all loops. */
512                 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
513                         | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
514                         ;
515
516                 /* Set jump address and int constant for BGNLOOP */
517                 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
518                         | R500_FC_JUMP_ADDR(newip)
519                         ;
520
521                 /* Set jump address for the BRK instructions. */
522                 while(loop->BrkCount--) {
523                         s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
524                                                 R500_FC_JUMP_ADDR(newip + 1);
525                 }
526
527                 /* Set jump address for CONT instructions. */
528                 while(loop->ContCount--) {
529                         s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
530                                                 R500_FC_JUMP_ADDR(newip);
531                 }
532                 s->CurrentLoopDepth--;
533                 break;
534         }
535         case RC_OPCODE_IF:
536                 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
537                         rc_error(s->C, "Branch depth exceeds hardware limit");
538                         return;
539                 }
540                 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
541                                 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
542
543                 branch = &s->Branches[s->CurrentBranchDepth++];
544                 branch->If = newip;
545                 branch->Else = -1;
546                 branch->Endif = -1;
547
548                 if (s->CurrentBranchDepth > s->MaxBranchDepth)
549                         s->MaxBranchDepth = s->CurrentBranchDepth;
550
551                 /* actual instruction is filled in at ENDIF time */
552                 break;
553         
554         case RC_OPCODE_ELSE:
555                 if (!s->CurrentBranchDepth) {
556                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
557                         return;
558                 }
559
560                 branch = &s->Branches[s->CurrentBranchDepth - 1];
561                 branch->Else = newip;
562
563                 /* actual instruction is filled in at ENDIF time */
564                 break;
565
566         case RC_OPCODE_ENDIF:
567                 if (!s->CurrentBranchDepth) {
568                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
569                         return;
570                 }
571
572                 branch = &s->Branches[s->CurrentBranchDepth - 1];
573                 branch->Endif = newip;
574
575                 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
576                         | R500_FC_A_OP_NONE /* no address stack */
577                         | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
578                         | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
579                         | R500_FC_B_OP1_NONE /* no branch counter if stay */
580                         | R500_FC_B_POP_CNT(1)
581                         ;
582                 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
583                 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
584                         | R500_FC_A_OP_NONE /* no address stack */
585                         | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
586                         | R500_FC_B_OP0_INCR /* increment branch counter if stay */
587                         | R500_FC_IGNORE_UNCOVERED
588                 ;
589
590                 if (branch->Else >= 0) {
591                         /* increment branch counter also if jump */
592                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
593                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
594
595                         s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
596                                 | R500_FC_A_OP_NONE /* no address stack */
597                                 | R500_FC_B_ELSE /* all active pixels want to jump */
598                                 | R500_FC_B_OP0_NONE /* no counter op if stay */
599                                 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
600                                 | R500_FC_B_POP_CNT(1)
601                         ;
602                         s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
603                 } else {
604                         /* don't touch branch counter on jump */
605                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
606                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
607                 }
608
609
610                 s->CurrentBranchDepth--;
611                 break;
612         default:
613                 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
614         }
615 }
616
617 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
618 {
619         struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
620         struct emit_state s;
621         struct r500_fragment_program_code *code = &compiler->code->code.r500;
622
623         memset(&s, 0, sizeof(s));
624         s.C = &compiler->Base;
625         s.Code = code;
626
627         memset(code, 0, sizeof(*code));
628         code->max_temp_idx = 1;
629         code->inst_end = -1;
630
631         for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
632             inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
633             inst = inst->Next) {
634                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
635                         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
636
637                         if (opcode->IsFlowControl) {
638                                 emit_flowcontrol(&s, inst);
639                         } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
640                                 continue;
641                         } else {
642                                 emit_tex(compiler, &inst->U.I);
643                         }
644                 } else {
645                         emit_paired(compiler, &inst->U.P);
646                 }
647         }
648
649         if (code->max_temp_idx >= compiler->Base.max_temp_regs)
650                 rc_error(&compiler->Base, "Too many hardware temporaries used");
651
652         if (compiler->Base.Error)
653                 return;
654
655         if (code->inst_end == -1 ||
656             (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
657                 int ip;
658
659                 /* This may happen when dead-code elimination is disabled or
660                  * when most of the fragment program logic is leading to a KIL */
661                 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
662                         rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
663                         return;
664                 }
665
666                 ip = ++code->inst_end;
667                 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
668         }
669
670         /* Enable full flow control mode if we are using loops or have if
671          * statements nested at least four deep. */
672         if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
673                 if (code->max_temp_idx < 1)
674                         code->max_temp_idx = 1;
675
676                 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
677         }
678 }