Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / compiler / r300_fragprog_emit.c
1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 /**
29  * \file
30  *
31  * Emit the r300_fragment_program_code that can be understood by the hardware.
32  * Input is a pre-transformed radeon_program.
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  */
38
39 #include "r300_fragprog.h"
40
41 #include "../r300_reg.h"
42
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
45
46
/**
 * Book-keeping for the node that is currently being emitted.
 */
struct r300_emit_state {
	struct r300_fragment_program_compiler * compiler;

	/* Index of the node under construction.  begin_tex() refuses to
	 * advance past node 3, so two bits are sufficient. */
	unsigned current_node : 2;

	/* Values of code->tex.length / code->alu.length at the moment the
	 * current node was started.
	 *
	 * These are deliberately full-width integers rather than 8-bit
	 * bit-fields: finish_node() extracts bits 6..8 of the ALU offset
	 * (get_msbs_alu) and the bits above bit 4 of the TEX offset
	 * (get_msbs_tex), which an 8-bit field would silently truncate,
	 * and the "node is still empty" comparisons against the lengths
	 * would stop matching once a length reached 256. */
	unsigned int node_first_tex;
	unsigned int node_first_alu;

	/* Flags OR'ed into the node's code_addr word by finish_node(). */
	uint32_t node_flags;
};
55
/**
 * Declare the two locals nearly every emit helper needs:
 *   c    - the fragment program compiler taken from a local `emit`
 *   code - the r300 hardware code block that is being filled in
 *
 * Must be used where a declaration is allowed and where a variable
 * named `emit` is in scope.  The `error()` macro relies on `c`.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300
59
/**
 * Report a compile error through rc_error(), prefixed with the source
 * file and the name of the function in which the macro is expanded.
 *
 * Requires a variable `c` (see PROG_CODE) in scope.
 */
#define error(fmt, ...) do {					\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##__VA_ARGS__);	\
	} while(0)
64
/**
 * Return bits 6..8 of an ALU offset or size, i.e. the three bits that
 * sit directly above the low six bits of \p bits.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int upper = bits >> 6;

	return upper & 0x7;
}
69
/**
 * Return the most significant bits of a TEX offset or size: the value
 * that remains after dropping the \p lsbs low bits, limited to four bits.
 *
 * The mask used to be 0x15 (binary 10101), which drops bits 1 and 3 of
 * the result and lets bit 4 through; that looks like decimal 15 typed
 * as a hex literal.  A contiguous mask is what an "upper address bits"
 * field needs.
 *
 * NOTE(review): assumes the R400 TEX_START/TEX_SIZE MSB register fields
 * this value is shifted into are four bits wide - confirm against
 * r300_reg.h.
 *
 * @param bits The full offset or size
 * @param lsbs The number of least significant bits
 * @return (bits >> lsbs) masked to the low four bits
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
77
/**
 * Drop the \p lsbs low bits of \p x and mask the remainder with \p mask.
 * Every argument is parenthesized so that expression arguments (for
 * example `a ^ b` as the mask) are evaluated as a unit.
 */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
79
80 /**
81  * Mark a temporary register as used.
82  */
83 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
84 {
85         if (index > code->pixsize)
86                 code->pixsize = index;
87 }
88
89 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
90 {
91         if (!src.Used)
92                 return 0;
93
94         if (src.File == RC_FILE_CONSTANT) {
95                 return src.Index | (1 << 5);
96         } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
97                 use_temporary(code, src.Index);
98                 return src.Index & 0x1f;
99         }
100
101         return 0;
102 }
103
104
105 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
106 {
107         switch(opcode) {
108         case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
109         case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
110         case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
111         case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
112         case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
113         default:
114                 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
115                 /* fall through */
116         case RC_OPCODE_NOP:
117                 /* fall through */
118         case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
119         case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
120         case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
121         case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
122         }
123 }
124
125 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
126 {
127         switch(opcode) {
128         case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
129         case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
130         case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
131         case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
132         case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
133         case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
134         case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
135         default:
136                 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
137                 /* fall through */
138         case RC_OPCODE_NOP:
139                 /* fall through */
140         case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
141         case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
142         case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
143         case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
144         case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
145         }
146 }
147
148 /**
149  * Emit one paired ALU instruction.
150  */
151 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
152 {
153         int ip;
154         int j;
155         PROG_CODE;
156
157         if (code->alu.length >= c->Base.max_alu_insts) {
158                 error("Too many ALU instructions");
159                 return 0;
160         }
161
162         ip = code->alu.length++;
163
164         code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
165         code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
166
167         for(j = 0; j < 3; ++j) {
168                 /* Set the RGB address */
169                 unsigned int src = use_source(code, inst->RGB.Src[j]);
170                 unsigned int arg;
171                 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
172                         code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
173
174                 code->alu.inst[ip].rgb_addr |= src << (6*j);
175
176                 /* Set the Alpha address */
177                 src = use_source(code, inst->Alpha.Src[j]);
178                 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
179                         code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
180
181                 code->alu.inst[ip].alpha_addr |= src << (6*j);
182
183                 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
184                 arg |= inst->RGB.Arg[j].Abs << 6;
185                 arg |= inst->RGB.Arg[j].Negate << 5;
186                 code->alu.inst[ip].rgb_inst |= arg << (7*j);
187
188                 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
189                 arg |= inst->Alpha.Arg[j].Abs << 6;
190                 arg |= inst->Alpha.Arg[j].Negate << 5;
191                 code->alu.inst[ip].alpha_inst |= arg << (7*j);
192         }
193
194         /* Presubtract */
195         if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
196                 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
197                 case RC_PRESUB_BIAS:
198                         code->alu.inst[ip].rgb_inst |=
199                                                 R300_ALU_SRCP_1_MINUS_2_SRC0;
200                         break;
201                 case RC_PRESUB_ADD:
202                         code->alu.inst[ip].rgb_inst |=
203                                                 R300_ALU_SRCP_SRC1_PLUS_SRC0;
204                         break;
205                 case RC_PRESUB_SUB:
206                         code->alu.inst[ip].rgb_inst |=
207                                                 R300_ALU_SRCP_SRC1_MINUS_SRC0;
208                         break;
209                 case RC_PRESUB_INV:
210                         code->alu.inst[ip].rgb_inst |=
211                                                 R300_ALU_SRCP_1_MINUS_SRC0;
212                         break;
213                 default:
214                         break;
215                 }
216         }
217
218         if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
219                 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
220                 case RC_PRESUB_BIAS:
221                         code->alu.inst[ip].alpha_inst |=
222                                                 R300_ALU_SRCP_1_MINUS_2_SRC0;
223                         break;
224                 case RC_PRESUB_ADD:
225                         code->alu.inst[ip].alpha_inst |=
226                                                 R300_ALU_SRCP_SRC1_PLUS_SRC0;
227                         break;
228                 case RC_PRESUB_SUB:
229                         code->alu.inst[ip].alpha_inst |=
230                                                 R300_ALU_SRCP_SRC1_MINUS_SRC0;
231                         break;
232                 case RC_PRESUB_INV:
233                         code->alu.inst[ip].alpha_inst |=
234                                                 R300_ALU_SRCP_1_MINUS_SRC0;
235                         break;
236                 default:
237                         break;
238                 }
239         }
240
241         if (inst->RGB.Saturate)
242                 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
243         if (inst->Alpha.Saturate)
244                 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
245
246         if (inst->RGB.WriteMask) {
247                 use_temporary(code, inst->RGB.DestIndex);
248                 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
249                         code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
250                 code->alu.inst[ip].rgb_addr |=
251                         ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
252                         (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
253         }
254         if (inst->RGB.OutputWriteMask) {
255                 code->alu.inst[ip].rgb_addr |=
256             (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
257             R300_RGB_TARGET(inst->RGB.Target);
258                 emit->node_flags |= R300_RGBA_OUT;
259         }
260
261         if (inst->Alpha.WriteMask) {
262                 use_temporary(code, inst->Alpha.DestIndex);
263                 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
264                         code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
265                 code->alu.inst[ip].alpha_addr |=
266                         ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
267                         R300_ALU_DSTA_REG;
268         }
269         if (inst->Alpha.OutputWriteMask) {
270                 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
271             R300_ALPHA_TARGET(inst->Alpha.Target);
272                 emit->node_flags |= R300_RGBA_OUT;
273         }
274         if (inst->Alpha.DepthWriteMask) {
275                 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
276                 emit->node_flags |= R300_W_OUT;
277                 c->code->writes_depth = 1;
278         }
279         if (inst->Nop)
280                 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
281
282         return 1;
283 }
284
285
286 /**
287  * Finish the current node without advancing to the next one.
288  */
289 static int finish_node(struct r300_emit_state * emit)
290 {
291         struct r300_fragment_program_compiler * c = emit->compiler;
292         struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
293         unsigned alu_offset;
294         unsigned alu_end;
295         unsigned tex_offset;
296         unsigned tex_end;
297
298         unsigned int alu_offset_msbs, alu_end_msbs;
299
300         if (code->alu.length == emit->node_first_alu) {
301                 /* Generate a single NOP for this node */
302                 struct rc_pair_instruction inst;
303                 memset(&inst, 0, sizeof(inst));
304                 if (!emit_alu(emit, &inst))
305                         return 0;
306         }
307
308         alu_offset = emit->node_first_alu;
309         alu_end = code->alu.length - alu_offset - 1;
310         tex_offset = emit->node_first_tex;
311         tex_end = code->tex.length - tex_offset - 1;
312
313         if (code->tex.length == emit->node_first_tex) {
314                 if (emit->current_node > 0) {
315                         error("Node %i has no TEX instructions", emit->current_node);
316                         return 0;
317                 }
318
319                 tex_end = 0;
320         } else {
321                 if (emit->current_node == 0)
322                         code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
323         }
324
325         /* Write the config register.
326          * Note: The order in which the words for each node are written
327          * is not correct here and needs to be fixed up once we're entirely
328          * done
329          *
330          * Also note that the register specification from AMD is slightly
331          * incorrect in its description of this register. */
332         code->code_addr[emit->current_node]  =
333                         ((alu_offset << R300_ALU_START_SHIFT)
334                                 & R300_ALU_START_MASK)
335                         | ((alu_end << R300_ALU_SIZE_SHIFT)
336                                 & R300_ALU_SIZE_MASK)
337                         | ((tex_offset << R300_TEX_START_SHIFT)
338                                 & R300_TEX_START_MASK)
339                         | ((tex_end << R300_TEX_SIZE_SHIFT)
340                                 & R300_TEX_SIZE_MASK)
341                         | emit->node_flags
342                         | (get_msbs_tex(tex_offset, 5)
343                                 << R400_TEX_START_MSB_SHIFT)
344                         | (get_msbs_tex(tex_end, 5)
345                                 << R400_TEX_SIZE_MSB_SHIFT)
346                         ;
347
348         /* Write r400 extended instruction fields.  These will be ignored on
349          * r300 cards.  */
350         alu_offset_msbs = get_msbs_alu(alu_offset);
351         alu_end_msbs = get_msbs_alu(alu_end);
352         switch(emit->current_node) {
353         case 0:
354                 code->r400_code_offset_ext |=
355                         alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
356                         | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
357                 break;
358         case 1:
359                 code->r400_code_offset_ext |=
360                         alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
361                         | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
362                 break;
363         case 2:
364                 code->r400_code_offset_ext |=
365                         alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
366                         | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
367                 break;
368         case 3:
369                 code->r400_code_offset_ext |=
370                         alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
371                         | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
372                 break;
373         }
374         return 1;
375 }
376
377
378 /**
379  * Begin a block of texture instructions.
380  * Create the necessary indirection.
381  */
382 static int begin_tex(struct r300_emit_state * emit)
383 {
384         PROG_CODE;
385
386         if (code->alu.length == emit->node_first_alu &&
387             code->tex.length == emit->node_first_tex) {
388                 return 1;
389         }
390
391         if (emit->current_node == 3) {
392                 error("Too many texture indirections");
393                 return 0;
394         }
395
396         if (!finish_node(emit))
397                 return 0;
398
399         emit->current_node++;
400         emit->node_first_tex = code->tex.length;
401         emit->node_first_alu = code->alu.length;
402         emit->node_flags = 0;
403         return 1;
404 }
405
406
407 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
408 {
409         unsigned int unit;
410         unsigned int dest;
411         unsigned int opcode;
412         PROG_CODE;
413
414         if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
415                 error("Too many TEX instructions");
416                 return 0;
417         }
418
419         unit = inst->U.I.TexSrcUnit;
420         dest = inst->U.I.DstReg.Index;
421
422         switch(inst->U.I.Opcode) {
423         case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
424         case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
425         case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
426         case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
427         default:
428                 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
429                 return 0;
430         }
431
432         if (inst->U.I.Opcode == RC_OPCODE_KIL) {
433                 unit = 0;
434                 dest = 0;
435         } else {
436                 use_temporary(code, dest);
437         }
438
439         use_temporary(code, inst->U.I.SrcReg[0].Index);
440
441         code->tex.inst[code->tex.length++] =
442                 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
443                         & R300_SRC_ADDR_MASK)
444                 | ((dest << R300_DST_ADDR_SHIFT)
445                         & R300_DST_ADDR_MASK)
446                 | (unit << R300_TEX_ID_SHIFT)
447                 | (opcode << R300_TEX_INST_SHIFT)
448                 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
449                         R400_SRC_ADDR_EXT_BIT : 0)
450                 | (dest >= R300_PFS_NUM_TEMP_REGS ?
451                         R400_DST_ADDR_EXT_BIT : 0)
452                 ;
453         return 1;
454 }
455
456
457 /**
458  * Final compilation step: Turn the intermediate radeon_program into
459  * machine-readable instructions.
460  */
461 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
462 {
463         struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
464         struct r300_emit_state emit;
465         struct r300_fragment_program_code *code = &compiler->code->code.r300;
466         unsigned int tex_end;
467
468         memset(&emit, 0, sizeof(emit));
469         emit.compiler = compiler;
470
471         memset(code, 0, sizeof(struct r300_fragment_program_code));
472
473         for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
474             inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
475             inst = inst->Next) {
476                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
477                         if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
478                                 begin_tex(&emit);
479                                 continue;
480                         }
481
482                         emit_tex(&emit, inst);
483                 } else {
484                         emit_alu(&emit, &inst->U.P);
485                 }
486         }
487
488         if (code->pixsize >= compiler->Base.max_temp_regs)
489                 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
490
491         if (compiler->Base.Error)
492                 return;
493
494         /* Finish the program */
495         finish_node(&emit);
496
497         code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
498
499         /* Set r400 extended instruction fields.  These values will be ignored
500          * on r300 cards. */
501         code->r400_code_offset_ext |=
502                 (get_msbs_alu(0)
503                                 << R400_ALU_OFFSET_MSB_SHIFT)
504                 | (get_msbs_alu(code->alu.length - 1)
505                                 << R400_ALU_SIZE_MSB_SHIFT);
506
507         tex_end = code->tex.length ? code->tex.length - 1 : 0;
508         code->code_offset =
509                 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
510                         & R300_PFS_CNTL_ALU_OFFSET_MASK)
511                 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
512                         & R300_PFS_CNTL_ALU_END_MASK)
513                 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
514                         & R300_PFS_CNTL_TEX_OFFSET_MASK)
515                 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
516                         & R300_PFS_CNTL_TEX_END_MASK)
517                 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
518                 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
519                 ;
520
521         if (emit.current_node < 3) {
522                 int shift = 3 - emit.current_node;
523                 int i;
524                 for(i = emit.current_node; i >= 0; --i)
525                         code->code_addr[shift + i] = code->code_addr[i];
526                 for(i = 0; i < shift; ++i)
527                         code->code_addr[i] = 0;
528         }
529
530         if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
531             || code->alu.length > R300_PFS_MAX_ALU_INST
532             || code->tex.length > R300_PFS_MAX_TEX_INST) {
533
534                 code->r390_mode = 1;
535         }
536 }