Tizen 2.1 base
[sdk/emulator/qemu.git] / gl / mesa / src / gallium / drivers / r300 / compiler / radeon_compiler.c
1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28
29 #include "radeon_dataflow.h"
30 #include "radeon_program.h"
31 #include "radeon_program_pair.h"
32 #include "radeon_compiler_util.h"
33
34
35 void rc_init(struct radeon_compiler * c)
36 {
37         memset(c, 0, sizeof(*c));
38
39         memory_pool_init(&c->Pool);
40         c->Program.Instructions.Prev = &c->Program.Instructions;
41         c->Program.Instructions.Next = &c->Program.Instructions;
42         c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
43 }
44
45 void rc_destroy(struct radeon_compiler * c)
46 {
47         rc_constants_destroy(&c->Program.Constants);
48         memory_pool_destroy(&c->Pool);
49         free(c->ErrorMsg);
50 }
51
52 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
53 {
54         va_list ap;
55
56         if (!(c->Debug & RC_DBG_LOG))
57                 return;
58
59         va_start(ap, fmt);
60         vfprintf(stderr, fmt, ap);
61         va_end(ap);
62 }
63
64 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
65 {
66         va_list ap;
67
68         c->Error = 1;
69
70         if (!c->ErrorMsg) {
71                 /* Only remember the first error */
72                 char buf[1024];
73                 int written;
74
75                 va_start(ap, fmt);
76                 written = vsnprintf(buf, sizeof(buf), fmt, ap);
77                 va_end(ap);
78
79                 if (written < sizeof(buf)) {
80                         c->ErrorMsg = strdup(buf);
81                 } else {
82                         c->ErrorMsg = malloc(written + 1);
83
84                         va_start(ap, fmt);
85                         vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
86                         va_end(ap);
87                 }
88         }
89
90         if (c->Debug & RC_DBG_LOG) {
91                 fprintf(stderr, "r300compiler error: ");
92
93                 va_start(ap, fmt);
94                 vfprintf(stderr, fmt, ap);
95                 va_end(ap);
96         }
97 }
98
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param file       source file name to include in the message
 * \param line       source line number to include in the message
 * \param assertion  text of the assertion that failed
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
        rc_error(c,
                 "ICE at %s:%i: assertion failed: %s\n",
                 file, line, assertion);

        return 1;
}
104
105 /**
106  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
107  * based on which inputs and outputs are actually referenced
108  * in program instructions.
109  */
110 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
111 {
112         struct rc_instruction *inst;
113
114         c->Program.InputsRead = 0;
115         c->Program.OutputsWritten = 0;
116
117         for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
118         {
119                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
120                 int i;
121
122                 for (i = 0; i < opcode->NumSrcRegs; ++i) {
123                         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
124                                 c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
125                 }
126
127                 if (opcode->HasDstReg) {
128                         if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
129                                 c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
130                 }
131         }
132 }
133
134 /**
135  * Rewrite the program such that everything that source the given input
136  * register will source new_input instead.
137  */
138 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
139 {
140         struct rc_instruction * inst;
141
142         c->Program.InputsRead &= ~(1 << input);
143
144         for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
145                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
146                 unsigned i;
147
148                 for(i = 0; i < opcode->NumSrcRegs; ++i) {
149                         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
150                                 inst->U.I.SrcReg[i].File = new_input.File;
151                                 inst->U.I.SrcReg[i].Index = new_input.Index;
152                                 inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
153                                 if (!inst->U.I.SrcReg[i].Abs) {
154                                         inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
155                                         inst->U.I.SrcReg[i].Abs = new_input.Abs;
156                                 }
157
158                                 c->Program.InputsRead |= 1 << new_input.Index;
159                         }
160                 }
161         }
162 }
163
164
165 /**
166  * Rewrite the program such that everything that writes into the given
167  * output register will instead write to new_output. The new_output
168  * writemask is honoured.
169  */
170 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
171 {
172         struct rc_instruction * inst;
173
174         c->Program.OutputsWritten &= ~(1 << output);
175
176         for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
177                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
178
179                 if (opcode->HasDstReg) {
180                         if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
181                                 inst->U.I.DstReg.Index = new_output;
182                                 inst->U.I.DstReg.WriteMask &= writemask;
183
184                                 c->Program.OutputsWritten |= 1 << new_output;
185                         }
186                 }
187         }
188 }
189
190
191 /**
192  * Rewrite the program such that a given output is duplicated.
193  */
194 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
195 {
196         unsigned tempreg = rc_find_free_temporary(c);
197         struct rc_instruction * inst;
198
199         for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
200                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
201
202                 if (opcode->HasDstReg) {
203                         if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
204                                 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
205                                 inst->U.I.DstReg.Index = tempreg;
206                         }
207                 }
208         }
209
210         inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
211         inst->U.I.Opcode = RC_OPCODE_MOV;
212         inst->U.I.DstReg.File = RC_FILE_OUTPUT;
213         inst->U.I.DstReg.Index = output;
214
215         inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
216         inst->U.I.SrcReg[0].Index = tempreg;
217         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
218
219         inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
220         inst->U.I.Opcode = RC_OPCODE_MOV;
221         inst->U.I.DstReg.File = RC_FILE_OUTPUT;
222         inst->U.I.DstReg.Index = dup_output;
223
224         inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
225         inst->U.I.SrcReg[0].Index = tempreg;
226         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
227
228         c->Program.OutputsWritten |= 1 << dup_output;
229 }
230
231
232 /**
233  * Introduce standard code fragment to deal with fragment.position.
234  */
235 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
236                                 int full_vtransform)
237 {
238         unsigned tempregi = rc_find_free_temporary(c);
239         struct rc_instruction * inst_rcp;
240         struct rc_instruction * inst_mul;
241         struct rc_instruction * inst_mad;
242         struct rc_instruction * inst;
243
244         c->Program.InputsRead &= ~(1 << wpos);
245         c->Program.InputsRead |= 1 << new_input;
246
247         /* perspective divide */
248         inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
249         inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
250
251         inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
252         inst_rcp->U.I.DstReg.Index = tempregi;
253         inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
254
255         inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
256         inst_rcp->U.I.SrcReg[0].Index = new_input;
257         inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
258
259         inst_mul = rc_insert_new_instruction(c, inst_rcp);
260         inst_mul->U.I.Opcode = RC_OPCODE_MUL;
261
262         inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
263         inst_mul->U.I.DstReg.Index = tempregi;
264         inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
265
266         inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
267         inst_mul->U.I.SrcReg[0].Index = new_input;
268
269         inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
270         inst_mul->U.I.SrcReg[1].Index = tempregi;
271         inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
272
273         /* viewport transformation */
274         inst_mad = rc_insert_new_instruction(c, inst_mul);
275         inst_mad->U.I.Opcode = RC_OPCODE_MAD;
276
277         inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
278         inst_mad->U.I.DstReg.Index = tempregi;
279         inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
280
281         inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
282         inst_mad->U.I.SrcReg[0].Index = tempregi;
283         inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
284
285         inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
286         inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
287
288         inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
289         inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
290
291         if (full_vtransform) {
292                 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
293                 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
294         } else {
295                 inst_mad->U.I.SrcReg[1].Index =
296                 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
297         }
298
299         for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
300                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
301                 unsigned i;
302
303                 for(i = 0; i < opcode->NumSrcRegs; i++) {
304                         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
305                             inst->U.I.SrcReg[i].Index == wpos) {
306                                 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
307                                 inst->U.I.SrcReg[i].Index = tempregi;
308                         }
309                 }
310         }
311 }
312
313
314 /**
315  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
316  * Gallium and OpenGL define it the other way around.
317  *
318  * So let's just negate FACE at the beginning of the shader and rewrite the rest
319  * of the shader to read from the newly allocated temporary.
320  */
321 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
322 {
323         unsigned tempregi = rc_find_free_temporary(c);
324         struct rc_instruction *inst_add;
325         struct rc_instruction *inst;
326
327         /* perspective divide */
328         inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
329         inst_add->U.I.Opcode = RC_OPCODE_ADD;
330
331         inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
332         inst_add->U.I.DstReg.Index = tempregi;
333         inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
334
335         inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
336         inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
337
338         inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
339         inst_add->U.I.SrcReg[1].Index = face;
340         inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
341         inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
342
343         for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
344                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
345                 unsigned i;
346
347                 for(i = 0; i < opcode->NumSrcRegs; i++) {
348                         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
349                             inst->U.I.SrcReg[i].Index == face) {
350                                 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
351                                 inst->U.I.SrcReg[i].Index = tempregi;
352                         }
353                 }
354         }
355 }
356
357 static void reg_count_callback(void * userdata, struct rc_instruction * inst,
358                 rc_register_file file, unsigned int index, unsigned int mask)
359 {
360         int *max_reg = userdata;
361         if (file == RC_FILE_TEMPORARY)
362                 (int)index > *max_reg ? *max_reg = index : 0;
363 }
364
365 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
366 {
367         int max_reg = -1;
368         struct rc_instruction * tmp;
369         memset(s, 0, sizeof(*s));
370
371         for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
372                                                         tmp = tmp->Next){
373                 const struct rc_opcode_info * info;
374                 rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
375                 if (tmp->Type == RC_INSTRUCTION_NORMAL) {
376                         info = rc_get_opcode_info(tmp->U.I.Opcode);
377                         if (info->Opcode == RC_OPCODE_BEGIN_TEX)
378                                 continue;
379                         if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
380                                 s->num_presub_ops++;
381                 } else {
382                         if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
383                                 s->num_presub_ops++;
384                         if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
385                                 s->num_presub_ops++;
386                         /* Assuming alpha will never be a flow control or
387                          * a tex instruction. */
388                         if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
389                                 s->num_alpha_insts++;
390                         if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
391                                 s->num_rgb_insts++;
392                         if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
393                                 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
394                                 s->num_omod_ops++;
395                         }
396                         if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
397                                 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
398                                 s->num_omod_ops++;
399                         }
400                         info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
401                 }
402                 if (info->IsFlowControl)
403                         s->num_fc_insts++;
404                 if (info->HasTexture)
405                         s->num_tex_insts++;
406                 s->num_insts++;
407         }
408         s->num_temp_regs = max_reg + 1;
409 }
410
411 static void print_stats(struct radeon_compiler * c)
412 {
413         struct rc_program_stats s;
414
415         if (c->initial_num_insts <= 5)
416                 return;
417
418         rc_get_stats(c, &s);
419
420         switch (c->type) {
421         case RC_VERTEX_PROGRAM:
422                 fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
423                                "~%4u Instructions\n"
424                                "~%4u Flow Control Instructions\n"
425                                "~%4u Temporary Registers\n"
426                                "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
427                                s.num_insts, s.num_fc_insts, s.num_temp_regs);
428                 break;
429
430         case RC_FRAGMENT_PROGRAM:
431                 fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
432                                "~%4u Instructions\n"
433                                "~%4u Vector Instructions (RGB)\n"
434                                "~%4u Scalar Instructions (Alpha)\n"
435                                "~%4u Flow Control Instructions\n"
436                                "~%4u Texture Instructions\n"
437                                "~%4u Presub Operations\n"
438                                "~%4u OMOD Operations\n"
439                                "~%4u Temporary Registers\n"
440                                "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
441                                s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
442                                s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
443                                s.num_omod_ops, s.num_temp_regs);
444                 break;
445         default:
446                 assert(0);
447         }
448 }
449
450 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
451         "Vertex Program",
452         "Fragment Program"
453 };
454
455 void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
456 {
457         for (unsigned i = 0; list[i].name; i++) {
458                 if (list[i].predicate) {
459                         list[i].run(c, list[i].user);
460
461                         if (c->Error)
462                                 return;
463
464                         if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
465                                 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
466                                 rc_print_program(&c->Program);
467                         }
468                 }
469         }
470 }
471
472 /* Executes a list of compiler passes given in the parameter 'list'. */
473 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
474 {
475         struct rc_program_stats s;
476
477         rc_get_stats(c, &s);
478         c->initial_num_insts = s.num_insts;
479
480         if (c->Debug & RC_DBG_LOG) {
481                 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
482                 rc_print_program(&c->Program);
483         }
484
485         rc_run_compiler_passes(c, list);
486
487         if (c->Debug & RC_DBG_STATS)
488                 print_stats(c);
489 }
490
491 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
492 {
493         /* Check the number of constants. */
494         if (c->Program.Constants.Count > c->max_constants) {
495                 rc_error(c, "Too many constants. Max: %i, Got: %i\n",
496                          c->max_constants, c->Program.Constants.Count);
497         }
498 }