Tizen 2.1 base
[sdk/emulator/qemu.git] / gl / mesa / src / gallium / drivers / r300 / compiler / radeon_pair_translate.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_program_pair.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
32
33
34 /**
35  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
36  * and reverse the order of arguments for CMP.
37  */
38 static void final_rewrite(struct rc_sub_instruction *inst)
39 {
40         struct rc_src_register tmp;
41
42         switch(inst->Opcode) {
43         case RC_OPCODE_ADD:
44                 inst->SrcReg[2] = inst->SrcReg[1];
45                 inst->SrcReg[1].File = RC_FILE_NONE;
46                 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
47                 inst->SrcReg[1].Negate = RC_MASK_NONE;
48                 inst->Opcode = RC_OPCODE_MAD;
49                 break;
50         case RC_OPCODE_CMP:
51                 tmp = inst->SrcReg[2];
52                 inst->SrcReg[2] = inst->SrcReg[0];
53                 inst->SrcReg[0] = tmp;
54                 break;
55         case RC_OPCODE_MOV:
56                 /* AMD say we should use CMP.
57                  * However, when we transform
58                  *  KIL -r0;
59                  * into
60                  *  CMP tmp, -r0, -r0, 0;
61                  *  KIL tmp;
62                  * we get incorrect behaviour on R500 when r0 == 0.0.
63                  * It appears that the R500 KIL hardware treats -0.0 as less
64                  * than zero.
65                  */
66                 inst->SrcReg[1].File = RC_FILE_NONE;
67                 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
68                 inst->SrcReg[2].File = RC_FILE_NONE;
69                 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
70                 inst->Opcode = RC_OPCODE_MAD;
71                 break;
72         case RC_OPCODE_MUL:
73                 inst->SrcReg[2].File = RC_FILE_NONE;
74                 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
75                 inst->Opcode = RC_OPCODE_MAD;
76                 break;
77         default:
78                 /* nothing to do */
79                 break;
80         }
81 }
82
83
84 /**
85  * Classify an instruction according to which ALUs etc. it needs
86  */
87 static void classify_instruction(struct rc_sub_instruction * inst,
88         int * needrgb, int * needalpha, int * istranscendent)
89 {
90         *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
91         *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
92         *istranscendent = 0;
93
94         if (inst->WriteALUResult == RC_ALURESULT_X)
95                 *needrgb = 1;
96         else if (inst->WriteALUResult == RC_ALURESULT_W)
97                 *needalpha = 1;
98
99         switch(inst->Opcode) {
100         case RC_OPCODE_ADD:
101         case RC_OPCODE_CMP:
102         case RC_OPCODE_CND:
103         case RC_OPCODE_DDX:
104         case RC_OPCODE_DDY:
105         case RC_OPCODE_FRC:
106         case RC_OPCODE_MAD:
107         case RC_OPCODE_MAX:
108         case RC_OPCODE_MIN:
109         case RC_OPCODE_MOV:
110         case RC_OPCODE_MUL:
111                 break;
112         case RC_OPCODE_COS:
113         case RC_OPCODE_EX2:
114         case RC_OPCODE_LG2:
115         case RC_OPCODE_RCP:
116         case RC_OPCODE_RSQ:
117         case RC_OPCODE_SIN:
118                 *istranscendent = 1;
119                 *needalpha = 1;
120                 break;
121         case RC_OPCODE_DP4:
122                 *needalpha = 1;
123                 /* fall through */
124         case RC_OPCODE_DP3:
125                 *needrgb = 1;
126                 break;
127         default:
128                 break;
129         }
130 }
131
132 static void src_uses(struct rc_src_register src, unsigned int * rgb,
133                                                         unsigned int * alpha)
134 {
135         int j;
136         for(j = 0; j < 4; ++j) {
137                 unsigned int swz = GET_SWZ(src.Swizzle, j);
138                 if (swz < 3)
139                         *rgb = 1;
140                 else if (swz < 4)
141                         *alpha = 1;
142         }
143 }
144
145 /**
146  * Fill the given ALU instruction's opcodes and source operands into the given pair,
147  * if possible.
148  */
149 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
150         struct rc_pair_instruction * pair,
151         struct rc_sub_instruction * inst)
152 {
153         int needrgb, needalpha, istranscendent;
154         const struct rc_opcode_info * opcode;
155         int i;
156
157         memset(pair, 0, sizeof(struct rc_pair_instruction));
158
159         classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
160
161         if (needrgb) {
162                 if (istranscendent)
163                         pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
164                 else
165                         pair->RGB.Opcode = inst->Opcode;
166                 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
167                         pair->RGB.Saturate = 1;
168         }
169         if (needalpha) {
170                 pair->Alpha.Opcode = inst->Opcode;
171                 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
172                         pair->Alpha.Saturate = 1;
173         }
174
175         opcode = rc_get_opcode_info(inst->Opcode);
176
177         /* Presubtract handling:
178          * We need to make sure that the values used by the presubtract
179          * operation end up in src0 or src1. */
180         if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
181                 /* rc_pair_alloc_source() will fill in data for
182                  * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
183                 int j;
184                 for(j = 0; j < 3; j++) {
185                         int src_regs;
186                         if(inst->SrcReg[j].File != RC_FILE_PRESUB)
187                                 continue;
188
189                         src_regs = rc_presubtract_src_reg_count(
190                                                         inst->PreSub.Opcode);
191                         for(i = 0; i < src_regs; i++) {
192                                 unsigned int rgb = 0;
193                                 unsigned int alpha = 0;
194                                 src_uses(inst->SrcReg[j], &rgb, &alpha);
195                                 if(rgb) {
196                                         pair->RGB.Src[i].File =
197                                                 inst->PreSub.SrcReg[i].File;
198                                         pair->RGB.Src[i].Index =
199                                                 inst->PreSub.SrcReg[i].Index;
200                                         pair->RGB.Src[i].Used = 1;
201                                 }
202                                 if(alpha) {
203                                         pair->Alpha.Src[i].File =
204                                                 inst->PreSub.SrcReg[i].File;
205                                         pair->Alpha.Src[i].Index =
206                                                 inst->PreSub.SrcReg[i].Index;
207                                         pair->Alpha.Src[i].Used = 1;
208                                 }
209                         }
210                 }
211         }
212
213         for(i = 0; i < opcode->NumSrcRegs; ++i) {
214                 int source;
215                 if (needrgb && !istranscendent) {
216                         unsigned int srcrgb = 0;
217                         unsigned int srcalpha = 0;
218                         unsigned int srcmask = 0;
219                         int j;
220                         /* We don't care about the alpha channel here.  We only
221                          * want the part of the swizzle that writes to rgb,
222                          * since we are creating an rgb instruction. */
223                         for(j = 0; j < 3; ++j) {
224                                 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
225
226                                 if (swz < RC_SWIZZLE_W)
227                                         srcrgb = 1;
228                                 else if (swz == RC_SWIZZLE_W)
229                                         srcalpha = 1;
230
231                                 if (swz < RC_SWIZZLE_UNUSED)
232                                         srcmask |= 1 << j;
233                         }
234                         source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
235                                                         inst->SrcReg[i].File, inst->SrcReg[i].Index);
236                         if (source < 0) {
237                                 rc_error(&c->Base, "Failed to translate "
238                                                         "rgb instruction.\n");
239                                 return;
240                         }
241                         pair->RGB.Arg[i].Source = source;
242                         pair->RGB.Arg[i].Swizzle =
243                                 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
244                         pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
245                         pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
246                 }
247                 if (needalpha) {
248                         unsigned int srcrgb = 0;
249                         unsigned int srcalpha = 0;
250                         unsigned int swz;
251                         if (istranscendent) {
252                                 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
253                         } else {
254                                 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
255                         }
256
257                         if (swz < 3)
258                                 srcrgb = 1;
259                         else if (swz < 4)
260                                 srcalpha = 1;
261                         source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
262                                                         inst->SrcReg[i].File, inst->SrcReg[i].Index);
263                         if (source < 0) {
264                                 rc_error(&c->Base, "Failed to translate "
265                                                         "alpha instruction.\n");
266                                 return;
267                         }
268                         pair->Alpha.Arg[i].Source = source;
269                         pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
270                         pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
271                         pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
272                 }
273         }
274
275         /* Destination handling */
276         if (inst->DstReg.File == RC_FILE_OUTPUT) {
277         if (inst->DstReg.Index == c->OutputDepth) {
278             pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
279         } else {
280             for (i = 0; i < 4; i++) {
281                 if (inst->DstReg.Index == c->OutputColor[i]) {
282                     pair->RGB.Target = i;
283                     pair->Alpha.Target = i;
284                     pair->RGB.OutputWriteMask |=
285                         inst->DstReg.WriteMask & RC_MASK_XYZ;
286                     pair->Alpha.OutputWriteMask |=
287                         GET_BIT(inst->DstReg.WriteMask, 3);
288                     break;
289                 }
290             }
291         }
292         } else {
293                 if (needrgb) {
294                         pair->RGB.DestIndex = inst->DstReg.Index;
295                         pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
296                 }
297
298                 if (needalpha) {
299                         pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
300                         if (pair->Alpha.WriteMask) {
301                                 pair->Alpha.DestIndex = inst->DstReg.Index;
302                         }
303                 }
304         }
305
306         if (needrgb) {
307                 pair->RGB.Omod = inst->Omod;
308         }
309         if (needalpha) {
310                 pair->Alpha.Omod = inst->Omod;
311         }
312
313         if (inst->WriteALUResult) {
314                 pair->WriteALUResult = inst->WriteALUResult;
315                 pair->ALUResultCompare = inst->ALUResultCompare;
316         }
317 }
318
319
320 static void check_opcode_support(struct r300_fragment_program_compiler *c,
321                                  struct rc_sub_instruction *inst)
322 {
323         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
324
325         if (opcode->HasDstReg) {
326                 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
327                         rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
328                         return;
329                 }
330         }
331
332         for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
333                 if (inst->SrcReg[i].RelAddr) {
334                         rc_error(&c->Base, "Fragment program does not support relative addressing "
335                                  " of source operands.\n");
336                         return;
337                 }
338         }
339 }
340
341
342 /**
343  * Translate all ALU instructions into corresponding pair instructions,
344  * performing no other changes.
345  */
346 void rc_pair_translate(struct radeon_compiler *cc, void *user)
347 {
348         struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
349
350         for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
351             inst != &c->Base.Program.Instructions;
352             inst = inst->Next) {
353                 const struct rc_opcode_info * opcode;
354                 struct rc_sub_instruction copy;
355
356                 if (inst->Type != RC_INSTRUCTION_NORMAL)
357                         continue;
358
359                 opcode = rc_get_opcode_info(inst->U.I.Opcode);
360
361                 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
362                         continue;
363
364                 copy = inst->U.I;
365
366                 check_opcode_support(c, &copy);
367
368                 final_rewrite(&copy);
369                 inst->Type = RC_INSTRUCTION_PAIR;
370                 set_pair_instruction(c, &inst->U.P, &copy);
371         }
372 }