Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
35 struct src_clobbered_reads_cb_data {
36         rc_register_file File;
37         unsigned int Index;
38         unsigned int Mask;
39         struct rc_reader_data * ReaderData;
40 };
41
/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 * It receives the ADD instruction being folded, the instruction that reads
 * its result, and the index of the reader's source operand to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48         struct rc_src_register combine;
49         combine.File = inner.File;
50         combine.Index = inner.Index;
51         combine.RelAddr = inner.RelAddr;
52         if (outer.Abs) {
53                 combine.Abs = 1;
54                 combine.Negate = outer.Negate;
55         } else {
56                 combine.Abs = inner.Abs;
57                 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
58                 combine.Negate ^= outer.Negate;
59         }
60         combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61         return combine;
62 }
63
64 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
65                                                 struct rc_src_register * src)
66 {
67         rc_register_file file = src->File;
68         struct rc_reader_data * reader_data = data;
69
70         if(!rc_inst_can_use_presub(inst,
71                                 reader_data->Writer->U.I.PreSub.Opcode,
72                                 rc_swizzle_to_writemask(src->Swizzle),
73                                 src,
74                                 &reader_data->Writer->U.I.PreSub.SrcReg[0],
75                                 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
76                 reader_data->Abort = 1;
77                 return;
78         }
79
80         /* XXX This could probably be handled better. */
81         if (file == RC_FILE_ADDRESS) {
82                 reader_data->Abort = 1;
83                 return;
84         }
85
86         /* These instructions cannot read from the constants file.
87          * see radeonTransformTEX()
88          */
89         if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90                         reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91                                 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92                                 inst->U.I.Opcode == RC_OPCODE_TXB ||
93                                 inst->U.I.Opcode == RC_OPCODE_TXP ||
94                                 inst->U.I.Opcode == RC_OPCODE_TXD ||
95                                 inst->U.I.Opcode == RC_OPCODE_TXL ||
96                                 inst->U.I.Opcode == RC_OPCODE_KIL)){
97                 reader_data->Abort = 1;
98                 return;
99         }
100 }
101
102 static void src_clobbered_reads_cb(
103         void * data,
104         struct rc_instruction * inst,
105         struct rc_src_register * src)
106 {
107         struct src_clobbered_reads_cb_data * sc_data = data;
108
109         if (src->File == sc_data->File
110             && src->Index == sc_data->Index
111             && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
112
113                 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
114         }
115
116         if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
117                 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118         }
119 }
120
121 static void is_src_clobbered_scan_write(
122         void * data,
123         struct rc_instruction * inst,
124         rc_register_file file,
125         unsigned int index,
126         unsigned int mask)
127 {
128         struct src_clobbered_reads_cb_data sc_data;
129         struct rc_reader_data * reader_data = data;
130         sc_data.File = file;
131         sc_data.Index = index;
132         sc_data.Mask = mask;
133         sc_data.ReaderData = reader_data;
134         rc_for_all_reads_src(reader_data->Writer,
135                                         src_clobbered_reads_cb, &sc_data);
136 }
137
138 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
139 {
140         struct rc_reader_data reader_data;
141         unsigned int i;
142
143         if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
144             inst_mov->U.I.WriteALUResult ||
145             inst_mov->U.I.SaturateMode)
146                 return;
147
148         /* Get a list of all the readers of this MOV instruction. */
149         reader_data.ExitOnAbort = 1;
150         rc_get_readers(c, inst_mov, &reader_data,
151                        copy_propagate_scan_read, NULL,
152                        is_src_clobbered_scan_write);
153
154         if (reader_data.Abort || reader_data.ReaderCount == 0)
155                 return;
156
157         /* Propagate the MOV instruction. */
158         for (i = 0; i < reader_data.ReaderCount; i++) {
159                 struct rc_instruction * inst = reader_data.Readers[i].Inst;
160                 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
161
162                 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
163                         inst->U.I.PreSub = inst_mov->U.I.PreSub;
164         }
165
166         /* Finally, remove the original MOV instruction */
167         rc_remove_instruction(inst_mov);
168 }
169
170 /**
171  * Check if a source register is actually always the same
172  * swizzle constant.
173  */
174 static int is_src_uniform_constant(struct rc_src_register src,
175                 rc_swizzle * pswz, unsigned int * pnegate)
176 {
177         int have_used = 0;
178
179         if (src.File != RC_FILE_NONE) {
180                 *pswz = 0;
181                 return 0;
182         }
183
184         for(unsigned int chan = 0; chan < 4; ++chan) {
185                 unsigned int swz = GET_SWZ(src.Swizzle, chan);
186                 if (swz < 4) {
187                         *pswz = 0;
188                         return 0;
189                 }
190                 if (swz == RC_SWIZZLE_UNUSED)
191                         continue;
192
193                 if (!have_used) {
194                         *pswz = swz;
195                         *pnegate = GET_BIT(src.Negate, chan);
196                         have_used = 1;
197                 } else {
198                         if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
199                                 *pswz = 0;
200                                 return 0;
201                         }
202                 }
203         }
204
205         return 1;
206 }
207
208 static void constant_folding_mad(struct rc_instruction * inst)
209 {
210         rc_swizzle swz = 0;
211         unsigned int negate= 0;
212
213         if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
214                 if (swz == RC_SWIZZLE_ZERO) {
215                         inst->U.I.Opcode = RC_OPCODE_MUL;
216                         return;
217                 }
218         }
219
220         if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
221                 if (swz == RC_SWIZZLE_ONE) {
222                         inst->U.I.Opcode = RC_OPCODE_ADD;
223                         if (negate)
224                                 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
225                         inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
226                         return;
227                 } else if (swz == RC_SWIZZLE_ZERO) {
228                         inst->U.I.Opcode = RC_OPCODE_MOV;
229                         inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
230                         return;
231                 }
232         }
233
234         if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
235                 if (swz == RC_SWIZZLE_ONE) {
236                         inst->U.I.Opcode = RC_OPCODE_ADD;
237                         if (negate)
238                                 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
239                         inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
240                         return;
241                 } else if (swz == RC_SWIZZLE_ZERO) {
242                         inst->U.I.Opcode = RC_OPCODE_MOV;
243                         inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
244                         return;
245                 }
246         }
247 }
248
249 static void constant_folding_mul(struct rc_instruction * inst)
250 {
251         rc_swizzle swz = 0;
252         unsigned int negate = 0;
253
254         if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
255                 if (swz == RC_SWIZZLE_ONE) {
256                         inst->U.I.Opcode = RC_OPCODE_MOV;
257                         inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
258                         if (negate)
259                                 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
260                         return;
261                 } else if (swz == RC_SWIZZLE_ZERO) {
262                         inst->U.I.Opcode = RC_OPCODE_MOV;
263                         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
264                         return;
265                 }
266         }
267
268         if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
269                 if (swz == RC_SWIZZLE_ONE) {
270                         inst->U.I.Opcode = RC_OPCODE_MOV;
271                         if (negate)
272                                 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
273                         return;
274                 } else if (swz == RC_SWIZZLE_ZERO) {
275                         inst->U.I.Opcode = RC_OPCODE_MOV;
276                         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
277                         return;
278                 }
279         }
280 }
281
282 static void constant_folding_add(struct rc_instruction * inst)
283 {
284         rc_swizzle swz = 0;
285         unsigned int negate = 0;
286
287         if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
288                 if (swz == RC_SWIZZLE_ZERO) {
289                         inst->U.I.Opcode = RC_OPCODE_MOV;
290                         inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
291                         return;
292                 }
293         }
294
295         if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
296                 if (swz == RC_SWIZZLE_ZERO) {
297                         inst->U.I.Opcode = RC_OPCODE_MOV;
298                         return;
299                 }
300         }
301 }
302
303 /**
304  * Replace 0.0, 1.0 and 0.5 immediate constants by their
305  * respective swizzles. Simplify instructions like ADD dst, src, 0;
306  */
307 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
308 {
309         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
310         unsigned int i;
311
312         /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
313         for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
314                 struct rc_constant * constant;
315                 struct rc_src_register newsrc;
316                 int have_real_reference;
317                 unsigned int chan;
318
319                 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
320                 for (chan = 0; chan < 4; ++chan)
321                         if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
322                                 break;
323                 if (chan == 4) {
324                         inst->U.I.SrcReg[src].File = RC_FILE_NONE;
325                         continue;
326                 }
327
328                 /* Convert immediates to swizzles. */
329                 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
330                     inst->U.I.SrcReg[src].RelAddr ||
331                     inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
332                         continue;
333
334                 constant =
335                         &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
336
337                 if (constant->Type != RC_CONSTANT_IMMEDIATE)
338                         continue;
339
340                 newsrc = inst->U.I.SrcReg[src];
341                 have_real_reference = 0;
342                 for (chan = 0; chan < 4; ++chan) {
343                         unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
344                         unsigned int newswz;
345                         float imm;
346                         float baseimm;
347
348                         if (swz >= 4)
349                                 continue;
350
351                         imm = constant->u.Immediate[swz];
352                         baseimm = imm;
353                         if (imm < 0.0)
354                                 baseimm = -baseimm;
355
356                         if (baseimm == 0.0) {
357                                 newswz = RC_SWIZZLE_ZERO;
358                         } else if (baseimm == 1.0) {
359                                 newswz = RC_SWIZZLE_ONE;
360                         } else if (baseimm == 0.5 && c->has_half_swizzles) {
361                                 newswz = RC_SWIZZLE_HALF;
362                         } else {
363                                 have_real_reference = 1;
364                                 continue;
365                         }
366
367                         SET_SWZ(newsrc.Swizzle, chan, newswz);
368                         if (imm < 0.0 && !newsrc.Abs)
369                                 newsrc.Negate ^= 1 << chan;
370                 }
371
372                 if (!have_real_reference) {
373                         newsrc.File = RC_FILE_NONE;
374                         newsrc.Index = 0;
375                 }
376
377                 /* don't make the swizzle worse */
378                 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
379                     c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
380                         continue;
381
382                 inst->U.I.SrcReg[src] = newsrc;
383         }
384
385         /* Simplify instructions based on constants */
386         if (inst->U.I.Opcode == RC_OPCODE_MAD)
387                 constant_folding_mad(inst);
388
389         /* note: MAD can simplify to MUL or ADD */
390         if (inst->U.I.Opcode == RC_OPCODE_MUL)
391                 constant_folding_mul(inst);
392         else if (inst->U.I.Opcode == RC_OPCODE_ADD)
393                 constant_folding_add(inst);
394
395         /* In case this instruction has been converted, make sure all of the
396          * registers that are no longer used are empty. */
397         opcode = rc_get_opcode_info(inst->U.I.Opcode);
398         for(i = opcode->NumSrcRegs; i < 3; i++) {
399                 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
400         }
401 }
402
403 /**
404  * If src and dst use the same register, this function returns a writemask that
405  * indicates wich components are read by src.  Otherwise zero is returned.
406  */
407 static unsigned int src_reads_dst_mask(struct rc_src_register src,
408                                                 struct rc_dst_register dst)
409 {
410         if (dst.File != src.File || dst.Index != src.Index) {
411                 return 0;
412         }
413         return rc_swizzle_to_writemask(src.Swizzle);
414 }
415
416 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
417  * in any of its channels.  Return 0 otherwise. */
418 static int src_has_const_swz(struct rc_src_register src) {
419         int chan;
420         for(chan = 0; chan < 4; chan++) {
421                 unsigned int swz = GET_SWZ(src.Swizzle, chan);
422                 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
423                                                 || swz == RC_SWIZZLE_ONE) {
424                         return 1;
425                 }
426         }
427         return 0;
428 }
429
430 static void presub_scan_read(
431         void * data,
432         struct rc_instruction * inst,
433         struct rc_src_register * src)
434 {
435         struct rc_reader_data * reader_data = data;
436         rc_presubtract_op * presub_opcode = reader_data->CbData;
437
438         if (!rc_inst_can_use_presub(inst, *presub_opcode,
439                         reader_data->Writer->U.I.DstReg.WriteMask,
440                         src,
441                         &reader_data->Writer->U.I.SrcReg[0],
442                         &reader_data->Writer->U.I.SrcReg[1])) {
443                 reader_data->Abort = 1;
444                 return;
445         }
446 }
447
448 static int presub_helper(
449         struct radeon_compiler * c,
450         struct rc_instruction * inst_add,
451         rc_presubtract_op presub_opcode,
452         rc_presub_replace_fn presub_replace)
453 {
454         struct rc_reader_data reader_data;
455         unsigned int i;
456         rc_presubtract_op cb_op = presub_opcode;
457
458         reader_data.CbData = &cb_op;
459         reader_data.ExitOnAbort = 1;
460         rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
461                                                 is_src_clobbered_scan_write);
462
463         if (reader_data.Abort || reader_data.ReaderCount == 0)
464                 return 0;
465
466         for(i = 0; i < reader_data.ReaderCount; i++) {
467                 unsigned int src_index;
468                 struct rc_reader reader = reader_data.Readers[i];
469                 const struct rc_opcode_info * info =
470                                 rc_get_opcode_info(reader.Inst->U.I.Opcode);
471
472                 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
473                         if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
474                                 presub_replace(inst_add, reader.Inst, src_index);
475                 }
476         }
477         return 1;
478 }
479
480 /* This function assumes that inst_add->U.I.SrcReg[0] and
481  * inst_add->U.I.SrcReg[1] aren't both negative. */
482 static void presub_replace_add(
483         struct rc_instruction * inst_add,
484         struct rc_instruction * inst_reader,
485         unsigned int src_index)
486 {
487         rc_presubtract_op presub_opcode;
488         if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
489                 presub_opcode = RC_PRESUB_SUB;
490         else
491                 presub_opcode = RC_PRESUB_ADD;
492
493         if (inst_add->U.I.SrcReg[1].Negate) {
494                 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
495                 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
496         } else {
497                 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
498                 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
499         }
500         inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
501         inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
502         inst_reader->U.I.PreSub.Opcode = presub_opcode;
503         inst_reader->U.I.SrcReg[src_index] =
504                         chain_srcregs(inst_reader->U.I.SrcReg[src_index],
505                                         inst_reader->U.I.PreSub.SrcReg[0]);
506         inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
507         inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
508 }
509
510 static int is_presub_candidate(
511         struct radeon_compiler * c,
512         struct rc_instruction * inst)
513 {
514         const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
515         unsigned int i;
516         unsigned int is_constant[2] = {0, 0};
517
518         assert(inst->U.I.Opcode == RC_OPCODE_ADD);
519
520         if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
521                         || inst->U.I.SaturateMode
522                         || inst->U.I.WriteALUResult) {
523                 return 0;
524         }
525
526         /* If both sources use a constant swizzle, then we can't convert it to
527          * a presubtract operation.  In fact for the ADD and SUB presubtract
528          * operations neither source can contain a constant swizzle.  This
529          * specific case is checked in peephole_add_presub_add() when
530          * we make sure the swizzles for both sources are equal, so we
531          * don't need to worry about it here. */
532         for (i = 0; i < 2; i++) {
533                 int chan;
534                 for (chan = 0; chan < 4; chan++) {
535                         rc_swizzle swz =
536                                 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
537                         if (swz == RC_SWIZZLE_ONE
538                                         || swz == RC_SWIZZLE_ZERO
539                                         || swz == RC_SWIZZLE_HALF) {
540                                 is_constant[i] = 1;
541                         }
542                 }
543         }
544         if (is_constant[0] && is_constant[1])
545                 return 0;
546
547         for(i = 0; i < info->NumSrcRegs; i++) {
548                 struct rc_src_register src = inst->U.I.SrcReg[i];
549                 if (src_reads_dst_mask(src, inst->U.I.DstReg))
550                         return 0;
551
552                 src.File = RC_FILE_PRESUB;
553                 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
554                         return 0;
555         }
556         return 1;
557 }
558
559 static int peephole_add_presub_add(
560         struct radeon_compiler * c,
561         struct rc_instruction * inst_add)
562 {
563         unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
564         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
565         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
566
567         if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
568                 return 0;
569
570         /* src0 and src1 can't have absolute values */
571         if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
572                 return 0;
573
574         /* presub_replace_add() assumes only one is negative */
575         if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
576                 return 0;
577
578         /* if src0 is negative, at least all bits of dstmask have to be set */
579         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
580                 return 0;
581
582         /* if src1 is negative, at least all bits of dstmask have to be set */
583         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
584                 return 0;
585
586         if (!is_presub_candidate(c, inst_add))
587                 return 0;
588
589         if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
590                 rc_remove_instruction(inst_add);
591                 return 1;
592         }
593         return 0;
594 }
595
596 static void presub_replace_inv(
597         struct rc_instruction * inst_add,
598         struct rc_instruction * inst_reader,
599         unsigned int src_index)
600 {
601         /* We must be careful not to modify inst_add, since it
602          * is possible it will remain part of the program.*/
603         inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
604         inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
605         inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
606         inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
607                                                 inst_reader->U.I.PreSub.SrcReg[0]);
608
609         inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
610         inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
611 }
612
613 /**
614  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
615  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
616  * of the add instruction must have the constatnt 1 swizzle.  This function
617  * does not check const registers to see if their value is 1.0, so it should
618  * be called after the constant_folding optimization.
619  * @return
620  *      0 if the ADD instruction is still part of the program.
621  *      1 if the ADD instruction is no longer part of the program.
622  */
623 static int peephole_add_presub_inv(
624         struct radeon_compiler * c,
625         struct rc_instruction * inst_add)
626 {
627         unsigned int i, swz;
628
629         if (!is_presub_candidate(c, inst_add))
630                 return 0;
631
632         /* Check if src0 is 1. */
633         /* XXX It would be nice to use is_src_uniform_constant here, but that
634          * function only works if the register's file is RC_FILE_NONE */
635         for(i = 0; i < 4; i++ ) {
636                 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
637                 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
638                                                 && swz != RC_SWIZZLE_ONE) {
639                         return 0;
640                 }
641         }
642
643         /* Check src1. */
644         if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
645                                                 inst_add->U.I.DstReg.WriteMask
646                 || inst_add->U.I.SrcReg[1].Abs
647                 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
648                         && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
649                 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
650
651                 return 0;
652         }
653
654         if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
655                 rc_remove_instruction(inst_add);
656                 return 1;
657         }
658         return 0;
659 }
660
661 /**
662  * @return
663  *      0 if inst is still part of the program.
664  *      1 if inst is no longer part of the program.
665  */
666 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
667 {
668         switch(inst->U.I.Opcode){
669         case RC_OPCODE_ADD:
670                 if (c->has_presub) {
671                         if(peephole_add_presub_inv(c, inst))
672                                 return 1;
673                         if(peephole_add_presub_add(c, inst))
674                                 return 1;
675                 }
676                 break;
677         default:
678                 break;
679         }
680         return 0;
681 }
682
683 void rc_optimize(struct radeon_compiler * c, void *user)
684 {
685         struct rc_instruction * inst = c->Program.Instructions.Next;
686         while(inst != &c->Program.Instructions) {
687                 struct rc_instruction * cur = inst;
688                 inst = inst->Next;
689
690                 constant_folding(c, cur);
691
692                 if(peephole(c, cur))
693                         continue;
694
695                 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
696                         copy_propagate(c, cur);
697                         /* cur may no longer be part of the program */
698                 }
699         }
700 }