Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_schedule.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35
36
/* Compile-time switch: set to a non-zero value to enable DBG() output. */
#define VERBOSE 0

/* printf-style trace helper; writes to stderr only when VERBOSE is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) { \
			fprintf(stderr, __VA_ARGS__); \
		} \
	} while (0)
40
41 struct schedule_instruction {
42         struct rc_instruction * Instruction;
43
44         /** Next instruction in the linked list of ready instructions. */
45         struct schedule_instruction *NextReady;
46
47         /** Values that this instruction reads and writes */
48         struct reg_value * WriteValues[4];
49         struct reg_value * ReadValues[12];
50         unsigned int NumWriteValues:3;
51         unsigned int NumReadValues:4;
52
53         /**
54          * Number of (read and write) dependencies that must be resolved before
55          * this instruction can be scheduled.
56          */
57         unsigned int NumDependencies:5;
58
59         /** List of all readers (see rc_get_readers() for the definition of
60          * "all readers"), even those outside the basic block this instruction
61          * lives in. */
62         struct rc_reader_data GlobalReaders;
63 };
64
65
/**
 * Singly linked list node recording one instruction that reads a value.
 */
struct reg_value_reader {
	/** The instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Following node of the list, or NULL at the end. */
	struct reg_value_reader *Next;
};
73
/**
 * Describes one value held in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * When the writer is committed, every reader on this list has its
	 * dependency count decreased (see commit_update_writes()).
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers that have not been committed yet; it is
	 * decremented each time a reader of the value is committed.
	 * When the count reaches zero, the dependency count of the
	 * instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	/** The next value that will be written to the same register
	 * component, if any. */
	struct reg_value *Next;
};
98
/**
 * Per-register tracking: one value pointer for each of the four
 * components, indexed by channel (see get_reg_valuep()).
 */
struct register_state {
	struct reg_value *Values[4];
};
102
103 struct remap_reg {
104         struct rc_instruciont * Inst;
105         unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
106         unsigned int OldSwizzle:3;
107         unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
108         unsigned int NewSwizzle:3;
109         unsigned int OnlyTexReads:1;
110         struct remap_reg * Next;
111 };
112
113 struct schedule_state {
114         struct radeon_compiler * C;
115         struct schedule_instruction * Current;
116
117         struct register_state Temporary[RC_REGISTER_MAX_INDEX];
118
119         /**
120          * Linked lists of instructions that can be scheduled right now,
121          * based on which ALU/TEX resources they require.
122          */
123         /*@{*/
124         struct schedule_instruction *ReadyFullALU;
125         struct schedule_instruction *ReadyRGB;
126         struct schedule_instruction *ReadyAlpha;
127         struct schedule_instruction *ReadyTEX;
128         /*@}*/
129 };
130
131 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
132                 rc_register_file file, unsigned int index, unsigned int chan)
133 {
134         if (file != RC_FILE_TEMPORARY)
135                 return 0;
136
137         if (index >= RC_REGISTER_MAX_INDEX) {
138                 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
139                 return 0;
140         }
141
142         return &s->Temporary[index].Values[chan];
143 }
144
145 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
146 {
147         inst->NextReady = *list;
148         *list = inst;
149 }
150
151 static void add_inst_to_list_end(struct schedule_instruction ** list,
152                                         struct schedule_instruction * inst)
153 {
154         if(!*list){
155                 *list = inst;
156         }else{
157                 struct schedule_instruction * temp = *list;
158                 while(temp->NextReady){
159                         temp = temp->NextReady;
160                 }
161                 temp->NextReady = inst;
162         }
163 }
164
165 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
166 {
167         DBG("%i is now ready\n", sinst->Instruction->IP);
168
169         /* Adding Ready TEX instructions to the end of the "Ready List" helps
170          * us emit TEX instructions in blocks without losing our place. */
171         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
172                 add_inst_to_list_end(&s->ReadyTEX, sinst);
173         else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
174                 add_inst_to_list(&s->ReadyRGB, sinst);
175         else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
176                 add_inst_to_list(&s->ReadyAlpha, sinst);
177         else
178                 add_inst_to_list(&s->ReadyFullALU, sinst);
179 }
180
181 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
182 {
183         assert(sinst->NumDependencies > 0);
184         sinst->NumDependencies--;
185         if (!sinst->NumDependencies)
186                 instruction_ready(s, sinst);
187 }
188
189 /**
190  * This function decreases the dependencies of the next instruction that
191  * wants to write to each of sinst's read values.
192  */
193 static void commit_update_reads(struct schedule_state * s,
194                                         struct schedule_instruction * sinst){
195         unsigned int i;
196         for(i = 0; i < sinst->NumReadValues; ++i) {
197                 struct reg_value * v = sinst->ReadValues[i];
198                 assert(v->NumReaders > 0);
199                 v->NumReaders--;
200                 if (!v->NumReaders) {
201                         if (v->Next)
202                                 decrease_dependencies(s, v->Next->Writer);
203                 }
204         }
205 }
206
207 static void commit_update_writes(struct schedule_state * s,
208                                         struct schedule_instruction * sinst){
209         unsigned int i;
210         for(i = 0; i < sinst->NumWriteValues; ++i) {
211                 struct reg_value * v = sinst->WriteValues[i];
212                 if (v->NumReaders) {
213                         for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
214                                 decrease_dependencies(s, r->Reader);
215                         }
216                 } else {
217                         /* This happens in instruction sequences of the type
218                          *  OP r.x, ...;
219                          *  OP r.x, r.x, ...;
220                          * See also the subtlety in how instructions that both
221                          * read and write the same register are scanned.
222                          */
223                         if (v->Next)
224                                 decrease_dependencies(s, v->Next->Writer);
225                 }
226         }
227 }
228
229 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
230 {
231         DBG("%i: commit\n", sinst->Instruction->IP);
232
233         commit_update_reads(s, sinst);
234
235         commit_update_writes(s, sinst);
236 }
237
238 /**
239  * Emit all ready texture instructions in a single block.
240  *
241  * Emit as a single block to (hopefully) sample many textures in parallel,
242  * and to avoid hardware indirections on R300.
243  */
244 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
245 {
246         struct schedule_instruction *readytex;
247         struct rc_instruction * inst_begin;
248
249         assert(s->ReadyTEX);
250
251         /* Node marker for R300 */
252         inst_begin = rc_insert_new_instruction(s->C, before->Prev);
253         inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
254
255         /* Link texture instructions back in */
256         readytex = s->ReadyTEX;
257         while(readytex) {
258                 rc_insert_instruction(before->Prev, readytex->Instruction);
259                 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
260
261                 /* All of the TEX instructions in the same TEX block have
262                  * their source registers read from before any of the
263                  * instructions in that block write to their destination
264                  * registers.  This means that when we commit a TEX
265                  * instruction, any other TEX instruction that wants to write
266                  * to one of the committed instruction's source register can be
267                  * marked as ready and should be emitted in the same TEX
268                  * block. This prevents the following sequence from being
269                  * emitted in two different TEX blocks:
270                  * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
271                  * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
272                  */
273                 commit_update_reads(s, readytex);
274                 readytex = readytex->NextReady;
275         }
276         readytex = s->ReadyTEX;
277         s->ReadyTEX = 0;
278         while(readytex){
279                 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
280                 commit_update_writes(s, readytex);
281                 readytex = readytex->NextReady;
282         }
283 }
284
285 /* This is a helper function for destructive_merge_instructions().  It helps
286  * merge presubtract sources from two instructions and makes sure the
287  * presubtract sources end up in the correct spot.  This function assumes that
288  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
289  * but no scalar instruction (alpha).
290  * @return 0 if merging the presubtract sources fails.
291  * @retrun 1 if merging the presubtract sources succeeds.
292  */
293 static int merge_presub_sources(
294         struct rc_pair_instruction * dst_full,
295         struct rc_pair_sub_instruction src,
296         unsigned int type)
297 {
298         unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
299         struct rc_pair_sub_instruction * dst_sub;
300         const struct rc_opcode_info * info;
301
302         assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
303
304         switch(type) {
305         case RC_SOURCE_RGB:
306                 is_rgb = 1;
307                 is_alpha = 0;
308                 dst_sub = &dst_full->RGB;
309                 break;
310         case RC_SOURCE_ALPHA:
311                 is_rgb = 0;
312                 is_alpha = 1;
313                 dst_sub = &dst_full->Alpha;
314                 break;
315         default:
316                 assert(0);
317                 return 0;
318         }
319
320         info = rc_get_opcode_info(dst_full->RGB.Opcode);
321
322         if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
323                 return 0;
324
325         srcp_regs = rc_presubtract_src_reg_count(
326                                         src.Src[RC_PAIR_PRESUB_SRC].Index);
327         for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
328                 unsigned int arg;
329                 int free_source;
330                 unsigned int one_way = 0;
331                 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
332                 struct rc_pair_instruction_source temp;
333
334                 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
335                                                         srcp.File, srcp.Index);
336
337                 /* If free_source < 0 then there are no free source
338                  * slots. */
339                 if (free_source < 0)
340                         return 0;
341
342                 temp = dst_sub->Src[srcp_src];
343                 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
344
345                 /* srcp needs src0 and src1 to be the same */
346                 if (free_source < srcp_src) {
347                         if (!temp.Used)
348                                 continue;
349                         free_source = rc_pair_alloc_source(dst_full, is_rgb,
350                                         is_alpha, temp.File, temp.Index);
351                         if (free_source < 0)
352                                 return 0;
353                         one_way = 1;
354                 } else {
355                         dst_sub->Src[free_source] = temp;
356                 }
357
358                 /* If free_source == srcp_src, then the presubtract
359                  * source is already in the correct place. */
360                 if (free_source == srcp_src)
361                         continue;
362
363                 /* Shuffle the sources, so we can put the
364                  * presubtract source in the correct place. */
365                 for(arg = 0; arg < info->NumSrcRegs; arg++) {
366                         /*If this arg does not read from an rgb source,
367                          * do nothing. */
368                         if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
369                                                                 & type)) {
370                                 continue;
371                         }
372
373                         if (dst_full->RGB.Arg[arg].Source == srcp_src)
374                                 dst_full->RGB.Arg[arg].Source = free_source;
375                         /* We need to do this just in case register
376                          * is one of the sources already, but in the
377                          * wrong spot. */
378                         else if(dst_full->RGB.Arg[arg].Source == free_source
379                                                         && !one_way) {
380                                 dst_full->RGB.Arg[arg].Source = srcp_src;
381                         }
382                 }
383         }
384         return 1;
385 }
386
387
388 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
389 static int destructive_merge_instructions(
390                 struct rc_pair_instruction * rgb,
391                 struct rc_pair_instruction * alpha)
392 {
393         const struct rc_opcode_info * opcode;
394
395         assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
396         assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
397
398         /* Presubtract registers need to be merged first so that registers
399          * needed by the presubtract operation can be placed in src0 and/or
400          * src1. */
401
402         /* Merge the rgb presubtract registers. */
403         if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
404                 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
405                         return 0;
406                 }
407         }
408         /* Merge the alpha presubtract registers */
409         if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
410                 if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
411                         return 0;
412                 }
413         }
414
415         /* Copy alpha args into rgb */
416         opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
417
418         for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
419                 unsigned int srcrgb = 0;
420                 unsigned int srcalpha = 0;
421                 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
422                 rc_register_file file = 0;
423                 unsigned int index = 0;
424                 int source;
425
426                 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
427                         srcrgb = 1;
428                         file = alpha->RGB.Src[oldsrc].File;
429                         index = alpha->RGB.Src[oldsrc].Index;
430                 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
431                         srcalpha = 1;
432                         file = alpha->Alpha.Src[oldsrc].File;
433                         index = alpha->Alpha.Src[oldsrc].Index;
434                 }
435
436                 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
437                 if (source < 0)
438                         return 0;
439
440                 rgb->Alpha.Arg[arg].Source = source;
441                 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
442                 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
443                 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
444         }
445
446         /* Copy alpha opcode into rgb */
447         rgb->Alpha.Opcode = alpha->Alpha.Opcode;
448         rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
449         rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
450         rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
451         rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
452         rgb->Alpha.Saturate = alpha->Alpha.Saturate;
453
454         /* Merge ALU result writing */
455         if (alpha->WriteALUResult) {
456                 if (rgb->WriteALUResult)
457                         return 0;
458
459                 rgb->WriteALUResult = alpha->WriteALUResult;
460                 rgb->ALUResultCompare = alpha->ALUResultCompare;
461         }
462
463         return 1;
464 }
465
466 /**
467  * Try to merge the given instructions into the rgb instructions.
468  *
469  * Return true on success; on failure, return false, and keep
470  * the instructions untouched.
471  */
472 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
473 {
474         struct rc_pair_instruction backup;
475
476         /*Instructions can't write output registers and ALU result at the
477          * same time. */
478         if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
479                 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
480                 return 0;
481         }
482         memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
483
484         if (destructive_merge_instructions(rgb, alpha))
485                 return 1;
486
487         memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
488         return 0;
489 }
490
491 static void presub_nop(struct rc_instruction * emitted) {
492         int prev_rgb_index, prev_alpha_index, i, num_src;
493
494         /* We don't need a nop if the previous instruction is a TEX. */
495         if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
496                 return;
497         }
498         if (emitted->Prev->U.P.RGB.WriteMask)
499                 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
500         else
501                 prev_rgb_index = -1;
502         if (emitted->Prev->U.P.Alpha.WriteMask)
503                 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
504         else
505                 prev_alpha_index = 1;
506
507         /* Check the previous rgb instruction */
508         if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
509                 num_src = rc_presubtract_src_reg_count(
510                                 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
511                 for (i = 0; i < num_src; i++) {
512                         unsigned int index = emitted->U.P.RGB.Src[i].Index;
513                         if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
514                             && (index  == prev_rgb_index
515                                 || index == prev_alpha_index)) {
516                                 emitted->Prev->U.P.Nop = 1;
517                                 return;
518                         }
519                 }
520         }
521
522         /* Check the previous alpha instruction. */
523         if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
524                 return;
525
526         num_src = rc_presubtract_src_reg_count(
527                                 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
528         for (i = 0; i < num_src; i++) {
529                 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
530                 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
531                    && (index == prev_rgb_index || index == prev_alpha_index)) {
532                         emitted->Prev->U.P.Nop = 1;
533                         return;
534                 }
535         }
536 }
537
538 static void rgb_to_alpha_remap (
539         struct rc_instruction * inst,
540         struct rc_pair_instruction_arg * arg,
541         rc_register_file old_file,
542         rc_swizzle old_swz,
543         unsigned int new_index)
544 {
545         int new_src_index;
546         unsigned int i;
547
548         for (i = 0; i < 3; i++) {
549                 if (get_swz(arg->Swizzle, i) == old_swz) {
550                         SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
551                 }
552         }
553         new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
554                                                         old_file, new_index);
555         /* This conversion is not possible, we must have made a mistake in
556          * is_rgb_to_alpha_possible. */
557         if (new_src_index < 0) {
558                 assert(0);
559                 return;
560         }
561
562         arg->Source = new_src_index;
563 }
564
565 static int can_remap(unsigned int opcode)
566 {
567         switch(opcode) {
568         case RC_OPCODE_DDX:
569         case RC_OPCODE_DDY:
570                 return 0;
571         default:
572                 return 1;
573         }
574 }
575
576 static int can_convert_opcode_to_alpha(unsigned int opcode)
577 {
578         switch(opcode) {
579         case RC_OPCODE_DDX:
580         case RC_OPCODE_DDY:
581         case RC_OPCODE_DP2:
582         case RC_OPCODE_DP3:
583         case RC_OPCODE_DP4:
584         case RC_OPCODE_DPH:
585                 return 0;
586         default:
587                 return 1;
588         }
589 }
590
591 static void is_rgb_to_alpha_possible(
592         void * userdata,
593         struct rc_instruction * inst,
594         struct rc_pair_instruction_arg * arg,
595         struct rc_pair_instruction_source * src)
596 {
597         unsigned int chan_count = 0;
598         unsigned int alpha_sources = 0;
599         unsigned int i;
600         struct rc_reader_data * reader_data = userdata;
601
602         if (!can_remap(inst->U.P.RGB.Opcode)
603             || !can_remap(inst->U.P.Alpha.Opcode)) {
604                 reader_data->Abort = 1;
605                 return;
606         }
607
608         if (!src)
609                 return;
610
611         /* XXX There are some cases where we can still do the conversion if
612          * a reader reads from a presubtract source, but for now we'll prevent
613          * it. */
614         if (arg->Source == RC_PAIR_PRESUB_SRC) {
615                 reader_data->Abort = 1;
616                 return;
617         }
618
619         /* Make sure the source only reads from one component.
620          * XXX We should allow the source to read from the same component twice.
621          * XXX If the index we will be converting to is the same as the
622          * current index, then it is OK to read from more than one component.
623          */
624         for (i = 0; i < 3; i++) {
625                 rc_swizzle swz = get_swz(arg->Swizzle, i);
626                 switch(swz) {
627                 case RC_SWIZZLE_X:
628                 case RC_SWIZZLE_Y:
629                 case RC_SWIZZLE_Z:
630                 case RC_SWIZZLE_W:
631                         chan_count++;
632                         break;
633                 default:
634                         break;
635                 }
636         }
637         if (chan_count > 1) {
638                 reader_data->Abort = 1;
639                 return;
640         }
641
642         /* Make sure there are enough alpha sources.
643          * XXX If we know what register all the readers are going
644          * to be remapped to, then in some situations we can still do
645          * the subsitution, even if all 3 alpha sources are being used.*/
646         for (i = 0; i < 3; i++) {
647                 if (inst->U.P.Alpha.Src[i].Used) {
648                         alpha_sources++;
649                 }
650         }
651         if (alpha_sources > 2) {
652                 reader_data->Abort = 1;
653                 return;
654         }
655 }
656
657 static int convert_rgb_to_alpha(
658         struct schedule_state * s,
659         struct schedule_instruction * sched_inst)
660 {
661         struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
662         unsigned int old_mask = pair_inst->RGB.WriteMask;
663         unsigned int old_swz = rc_mask_to_swizzle(old_mask);
664         const struct rc_opcode_info * info =
665                                 rc_get_opcode_info(pair_inst->RGB.Opcode);
666         int new_index = -1;
667         unsigned int i;
668
669         if (sched_inst->GlobalReaders.Abort)
670                 return 0;
671
672         if (!pair_inst->RGB.WriteMask)
673                 return 0;
674
675         if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
676             || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
677                 return 0;
678         }
679
680         assert(sched_inst->NumWriteValues == 1);
681
682         if (!sched_inst->WriteValues[0]) {
683                 assert(0);
684                 return 0;
685         }
686
687         /* We start at the old index, because if we can reuse the same
688          * register and just change the swizzle then it is more likely we
689          * will be able to convert all the readers. */
690         for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
691                 struct reg_value ** new_regvalp = get_reg_valuep(
692                                                 s, RC_FILE_TEMPORARY, i, 3);
693                 if (!*new_regvalp) {
694                         struct reg_value ** old_regvalp =
695                                 get_reg_valuep(s,
696                                         RC_FILE_TEMPORARY,
697                                         pair_inst->RGB.DestIndex,
698                                         rc_mask_to_swizzle(old_mask));
699                         new_index = i;
700                         *new_regvalp = *old_regvalp;
701                         *old_regvalp = NULL;
702                         new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
703                         break;
704                 }
705         }
706         if (new_index < 0) {
707                 return 0;
708         }
709
710         pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
711         pair_inst->Alpha.DestIndex = new_index;
712         pair_inst->Alpha.WriteMask = RC_MASK_W;
713         pair_inst->Alpha.Target = pair_inst->RGB.Target;
714         pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
715         pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
716         pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
717         memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
718                                                 sizeof(pair_inst->Alpha.Arg));
719         /* Move the swizzles into the first chan */
720         for (i = 0; i < info->NumSrcRegs; i++) {
721                 unsigned int j;
722                 for (j = 0; j < 3; j++) {
723                         unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
724                         if (swz != RC_SWIZZLE_UNUSED) {
725                                 pair_inst->Alpha.Arg[i].Swizzle =
726                                                         rc_init_swizzle(swz, 1);
727                                 break;
728                         }
729                 }
730         }
731         pair_inst->RGB.Opcode = RC_OPCODE_NOP;
732         pair_inst->RGB.DestIndex = 0;
733         pair_inst->RGB.WriteMask = 0;
734         pair_inst->RGB.Target = 0;
735         pair_inst->RGB.OutputWriteMask = 0;
736         pair_inst->RGB.DepthWriteMask = 0;
737         pair_inst->RGB.Saturate = 0;
738         memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
739
740         for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
741                 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
742                 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
743                                         RC_FILE_TEMPORARY, old_swz, new_index);
744         }
745         return 1;
746 }
747
748 /**
749  * Find a good ALU instruction or pair of ALU instruction and emit it.
750  *
751  * Prefer emitting full ALU instructions, so that when we reach a point
752  * where no full ALU instruction can be emitted, we have more candidates
753  * for RGB/Alpha pairing.
754  */
755 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
756 {
757         struct schedule_instruction * sinst;
758
759         if (s->ReadyFullALU) {
760                 sinst = s->ReadyFullALU;
761                 s->ReadyFullALU = s->ReadyFullALU->NextReady;
762                 rc_insert_instruction(before->Prev, sinst->Instruction);
763                 commit_alu_instruction(s, sinst);
764         } else {
765                 struct schedule_instruction **prgb;
766                 struct schedule_instruction **palpha;
767                 struct schedule_instruction *prev;
768 pair:
769                 /* Some pairings might fail because they require too
770                  * many source slots; try all possible pairings if necessary */
771                 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
772                         for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
773                                 struct schedule_instruction * psirgb = *prgb;
774                                 struct schedule_instruction * psialpha = *palpha;
775
776                                 if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
777                                         continue;
778
779                                 *prgb = (*prgb)->NextReady;
780                                 *palpha = (*palpha)->NextReady;
781                                 rc_insert_instruction(before->Prev, psirgb->Instruction);
782                                 commit_alu_instruction(s, psirgb);
783                                 commit_alu_instruction(s, psialpha);
784                                 goto success;
785                         }
786                 }
787                 prev = NULL;
788                 /* No success in pairing, now try to convert one of the RGB
789                  * instructions to an Alpha so we can pair it with another RGB.
790                  */
791                 if (s->ReadyRGB && s->ReadyRGB->NextReady) {
792                 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
793                         if ((*prgb)->NumWriteValues == 1) {
794                                 struct schedule_instruction * prgb_next;
795                                 if (!convert_rgb_to_alpha(s, *prgb))
796                                         goto cont_loop;
797                                 prgb_next = (*prgb)->NextReady;
798                                 /* Add instruction to the Alpha ready list. */
799                                 (*prgb)->NextReady = s->ReadyAlpha;
800                                 s->ReadyAlpha = *prgb;
801                                 /* Remove instruction from the RGB ready list.*/
802                                 if (prev)
803                                         prev->NextReady = prgb_next;
804                                 else
805                                         s->ReadyRGB = prgb_next;
806                                 goto pair;
807                         }
808 cont_loop:
809                         prev = *prgb;
810                 }
811                 }
812                 /* Still no success in pairing, just take the first RGB
813                  * or alpha instruction. */
814                 if (s->ReadyRGB) {
815                         sinst = s->ReadyRGB;
816                         s->ReadyRGB = s->ReadyRGB->NextReady;
817                 } else if (s->ReadyAlpha) {
818                         sinst = s->ReadyAlpha;
819                         s->ReadyAlpha = s->ReadyAlpha->NextReady;
820                 } else {
821                         /*XXX Something real bad has happened. */
822                         assert(0);
823                 }
824
825                 rc_insert_instruction(before->Prev, sinst->Instruction);
826                 commit_alu_instruction(s, sinst);
827         success: ;
828         }
829         /* If the instruction we just emitted uses a presubtract value, and
830          * the presubtract sources were written by the previous intstruction,
831          * the previous instruction needs a nop. */
832         presub_nop(before->Prev);
833 }
834
835 static void scan_read(void * data, struct rc_instruction * inst,
836                 rc_register_file file, unsigned int index, unsigned int chan)
837 {
838         struct schedule_state * s = data;
839         struct reg_value ** v = get_reg_valuep(s, file, index, chan);
840         struct reg_value_reader * reader;
841
842         if (!v)
843                 return;
844
845         if (*v && (*v)->Writer == s->Current) {
846                 /* The instruction reads and writes to a register component.
847                  * In this case, we only want to increment dependencies by one.
848                  */
849                 return;
850         }
851
852         DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
853
854         reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
855         reader->Reader = s->Current;
856         if (!*v) {
857                 /* In this situation, the instruction reads from a register
858                  * that hasn't been written to or read from in the current
859                  * block. */
860                 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
861                 memset(*v, 0, sizeof(struct reg_value));
862                 (*v)->Readers = reader;
863         } else {
864                 reader->Next = (*v)->Readers;
865                 (*v)->Readers = reader;
866                 /* Only update the current instruction's dependencies if the
867                  * register it reads from has been written to in this block. */
868                 if ((*v)->Writer) {
869                         s->Current->NumDependencies++;
870                 }
871         }
872         (*v)->NumReaders++;
873
874         if (s->Current->NumReadValues >= 12) {
875                 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
876         } else {
877                 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
878         }
879 }
880
881 static void scan_write(void * data, struct rc_instruction * inst,
882                 rc_register_file file, unsigned int index, unsigned int chan)
883 {
884         struct schedule_state * s = data;
885         struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
886         struct reg_value * newv;
887
888         if (!pv)
889                 return;
890
891         DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
892
893         newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
894         memset(newv, 0, sizeof(*newv));
895
896         newv->Writer = s->Current;
897
898         if (*pv) {
899                 (*pv)->Next = newv;
900                 s->Current->NumDependencies++;
901         }
902
903         *pv = newv;
904
905         if (s->Current->NumWriteValues >= 4) {
906                 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
907         } else {
908                 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
909         }
910 }
911
912 static void is_rgb_to_alpha_possible_normal(
913         void * userdata,
914         struct rc_instruction * inst,
915         struct rc_src_register * src)
916 {
917         struct rc_reader_data * reader_data = userdata;
918         reader_data->Abort = 1;
919
920 }
921
922 static void schedule_block(struct r300_fragment_program_compiler * c,
923                 struct rc_instruction * begin, struct rc_instruction * end)
924 {
925         struct schedule_state s;
926         unsigned int ip;
927
928         memset(&s, 0, sizeof(s));
929         s.C = &c->Base;
930
931         /* Scan instructions for data dependencies */
932         ip = 0;
933         for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
934                 s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
935                 memset(s.Current, 0, sizeof(struct schedule_instruction));
936
937                 s.Current->Instruction = inst;
938                 inst->IP = ip++;
939
940                 DBG("%i: Scanning\n", inst->IP);
941
942                 /* The order of things here is subtle and maybe slightly
943                  * counter-intuitive, to account for the case where an
944                  * instruction writes to the same register as it reads
945                  * from. */
946                 rc_for_all_writes_chan(inst, &scan_write, &s);
947                 rc_for_all_reads_chan(inst, &scan_read, &s);
948
949                 DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
950
951                 if (!s.Current->NumDependencies)
952                         instruction_ready(&s, s.Current);
953
954                 /* Get global readers for possible RGB->Alpha conversion. */
955                 s.Current->GlobalReaders.ExitOnAbort = 1;
956                 rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
957                                 is_rgb_to_alpha_possible_normal,
958                                 is_rgb_to_alpha_possible, NULL);
959         }
960
961         /* Temporarily unlink all instructions */
962         begin->Prev->Next = end;
963         end->Prev = begin->Prev;
964
965         /* Schedule instructions back */
966         while(!s.C->Error &&
967               (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
968                 if (s.ReadyTEX)
969                         emit_all_tex(&s, end);
970
971                 while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
972                         emit_one_alu(&s, end);
973         }
974 }
975
976 static int is_controlflow(struct rc_instruction * inst)
977 {
978         if (inst->Type == RC_INSTRUCTION_NORMAL) {
979                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
980                 return opcode->IsFlowControl;
981         }
982         return 0;
983 }
984
985 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
986 {
987         struct schedule_state s;
988
989         struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
990         struct rc_instruction * inst = c->Base.Program.Instructions.Next;
991
992         memset(&s, 0, sizeof(s));
993         s.C = &c->Base;
994         while(inst != &c->Base.Program.Instructions) {
995                 struct rc_instruction * first;
996
997                 if (is_controlflow(inst)) {
998                         inst = inst->Next;
999                         continue;
1000                 }
1001
1002                 first = inst;
1003
1004                 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1005                         inst = inst->Next;
1006
1007                 DBG("Schedule one block\n");
1008                 schedule_block(c, first, inst);
1009         }
1010 }