Tizen 2.1 base
[sdk/emulator/qemu.git] / gl / mesa / src / gallium / drivers / r300 / compiler / radeon_pair_schedule.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
37
38 #include "util/u_debug.h"
39
/** Set to a non-zero value to enable the scheduler's debug traces. */
#define VERBOSE 0

/**
 * printf-style trace to stderr.  The fprintf call is always compiled, so its
 * arguments stay type-checked, but it only executes when VERBOSE is non-zero.
 */
#define DBG(...) \
	do { \
		if (VERBOSE) { \
			fprintf(stderr, __VA_ARGS__); \
		} \
	} while (0)
43
44 struct schedule_instruction {
45         struct rc_instruction * Instruction;
46
47         /** Next instruction in the linked list of ready instructions. */
48         struct schedule_instruction *NextReady;
49
50         /** Values that this instruction reads and writes */
51         struct reg_value * WriteValues[4];
52         struct reg_value * ReadValues[12];
53         unsigned int NumWriteValues:3;
54         unsigned int NumReadValues:4;
55
56         /**
57          * Number of (read and write) dependencies that must be resolved before
58          * this instruction can be scheduled.
59          */
60         unsigned int NumDependencies:5;
61
62         /** List of all readers (see rc_get_readers() for the definition of
63          * "all readers"), even those outside the basic block this instruction
64          * lives in. */
65         struct rc_reader_data GlobalReaders;
66
67         /** If the scheduler has paired an RGB and an Alpha instruction together,
68          * PairedInst references the alpha insturction's dependency information.
69          */
70         struct schedule_instruction * PairedInst;
71
72         /** This scheduler uses the value of Score to determine which
73          * instruction to schedule.  Instructions with a higher value of Score
74          * will be scheduled first. */
75         int Score;
76
77         /** The number of components that read from a TEX instruction. */
78         unsigned TexReadCount;
79
80         /** For TEX instructions a list of readers */
81         struct rc_list * TexReaders;
82 };
83
84
/**
 * One node of the singly linked list that records which instructions read
 * a value.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next node in the list; the list ends at a NULL link. */
	struct reg_value_reader *Next;
};
92
/**
 * Tracks one value stored in a component of an RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** Instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When the writer is committed (see commit_update_writes), the
	 * dependency count of every reader in this list is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value.  This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	/** Pointer to the next value to be written to the same register. */
	struct reg_value *Next;
};
117
/**
 * State of one temporary register: one reg_value slot per channel
 * (get_reg_valuep() indexes Values by channel).
 */
struct register_state {
	struct reg_value *Values[4];
};
121
122 struct remap_reg {
123         struct rc_instruciont * Inst;
124         unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125         unsigned int OldSwizzle:3;
126         unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127         unsigned int NewSwizzle:3;
128         unsigned int OnlyTexReads:1;
129         struct remap_reg * Next;
130 };
131
132 struct schedule_state {
133         struct radeon_compiler * C;
134         struct schedule_instruction * Current;
135         /** Array of the previous writers of Current's destination register
136          * indexed by channel. */
137         struct schedule_instruction * PrevWriter[4];
138
139         struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140
141         /**
142          * Linked lists of instructions that can be scheduled right now,
143          * based on which ALU/TEX resources they require.
144          */
145         /*@{*/
146         struct schedule_instruction *ReadyFullALU;
147         struct schedule_instruction *ReadyRGB;
148         struct schedule_instruction *ReadyAlpha;
149         struct schedule_instruction *ReadyTEX;
150         /*@}*/
151         struct rc_list *PendingTEX;
152
153         void (*CalcScore)(struct schedule_instruction *);
154         long max_tex_group;
155         unsigned PrevBlockHasTex:1;
156         unsigned TEXCount;
157         unsigned Opt:1;
158 };
159
160 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
161                 rc_register_file file, unsigned int index, unsigned int chan)
162 {
163         if (file != RC_FILE_TEMPORARY)
164                 return 0;
165
166         if (index >= RC_REGISTER_MAX_INDEX) {
167                 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
168                 return 0;
169         }
170
171         return &s->Temporary[index].Values[chan];
172 }
173
174 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
175 {
176         unsigned tex_read_count = sinst->TexReadCount;
177         if (sinst->PairedInst) {
178                 tex_read_count += sinst->PairedInst->TexReadCount;
179         }
180         return tex_read_count;
181 }
182
#if VERBOSE
/**
 * Debug helper: print every entry of a ready list to stderr as
 * "IP (score) [tex read count],", followed by a newline.
 *
 * @param sinst  head of the list (linked through NextReady); may be NULL.
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Print the score of the entry being visited; the list head's
		 * score used to be printed for every entry.  Score is a
		 * signed int, matching the %d conversion. */
		int score = ptr->Score;
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
196
197 static void remove_inst_from_list(struct schedule_instruction ** list,
198                                         struct schedule_instruction * inst)
199 {
200         struct schedule_instruction * prev = NULL;
201         struct schedule_instruction * list_ptr;
202         for (list_ptr = *list; list_ptr; prev = list_ptr,
203                                         list_ptr = list_ptr->NextReady) {
204                 if (list_ptr == inst) {
205                         if (prev) {
206                                 prev->NextReady = inst->NextReady;
207                         } else {
208                                 *list = inst->NextReady;
209                         }
210                         inst->NextReady = NULL;
211                         break;
212                 }
213         }
214 }
215
216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217 {
218         inst->NextReady = *list;
219         *list = inst;
220 }
221
222 static void add_inst_to_list_score(struct schedule_instruction ** list,
223                                         struct schedule_instruction * inst)
224 {
225         struct schedule_instruction * temp;
226         struct schedule_instruction * prev;
227         if (!*list) {
228                 *list = inst;
229                 return;
230         }
231         temp = *list;
232         prev = NULL;
233         while(temp && inst->Score <= temp->Score) {
234                 prev = temp;
235                 temp = temp->NextReady;
236         }
237
238         if (!prev) {
239                 inst->NextReady = temp;
240                 *list = inst;
241         } else {
242                 prev->NextReady = inst;
243                 inst->NextReady = temp;
244         }
245 }
246
247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248 {
249         DBG("%i is now ready\n", sinst->Instruction->IP);
250
251         /* Adding Ready TEX instructions to the end of the "Ready List" helps
252          * us emit TEX instructions in blocks without losing our place. */
253         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254                 add_inst_to_list_score(&s->ReadyTEX, sinst);
255         else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256                 add_inst_to_list_score(&s->ReadyRGB, sinst);
257         else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258                 add_inst_to_list_score(&s->ReadyAlpha, sinst);
259         else
260                 add_inst_to_list_score(&s->ReadyFullALU, sinst);
261 }
262
263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264 {
265         assert(sinst->NumDependencies > 0);
266         sinst->NumDependencies--;
267         if (!sinst->NumDependencies)
268                 instruction_ready(s, sinst);
269 }
270
271 /* These functions provide different heuristics for scheduling instructions.
272  * The default is calc_score_readers. */
273
#if 0

/** Disabled heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/**
 * Disabled heuristic: score an instruction by the outstanding dependencies
 * of the readers of the values it writes.  A reader with exactly one
 * dependency left adds a bonus of 100 on top of its dependency count.
 */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int total = 0;
	int i;

	for (i = 0; i < sinst->NumWriteValues; i++) {
		const struct reg_value * value = sinst->WriteValues[i];
		const struct reg_value_reader * node;

		if (!value->NumReaders)
			continue;

		for (node = value->Readers; node; node = node->Next) {
			int deps = node->Reader->NumDependencies;

			if (deps == 1)
				total += 100;
			total += deps;
		}
	}

	sinst->Score = total;
}

#endif
300
301 #define NO_READ_TEX_SCORE (1 << 16)
302 #define NO_OUTPUT_SCORE (1 << 24)
303
304 static void calc_score_readers(struct schedule_instruction * sinst)
305 {
306         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
307                 sinst->Score = 0;
308         } else {
309                 sinst->Score = sinst->NumReadValues;
310                 if (sinst->PairedInst) {
311                         sinst->Score += sinst->PairedInst->NumReadValues;
312                 }
313                 if (get_tex_read_count(sinst) == 0) {
314                         sinst->Score |= NO_READ_TEX_SCORE;
315                 }
316                 if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
317                         !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
318                         if (sinst->PairedInst) {
319                                 if (!sinst->PairedInst->Instruction->U.P.
320                                                 RGB.OutputWriteMask
321                                 && !sinst->PairedInst->Instruction->U.P.
322                                                 Alpha.OutputWriteMask) {
323                                         sinst->Score |= NO_OUTPUT_SCORE;
324                                 }
325
326                         } else {
327                                 sinst->Score |= NO_OUTPUT_SCORE;
328                         }
329                 }
330         }
331 }
332
333 /**
334  * This function decreases the dependencies of the next instruction that
335  * wants to write to each of sinst's read values.
336  */
337 static void commit_update_reads(struct schedule_state * s,
338                                         struct schedule_instruction * sinst){
339         unsigned int i;
340         for(i = 0; i < sinst->NumReadValues; ++i) {
341                 struct reg_value * v = sinst->ReadValues[i];
342                 assert(v->NumReaders > 0);
343                 v->NumReaders--;
344                 if (!v->NumReaders) {
345                         if (v->Next) {
346                                 decrease_dependencies(s, v->Next->Writer);
347                         }
348                 }
349         }
350         if (sinst->PairedInst) {
351                 commit_update_reads(s, sinst->PairedInst);
352         }
353 }
354
355 static void commit_update_writes(struct schedule_state * s,
356                                         struct schedule_instruction * sinst){
357         unsigned int i;
358         for(i = 0; i < sinst->NumWriteValues; ++i) {
359                 struct reg_value * v = sinst->WriteValues[i];
360                 if (v->NumReaders) {
361                         for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
362                                 decrease_dependencies(s, r->Reader);
363                         }
364                 } else {
365                         /* This happens in instruction sequences of the type
366                          *  OP r.x, ...;
367                          *  OP r.x, r.x, ...;
368                          * See also the subtlety in how instructions that both
369                          * read and write the same register are scanned.
370                          */
371                         if (v->Next)
372                                 decrease_dependencies(s, v->Next->Writer);
373                 }
374         }
375         if (sinst->PairedInst) {
376                 commit_update_writes(s, sinst->PairedInst);
377         }
378 }
379
380 static void notify_sem_wait(struct schedule_state *s)
381 {
382         struct rc_list * pend_ptr;
383         for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
384                 struct rc_list * read_ptr;
385                 struct schedule_instruction * pending = pend_ptr->Item;
386                 for (read_ptr = pending->TexReaders; read_ptr;
387                                                 read_ptr = read_ptr->Next) {
388                         struct schedule_instruction * reader = read_ptr->Item;
389                         reader->TexReadCount--;
390                 }
391         }
392         s->PendingTEX = NULL;
393 }
394
395 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
396 {
397         DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
398
399         commit_update_reads(s, sinst);
400
401         commit_update_writes(s, sinst);
402
403         if (get_tex_read_count(sinst) > 0) {
404                 sinst->Instruction->U.P.SemWait = 1;
405                 notify_sem_wait(s);
406         }
407 }
408
409 /**
410  * Emit all ready texture instructions in a single block.
411  *
412  * Emit as a single block to (hopefully) sample many textures in parallel,
413  * and to avoid hardware indirections on R300.
414  */
415 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
416 {
417         struct schedule_instruction *readytex;
418         struct rc_instruction * inst_begin;
419
420         assert(s->ReadyTEX);
421         notify_sem_wait(s);
422
423         /* Node marker for R300 */
424         inst_begin = rc_insert_new_instruction(s->C, before->Prev);
425         inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
426
427         /* Link texture instructions back in */
428         readytex = s->ReadyTEX;
429         while(readytex) {
430                 rc_insert_instruction(before->Prev, readytex->Instruction);
431                 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
432
433                 /* All of the TEX instructions in the same TEX block have
434                  * their source registers read from before any of the
435                  * instructions in that block write to their destination
436                  * registers.  This means that when we commit a TEX
437                  * instruction, any other TEX instruction that wants to write
438                  * to one of the committed instruction's source register can be
439                  * marked as ready and should be emitted in the same TEX
440                  * block. This prevents the following sequence from being
441                  * emitted in two different TEX blocks:
442                  * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
443                  * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
444                  */
445                 commit_update_reads(s, readytex);
446                 readytex = readytex->NextReady;
447         }
448         readytex = s->ReadyTEX;
449         s->ReadyTEX = 0;
450         while(readytex){
451                 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
452                 commit_update_writes(s, readytex);
453                 /* Set semaphore bits for last TEX instruction in the block */
454                 if (!readytex->NextReady) {
455                         readytex->Instruction->U.I.TexSemAcquire = 1;
456                         readytex->Instruction->U.I.TexSemWait = 1;
457                 }
458                 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
459                 readytex = readytex->NextReady;
460         }
461 }
462
463 /* This is a helper function for destructive_merge_instructions().  It helps
464  * merge presubtract sources from two instructions and makes sure the
465  * presubtract sources end up in the correct spot.  This function assumes that
466  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
467  * but no scalar instruction (alpha).
468  * @return 0 if merging the presubtract sources fails.
469  * @retrun 1 if merging the presubtract sources succeeds.
470  */
471 static int merge_presub_sources(
472         struct rc_pair_instruction * dst_full,
473         struct rc_pair_sub_instruction src,
474         unsigned int type)
475 {
476         unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
477         struct rc_pair_sub_instruction * dst_sub;
478         const struct rc_opcode_info * info;
479
480         assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
481
482         switch(type) {
483         case RC_SOURCE_RGB:
484                 is_rgb = 1;
485                 is_alpha = 0;
486                 dst_sub = &dst_full->RGB;
487                 break;
488         case RC_SOURCE_ALPHA:
489                 is_rgb = 0;
490                 is_alpha = 1;
491                 dst_sub = &dst_full->Alpha;
492                 break;
493         default:
494                 assert(0);
495                 return 0;
496         }
497
498         info = rc_get_opcode_info(dst_full->RGB.Opcode);
499
500         if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
501                 return 0;
502
503         srcp_regs = rc_presubtract_src_reg_count(
504                                         src.Src[RC_PAIR_PRESUB_SRC].Index);
505         for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
506                 unsigned int arg;
507                 int free_source;
508                 unsigned int one_way = 0;
509                 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
510                 struct rc_pair_instruction_source temp;
511
512                 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
513                                                         srcp.File, srcp.Index);
514
515                 /* If free_source < 0 then there are no free source
516                  * slots. */
517                 if (free_source < 0)
518                         return 0;
519
520                 temp = dst_sub->Src[srcp_src];
521                 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
522
523                 /* srcp needs src0 and src1 to be the same */
524                 if (free_source < srcp_src) {
525                         if (!temp.Used)
526                                 continue;
527                         free_source = rc_pair_alloc_source(dst_full, is_rgb,
528                                         is_alpha, temp.File, temp.Index);
529                         if (free_source < 0)
530                                 return 0;
531                         one_way = 1;
532                 } else {
533                         dst_sub->Src[free_source] = temp;
534                 }
535
536                 /* If free_source == srcp_src, then the presubtract
537                  * source is already in the correct place. */
538                 if (free_source == srcp_src)
539                         continue;
540
541                 /* Shuffle the sources, so we can put the
542                  * presubtract source in the correct place. */
543                 for(arg = 0; arg < info->NumSrcRegs; arg++) {
544                         /*If this arg does not read from an rgb source,
545                          * do nothing. */
546                         if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
547                                                                 & type)) {
548                                 continue;
549                         }
550
551                         if (dst_full->RGB.Arg[arg].Source == srcp_src)
552                                 dst_full->RGB.Arg[arg].Source = free_source;
553                         /* We need to do this just in case register
554                          * is one of the sources already, but in the
555                          * wrong spot. */
556                         else if(dst_full->RGB.Arg[arg].Source == free_source
557                                                         && !one_way) {
558                                 dst_full->RGB.Arg[arg].Source = srcp_src;
559                         }
560                 }
561         }
562         return 1;
563 }
564
565
566 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
567 static int destructive_merge_instructions(
568                 struct rc_pair_instruction * rgb,
569                 struct rc_pair_instruction * alpha)
570 {
571         const struct rc_opcode_info * opcode;
572
573         assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
574         assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
575
576         /* Presubtract registers need to be merged first so that registers
577          * needed by the presubtract operation can be placed in src0 and/or
578          * src1. */
579
580         /* Merge the rgb presubtract registers. */
581         if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
582                 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
583                         return 0;
584                 }
585         }
586         /* Merge the alpha presubtract registers */
587         if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
588                 if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
589                         return 0;
590                 }
591         }
592
593         /* Copy alpha args into rgb */
594         opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
595
596         for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
597                 unsigned int srcrgb = 0;
598                 unsigned int srcalpha = 0;
599                 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
600                 rc_register_file file = 0;
601                 unsigned int index = 0;
602                 int source;
603
604                 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
605                         srcrgb = 1;
606                         file = alpha->RGB.Src[oldsrc].File;
607                         index = alpha->RGB.Src[oldsrc].Index;
608                 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
609                         srcalpha = 1;
610                         file = alpha->Alpha.Src[oldsrc].File;
611                         index = alpha->Alpha.Src[oldsrc].Index;
612                 }
613
614                 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
615                 if (source < 0)
616                         return 0;
617
618                 rgb->Alpha.Arg[arg].Source = source;
619                 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
620                 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
621                 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
622         }
623
624         /* Copy alpha opcode into rgb */
625         rgb->Alpha.Opcode = alpha->Alpha.Opcode;
626         rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
627         rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
628         rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
629         rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
630         rgb->Alpha.Saturate = alpha->Alpha.Saturate;
631         rgb->Alpha.Omod = alpha->Alpha.Omod;
632
633         /* Merge ALU result writing */
634         if (alpha->WriteALUResult) {
635                 if (rgb->WriteALUResult)
636                         return 0;
637
638                 rgb->WriteALUResult = alpha->WriteALUResult;
639                 rgb->ALUResultCompare = alpha->ALUResultCompare;
640         }
641
642         /* Copy SemWait */
643         rgb->SemWait |= alpha->SemWait;
644
645         return 1;
646 }
647
648 /**
649  * Try to merge the given instructions into the rgb instructions.
650  *
651  * Return true on success; on failure, return false, and keep
652  * the instructions untouched.
653  */
654 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
655 {
656         struct rc_pair_instruction backup;
657
658         /*Instructions can't write output registers and ALU result at the
659          * same time. */
660         if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
661                 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
662                 return 0;
663         }
664
665         /* Writing output registers in the middle of shaders is slow, so
666          * we don't want to pair output writes with temp writes. */
667         if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
668                 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
669                 return 0;
670         }
671
672         memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
673
674         if (destructive_merge_instructions(rgb, alpha))
675                 return 1;
676
677         memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
678         return 0;
679 }
680
681 static void presub_nop(struct rc_instruction * emitted) {
682         int prev_rgb_index, prev_alpha_index, i, num_src;
683
684         /* We don't need a nop if the previous instruction is a TEX. */
685         if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
686                 return;
687         }
688         if (emitted->Prev->U.P.RGB.WriteMask)
689                 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
690         else
691                 prev_rgb_index = -1;
692         if (emitted->Prev->U.P.Alpha.WriteMask)
693                 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
694         else
695                 prev_alpha_index = 1;
696
697         /* Check the previous rgb instruction */
698         if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
699                 num_src = rc_presubtract_src_reg_count(
700                                 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
701                 for (i = 0; i < num_src; i++) {
702                         unsigned int index = emitted->U.P.RGB.Src[i].Index;
703                         if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
704                             && (index  == prev_rgb_index
705                                 || index == prev_alpha_index)) {
706                                 emitted->Prev->U.P.Nop = 1;
707                                 return;
708                         }
709                 }
710         }
711
712         /* Check the previous alpha instruction. */
713         if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
714                 return;
715
716         num_src = rc_presubtract_src_reg_count(
717                                 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
718         for (i = 0; i < num_src; i++) {
719                 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
720                 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
721                    && (index == prev_rgb_index || index == prev_alpha_index)) {
722                         emitted->Prev->U.P.Nop = 1;
723                         return;
724                 }
725         }
726 }
727
728 static void rgb_to_alpha_remap (
729         struct rc_instruction * inst,
730         struct rc_pair_instruction_arg * arg,
731         rc_register_file old_file,
732         rc_swizzle old_swz,
733         unsigned int new_index)
734 {
735         int new_src_index;
736         unsigned int i;
737
738         for (i = 0; i < 3; i++) {
739                 if (get_swz(arg->Swizzle, i) == old_swz) {
740                         SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
741                 }
742         }
743         new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
744                                                         old_file, new_index);
745         /* This conversion is not possible, we must have made a mistake in
746          * is_rgb_to_alpha_possible. */
747         if (new_src_index < 0) {
748                 assert(0);
749                 return;
750         }
751
752         arg->Source = new_src_index;
753 }
754
755 static int can_remap(unsigned int opcode)
756 {
757         switch(opcode) {
758         case RC_OPCODE_DDX:
759         case RC_OPCODE_DDY:
760                 return 0;
761         default:
762                 return 1;
763         }
764 }
765
766 static int can_convert_opcode_to_alpha(unsigned int opcode)
767 {
768         switch(opcode) {
769         case RC_OPCODE_DDX:
770         case RC_OPCODE_DDY:
771         case RC_OPCODE_DP2:
772         case RC_OPCODE_DP3:
773         case RC_OPCODE_DP4:
774         case RC_OPCODE_DPH:
775                 return 0;
776         default:
777                 return 1;
778         }
779 }
780
781 static void is_rgb_to_alpha_possible(
782         void * userdata,
783         struct rc_instruction * inst,
784         struct rc_pair_instruction_arg * arg,
785         struct rc_pair_instruction_source * src)
786 {
787         unsigned int read_chan = RC_SWIZZLE_UNUSED;
788         unsigned int alpha_sources = 0;
789         unsigned int i;
790         struct rc_reader_data * reader_data = userdata;
791
792         if (!can_remap(inst->U.P.RGB.Opcode)
793             || !can_remap(inst->U.P.Alpha.Opcode)) {
794                 reader_data->Abort = 1;
795                 return;
796         }
797
798         if (!src)
799                 return;
800
801         /* XXX There are some cases where we can still do the conversion if
802          * a reader reads from a presubtract source, but for now we'll prevent
803          * it. */
804         if (arg->Source == RC_PAIR_PRESUB_SRC) {
805                 reader_data->Abort = 1;
806                 return;
807         }
808
809         /* Make sure the source only reads the register component that we
810          * are going to be convering from.  It is OK if the instruction uses
811          * this component more than once.
812          * XXX If the index we will be converting to is the same as the
813          * current index, then it is OK to read from more than one component.
814          */
815         for (i = 0; i < 3; i++) {
816                 rc_swizzle swz = get_swz(arg->Swizzle, i);
817                 switch(swz) {
818                 case RC_SWIZZLE_X:
819                 case RC_SWIZZLE_Y:
820                 case RC_SWIZZLE_Z:
821                 case RC_SWIZZLE_W:
822                         if (read_chan == RC_SWIZZLE_UNUSED) {
823                                 read_chan = swz;
824                         } else if (read_chan != swz) {
825                                 reader_data->Abort = 1;
826                                 return;
827                         }
828                         break;
829                 default:
830                         break;
831                 }
832         }
833
834         /* Make sure there are enough alpha sources.
835          * XXX If we know what register all the readers are going
836          * to be remapped to, then in some situations we can still do
837          * the subsitution, even if all 3 alpha sources are being used.*/
838         for (i = 0; i < 3; i++) {
839                 if (inst->U.P.Alpha.Src[i].Used) {
840                         alpha_sources++;
841                 }
842         }
843         if (alpha_sources > 2) {
844                 reader_data->Abort = 1;
845                 return;
846         }
847 }
848
849 static int convert_rgb_to_alpha(
850         struct schedule_state * s,
851         struct schedule_instruction * sched_inst)
852 {
853         struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
854         unsigned int old_mask = pair_inst->RGB.WriteMask;
855         unsigned int old_swz = rc_mask_to_swizzle(old_mask);
856         const struct rc_opcode_info * info =
857                                 rc_get_opcode_info(pair_inst->RGB.Opcode);
858         int new_index = -1;
859         unsigned int i;
860
861         if (sched_inst->GlobalReaders.Abort)
862                 return 0;
863
864         if (!pair_inst->RGB.WriteMask)
865                 return 0;
866
867         if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
868             || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
869                 return 0;
870         }
871
872         assert(sched_inst->NumWriteValues == 1);
873
874         if (!sched_inst->WriteValues[0]) {
875                 assert(0);
876                 return 0;
877         }
878
879         /* We start at the old index, because if we can reuse the same
880          * register and just change the swizzle then it is more likely we
881          * will be able to convert all the readers. */
882         for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
883                 struct reg_value ** new_regvalp = get_reg_valuep(
884                                                 s, RC_FILE_TEMPORARY, i, 3);
885                 if (!*new_regvalp) {
886                         struct reg_value ** old_regvalp =
887                                 get_reg_valuep(s,
888                                         RC_FILE_TEMPORARY,
889                                         pair_inst->RGB.DestIndex,
890                                         rc_mask_to_swizzle(old_mask));
891                         new_index = i;
892                         *new_regvalp = *old_regvalp;
893                         *old_regvalp = NULL;
894                         new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
895                         break;
896                 }
897         }
898         if (new_index < 0) {
899                 return 0;
900         }
901
902         /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
903          * as the RGB opcode, then the Alpha instruction will already contain
904          * the correct opcode and instruction args, so we do not want to
905          * overwrite them.
906          */
907         if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
908                 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
909                 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
910                                                 sizeof(pair_inst->Alpha.Arg));
911         }
912         pair_inst->Alpha.DestIndex = new_index;
913         pair_inst->Alpha.WriteMask = RC_MASK_W;
914         pair_inst->Alpha.Target = pair_inst->RGB.Target;
915         pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
916         pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
917         pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
918         pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
919         /* Move the swizzles into the first chan */
920         for (i = 0; i < info->NumSrcRegs; i++) {
921                 unsigned int j;
922                 for (j = 0; j < 3; j++) {
923                         unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
924                         if (swz != RC_SWIZZLE_UNUSED) {
925                                 pair_inst->Alpha.Arg[i].Swizzle =
926                                                         rc_init_swizzle(swz, 1);
927                                 break;
928                         }
929                 }
930         }
931         pair_inst->RGB.Opcode = RC_OPCODE_NOP;
932         pair_inst->RGB.DestIndex = 0;
933         pair_inst->RGB.WriteMask = 0;
934         pair_inst->RGB.Target = 0;
935         pair_inst->RGB.OutputWriteMask = 0;
936         pair_inst->RGB.DepthWriteMask = 0;
937         pair_inst->RGB.Saturate = 0;
938         memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
939
940         for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
941                 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
942                 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
943                                         RC_FILE_TEMPORARY, old_swz, new_index);
944         }
945         return 1;
946 }
947
948 static void try_convert_and_pair(
949         struct schedule_state *s,
950         struct schedule_instruction ** inst_list)
951 {
952         struct schedule_instruction * list_ptr = *inst_list;
953         while (list_ptr && *inst_list && (*inst_list)->NextReady) {
954                 int paired = 0;
955                 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
956                         && list_ptr->Instruction->U.P.RGB.Opcode
957                                                 != RC_OPCODE_REPL_ALPHA) {
958                                 goto next;
959                 }
960                 if (list_ptr->NumWriteValues == 1
961                                         && convert_rgb_to_alpha(s, list_ptr)) {
962
963                         struct schedule_instruction * pair_ptr;
964                         remove_inst_from_list(inst_list, list_ptr);
965                         add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
966
967                         for (pair_ptr = s->ReadyRGB; pair_ptr;
968                                         pair_ptr = pair_ptr->NextReady) {
969                                 if (merge_instructions(&pair_ptr->Instruction->U.P,
970                                                 &list_ptr->Instruction->U.P)) {
971                                         remove_inst_from_list(&s->ReadyAlpha, list_ptr);
972                                         remove_inst_from_list(&s->ReadyRGB, pair_ptr);
973                                         pair_ptr->PairedInst = list_ptr;
974
975                                         add_inst_to_list(&s->ReadyFullALU, pair_ptr);
976                                         list_ptr = *inst_list;
977                                         paired = 1;
978                                         break;
979                                 }
980
981                         }
982                 }
983                 if (!paired) {
984 next:
985                         list_ptr = list_ptr->NextReady;
986                 }
987         }
988 }
989
990 /**
991  * This function attempts to merge RGB and Alpha instructions together.
992  */
993 static void pair_instructions(struct schedule_state * s)
994 {
995         struct schedule_instruction *rgb_ptr;
996         struct schedule_instruction *alpha_ptr;
997
998         /* Some pairings might fail because they require too
999          * many source slots; try all possible pairings if necessary */
1000         rgb_ptr = s->ReadyRGB;
1001         while(rgb_ptr) {
1002                 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1003                 alpha_ptr = s->ReadyAlpha;
1004                 while(alpha_ptr) {
1005                         struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1006                         if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1007                                 /* Remove RGB and Alpha from their ready lists.
1008                                  */
1009                                 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1010                                 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1011                                 rgb_ptr->PairedInst = alpha_ptr;
1012                                 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1013                                 break;
1014                         }
1015                         alpha_ptr = alpha_next;
1016                 }
1017                 rgb_ptr = rgb_next;
1018         }
1019
1020         if (!s->Opt) {
1021                 return;
1022         }
1023
1024         /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1025          * slot can be converted into Alpha instructions. */
1026         try_convert_and_pair(s, &s->ReadyFullALU);
1027
1028         /* Try to convert some of the RGB instructions to Alpha and
1029          * try to pair it with another RGB. */
1030         try_convert_and_pair(s, &s->ReadyRGB);
1031 }
1032
1033 static void update_max_score(
1034         struct schedule_state * s,
1035         struct schedule_instruction ** list,
1036         int * max_score,
1037         struct schedule_instruction ** max_inst_out,
1038         struct schedule_instruction *** list_out)
1039 {
1040         struct schedule_instruction * list_ptr;
1041         for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1042                 int score;
1043                 s->CalcScore(list_ptr);
1044                 score = list_ptr->Score;
1045                 if (!*max_inst_out || score > *max_score) {
1046                         *max_score = score;
1047                         *max_inst_out = list_ptr;
1048                         *list_out = list;
1049                 }
1050         }
1051 }
1052
1053 static void emit_instruction(
1054         struct schedule_state * s,
1055         struct rc_instruction * before)
1056 {
1057         int max_score = -1;
1058         struct schedule_instruction * max_inst = NULL;
1059         struct schedule_instruction ** max_list = NULL;
1060         unsigned tex_count = 0;
1061         struct schedule_instruction * tex_ptr;
1062
1063         pair_instructions(s);
1064 #if VERBOSE
1065         fprintf(stderr, "Full:\n");
1066         print_list(s->ReadyFullALU);
1067         fprintf(stderr, "RGB:\n");
1068         print_list(s->ReadyRGB);
1069         fprintf(stderr, "Alpha:\n");
1070         print_list(s->ReadyAlpha);
1071         fprintf(stderr, "TEX:\n");
1072         print_list(s->ReadyTEX);
1073 #endif
1074
1075         for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1076                 tex_count++;
1077         }
1078         update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1079         update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1080         update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1081
1082         if (tex_count >= s->max_tex_group || max_score == -1
1083                 || (s->TEXCount > 0 && tex_count == s->TEXCount)) {
1084                 emit_all_tex(s, before);
1085         } else {
1086
1087
1088                 remove_inst_from_list(max_list, max_inst);
1089                 rc_insert_instruction(before->Prev, max_inst->Instruction);
1090                 commit_alu_instruction(s, max_inst);
1091
1092                 presub_nop(before->Prev);
1093         }
1094 }
1095
1096 /**
1097  * Find a good ALU instruction or pair of ALU instruction and emit it.
1098  *
1099  * Prefer emitting full ALU instructions, so that when we reach a point
1100  * where no full ALU instruction can be emitted, we have more candidates
1101  * for RGB/Alpha pairing.
1102  */
1103 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
1104 {
1105         struct schedule_instruction * sinst;
1106         int rgb_score = -1, alpha_score = -1;
1107
1108         /* Try to merge RGB and Alpha instructions together. */
1109         pair_instructions(s);
1110
1111         if (s->ReadyFullALU) {
1112                 sinst = s->ReadyFullALU;
1113                 s->ReadyFullALU = s->ReadyFullALU->NextReady;
1114                 rc_insert_instruction(before->Prev, sinst->Instruction);
1115                 commit_alu_instruction(s, sinst);
1116         } else {
1117                 if (s->ReadyRGB) {
1118                         rgb_score = s->ReadyRGB->Score;
1119                 }
1120                 if (s->ReadyAlpha) {
1121                         alpha_score = s->ReadyAlpha->Score;
1122                 }
1123                 if (rgb_score > alpha_score) {
1124                         sinst = s->ReadyRGB;
1125                         s->ReadyRGB = s->ReadyRGB->NextReady;
1126                 } else if (s->ReadyAlpha) {
1127                         sinst = s->ReadyAlpha;
1128                         s->ReadyAlpha = s->ReadyAlpha->NextReady;
1129                 } else {
1130                         /*XXX Something real bad has happened. */
1131                         assert(0);
1132                 }
1133
1134                 rc_insert_instruction(before->Prev, sinst->Instruction);
1135                 commit_alu_instruction(s, sinst);
1136         }
1137         /* If the instruction we just emitted uses a presubtract value, and
1138          * the presubtract sources were written by the previous intstruction,
1139          * the previous instruction needs a nop. */
1140         presub_nop(before->Prev);
1141 }
1142
1143 static void add_tex_reader(
1144         struct schedule_state * s,
1145         struct schedule_instruction * writer,
1146         struct schedule_instruction * reader)
1147 {
1148         if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1149                 /*Not a TEX instructions */
1150                 return;
1151         }
1152         reader->TexReadCount++;
1153         rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1154 }
1155
1156 static void scan_read(void * data, struct rc_instruction * inst,
1157                 rc_register_file file, unsigned int index, unsigned int chan)
1158 {
1159         struct schedule_state * s = data;
1160         struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1161         struct reg_value_reader * reader;
1162
1163         if (!v)
1164                 return;
1165
1166         if (*v && (*v)->Writer == s->Current) {
1167                 /* The instruction reads and writes to a register component.
1168                  * In this case, we only want to increment dependencies by one.
1169                  * Why?
1170                  * Because each instruction depends on the writers of its source
1171                  * registers _and_ the most recent writer of its destination
1172                  * register.  In this case, the current instruction (s->Current)
1173                  * has a dependency that both writes to one of its source
1174                  * registers and was the most recent writer to its destination
1175                  * register.  We have already marked this dependency in
1176                  * scan_write(), so we don't need to do it again.
1177                  */
1178
1179                 /* We need to make sure we are adding s->Current to the
1180                  * previous writer's list of TexReaders, if the previous writer
1181                  * was a TEX instruction.
1182                  */
1183                 add_tex_reader(s, s->PrevWriter[chan], s->Current);
1184
1185                 return;
1186         }
1187
1188         DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1189
1190         reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1191         reader->Reader = s->Current;
1192         if (!*v) {
1193                 /* In this situation, the instruction reads from a register
1194                  * that hasn't been written to or read from in the current
1195                  * block. */
1196                 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1197                 memset(*v, 0, sizeof(struct reg_value));
1198                 (*v)->Readers = reader;
1199         } else {
1200                 reader->Next = (*v)->Readers;
1201                 (*v)->Readers = reader;
1202                 /* Only update the current instruction's dependencies if the
1203                  * register it reads from has been written to in this block. */
1204                 if ((*v)->Writer) {
1205                         add_tex_reader(s, (*v)->Writer, s->Current);
1206                         s->Current->NumDependencies++;
1207                 }
1208         }
1209         (*v)->NumReaders++;
1210
1211         if (s->Current->NumReadValues >= 12) {
1212                 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
1213         } else {
1214                 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1215         }
1216 }
1217
1218 static void scan_write(void * data, struct rc_instruction * inst,
1219                 rc_register_file file, unsigned int index, unsigned int chan)
1220 {
1221         struct schedule_state * s = data;
1222         struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1223         struct reg_value * newv;
1224
1225         if (!pv)
1226                 return;
1227
1228         DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1229
1230         newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1231         memset(newv, 0, sizeof(*newv));
1232
1233         newv->Writer = s->Current;
1234
1235         if (*pv) {
1236                 (*pv)->Next = newv;
1237                 s->Current->NumDependencies++;
1238                 /* Keep track of the previous writer to s->Current's destination
1239                  * register */
1240                 s->PrevWriter[chan] = (*pv)->Writer;
1241         }
1242
1243         *pv = newv;
1244
1245         if (s->Current->NumWriteValues >= 4) {
1246                 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
1247         } else {
1248                 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1249         }
1250 }
1251
1252 static void is_rgb_to_alpha_possible_normal(
1253         void * userdata,
1254         struct rc_instruction * inst,
1255         struct rc_src_register * src)
1256 {
1257         struct rc_reader_data * reader_data = userdata;
1258         reader_data->Abort = 1;
1259
1260 }
1261
1262 static void schedule_block(struct schedule_state * s,
1263                 struct rc_instruction * begin, struct rc_instruction * end)
1264 {
1265         unsigned int ip;
1266
1267         /* Scan instructions for data dependencies */
1268         ip = 0;
1269         for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1270                 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1271                 memset(s->Current, 0, sizeof(struct schedule_instruction));
1272
1273                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1274                         const struct rc_opcode_info * info =
1275                                         rc_get_opcode_info(inst->U.I.Opcode);
1276                         if (info->HasTexture) {
1277                                 s->TEXCount++;
1278                         }
1279                 }
1280
1281                 /* XXX: This causes SemWait to be set for all instructions in
1282                  * a block if the previous block contained a TEX instruction.
1283                  * We can do better here, but it will take a lot of work. */
1284                 if (s->PrevBlockHasTex) {
1285                         s->Current->TexReadCount = 1;
1286                 }
1287
1288                 s->Current->Instruction = inst;
1289                 inst->IP = ip++;
1290
1291                 DBG("%i: Scanning\n", inst->IP);
1292
1293                 /* The order of things here is subtle and maybe slightly
1294                  * counter-intuitive, to account for the case where an
1295                  * instruction writes to the same register as it reads
1296                  * from. */
1297                 rc_for_all_writes_chan(inst, &scan_write, s);
1298                 rc_for_all_reads_chan(inst, &scan_read, s);
1299
1300                 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1301
1302                 if (!s->Current->NumDependencies) {
1303                         instruction_ready(s, s->Current);
1304                 }
1305
1306                 /* Get global readers for possible RGB->Alpha conversion. */
1307                 s->Current->GlobalReaders.ExitOnAbort = 1;
1308                 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1309                                 is_rgb_to_alpha_possible_normal,
1310                                 is_rgb_to_alpha_possible, NULL);
1311         }
1312
1313         /* Temporarily unlink all instructions */
1314         begin->Prev->Next = end;
1315         end->Prev = begin->Prev;
1316
1317         /* Schedule instructions back */
1318         while(!s->C->Error &&
1319               (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1320                 if (s->C->is_r500) {
1321                         emit_instruction(s, end);
1322                 } else {
1323                         if (s->ReadyTEX)
1324                                 emit_all_tex(s, end);
1325
1326                         while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha))
1327                                 emit_one_alu(s, end);
1328                 }
1329         }
1330 }
1331
1332 static int is_controlflow(struct rc_instruction * inst)
1333 {
1334         if (inst->Type == RC_INSTRUCTION_NORMAL) {
1335                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1336                 return opcode->IsFlowControl;
1337         }
1338         return 0;
1339 }
1340
1341 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1342 {
1343         struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1344         struct schedule_state s;
1345         struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1346         unsigned int * opt = user;
1347
1348         memset(&s, 0, sizeof(s));
1349         s.Opt = *opt;
1350         s.C = &c->Base;
1351         s.CalcScore = calc_score_readers;
1352         s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1353         while(inst != &c->Base.Program.Instructions) {
1354                 struct rc_instruction * first;
1355
1356                 if (is_controlflow(inst)) {
1357                         inst = inst->Next;
1358                         continue;
1359                 }
1360
1361                 first = inst;
1362
1363                 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1364                         inst = inst->Next;
1365
1366                 DBG("Schedule one block\n");
1367                 memset(s.Temporary, 0, sizeof(s.Temporary));
1368                 s.TEXCount = 0;
1369                 schedule_block(&s, first, inst);
1370                 if (s.PendingTEX) {
1371                         s.PrevBlockHasTex = 1;
1372                 }
1373         }
1374 }