Fix utc_ecore_evas_pointer_xy_get timeout issue
[platform/upstream/mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_schedule.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
37
38 #include "util/u_debug.h"
39
/* Set to a nonzero value to compile in the scheduler's debug tracing. */
#define VERBOSE 0

/* printf-style trace to stderr; prints nothing while VERBOSE is 0. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
43
44 struct schedule_instruction {
45         struct rc_instruction * Instruction;
46
47         /** Next instruction in the linked list of ready instructions. */
48         struct schedule_instruction *NextReady;
49
50         /** Values that this instruction reads and writes */
51         struct reg_value * WriteValues[4];
52         struct reg_value * ReadValues[12];
53         unsigned int NumWriteValues:3;
54         unsigned int NumReadValues:4;
55
56         /**
57          * Number of (read and write) dependencies that must be resolved before
58          * this instruction can be scheduled.
59          */
60         unsigned int NumDependencies:5;
61
62         /** List of all readers (see rc_get_readers() for the definition of
63          * "all readers"), even those outside the basic block this instruction
64          * lives in. */
65         struct rc_reader_data GlobalReaders;
66
67         /** If the scheduler has paired an RGB and an Alpha instruction together,
68          * PairedInst references the alpha instruction's dependency information.
69          */
70         struct schedule_instruction * PairedInst;
71
72         /** This scheduler uses the value of Score to determine which
73          * instruction to schedule.  Instructions with a higher value of Score
74          * will be scheduled first. */
75         int Score;
76
77         /** The number of components that read from a TEX instruction. */
78         unsigned TexReadCount;
79
80         /** For TEX instructions a list of readers */
81         struct rc_list * TexReaders;
82 };
83
84
/**
 * One node of the singly linked list of instructions that read a value.
 */
struct reg_value_reader {
	/** The reading instruction. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value, or NULL. */
	struct reg_value_reader *Next;
};
92
/**
 * Tracks one value stored in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** Instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * Once the writer is committed, the dependency count of every
	 * reader on this list is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers still to be committed; decremented each time a
	 * reader of the value is committed.  When it reaches zero, the
	 * dependency count of the instruction writing \ref Next is
	 * decremented.
	 */
	unsigned int NumReaders;

	/** The next value that will be written to the same register. */
	struct reg_value *Next;
};
117
/** Per-register table of tracked values, one slot per channel. */
struct register_state {
	struct reg_value *Values[4];
};
121
122 struct remap_reg {
123         struct rc_instruction * Inst;
124         unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125         unsigned int OldSwizzle:3;
126         unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127         unsigned int NewSwizzle:3;
128         unsigned int OnlyTexReads:1;
129         struct remap_reg * Next;
130 };
131
132 struct schedule_state {
133         struct radeon_compiler * C;
134         struct schedule_instruction * Current;
135         /** Array of the previous writers of Current's destination register
136          * indexed by channel. */
137         struct schedule_instruction * PrevWriter[4];
138
139         struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140
141         /**
142          * Linked lists of instructions that can be scheduled right now,
143          * based on which ALU/TEX resources they require.
144          */
145         /*@{*/
146         struct schedule_instruction *ReadyFullALU;
147         struct schedule_instruction *ReadyRGB;
148         struct schedule_instruction *ReadyAlpha;
149         struct schedule_instruction *ReadyTEX;
150         /*@}*/
151         struct rc_list *PendingTEX;
152
153         void (*CalcScore)(struct schedule_instruction *);
154         long max_tex_group;
155         unsigned PrevBlockHasTex:1;
156         unsigned PrevBlockHasKil:1;
157         unsigned TEXCount;
158         unsigned Opt:1;
159 };
160
161 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
162                 rc_register_file file, unsigned int index, unsigned int chan)
163 {
164         if (file != RC_FILE_TEMPORARY)
165                 return NULL;
166
167         if (index >= RC_REGISTER_MAX_INDEX) {
168                 rc_error(s->C, "%s: index %i out of bounds\n", __func__, index);
169                 return NULL;
170         }
171
172         return &s->Temporary[index].Values[chan];
173 }
174
175 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
176 {
177         unsigned tex_read_count = sinst->TexReadCount;
178         if (sinst->PairedInst) {
179                 tex_read_count += sinst->PairedInst->TexReadCount;
180         }
181         return tex_read_count;
182 }
183
#if VERBOSE
/**
 * Debug helper: dump a ready list to stderr as
 * "IP (score) [tex read count]," entries followed by a newline.
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		/* Report the score of the entry being visited, not the score
		 * of the list head, and keep it signed to match %d. */
		int score = ptr->Score;
		unsigned tex_read_count = get_tex_read_count(ptr);
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
197
198 static void remove_inst_from_list(struct schedule_instruction ** list,
199                                         struct schedule_instruction * inst)
200 {
201         struct schedule_instruction * prev = NULL;
202         struct schedule_instruction * list_ptr;
203         for (list_ptr = *list; list_ptr; prev = list_ptr,
204                                         list_ptr = list_ptr->NextReady) {
205                 if (list_ptr == inst) {
206                         if (prev) {
207                                 prev->NextReady = inst->NextReady;
208                         } else {
209                                 *list = inst->NextReady;
210                         }
211                         inst->NextReady = NULL;
212                         break;
213                 }
214         }
215 }
216
217 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
218 {
219         inst->NextReady = *list;
220         *list = inst;
221 }
222
223 static void add_inst_to_list_score(struct schedule_instruction ** list,
224                                         struct schedule_instruction * inst)
225 {
226         struct schedule_instruction * temp;
227         struct schedule_instruction * prev;
228         if (!*list) {
229                 *list = inst;
230                 return;
231         }
232         temp = *list;
233         prev = NULL;
234         while(temp && inst->Score <= temp->Score) {
235                 prev = temp;
236                 temp = temp->NextReady;
237         }
238
239         if (!prev) {
240                 inst->NextReady = temp;
241                 *list = inst;
242         } else {
243                 prev->NextReady = inst;
244                 inst->NextReady = temp;
245         }
246 }
247
248 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
249 {
250         DBG("%i is now ready\n", sinst->Instruction->IP);
251
252         /* Adding Ready TEX instructions to the end of the "Ready List" helps
253          * us emit TEX instructions in blocks without losing our place. */
254         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
255                 add_inst_to_list_score(&s->ReadyTEX, sinst);
256         else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
257                 add_inst_to_list_score(&s->ReadyRGB, sinst);
258         else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
259                 add_inst_to_list_score(&s->ReadyAlpha, sinst);
260         else
261                 add_inst_to_list_score(&s->ReadyFullALU, sinst);
262 }
263
264 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
265 {
266         assert(sinst->NumDependencies > 0);
267         sinst->NumDependencies--;
268         if (!sinst->NumDependencies)
269                 instruction_ready(s, sinst);
270 }
271
272 /* These functions provide different heuristics for scheduling instructions.
273  * The default is calc_score_readers. */
274
#if 0

/* Compiled-out heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/* Compiled-out heuristic: score by the dependency counts of the readers of
 * the written values, with a bonus of 100 for each reader that has exactly
 * one dependency left. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;
	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		struct reg_value_reader * r;

		if (!v->NumReaders)
			continue;

		for (r = v->Readers; r; r = r->Next) {
			int deps = r->Reader->NumDependencies;

			if (deps == 1)
				sinst->Score += 100;
			sinst->Score += deps;
		}
	}
}

#endif
301
302 #define NO_OUTPUT_SCORE (1 << 24)
303
304 static void score_no_output(struct schedule_instruction * sinst)
305 {
306         assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
307         if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
308                         !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
309                 if (sinst->PairedInst) {
310                         if (!sinst->PairedInst->Instruction->U.P.
311                                                         RGB.OutputWriteMask
312                                         && !sinst->PairedInst->Instruction->U.P.
313                                                         Alpha.OutputWriteMask) {
314                                 sinst->Score |= NO_OUTPUT_SCORE;
315                         }
316
317                 } else {
318                         sinst->Score |= NO_OUTPUT_SCORE;
319                 }
320         }
321 }
322
323 #define PAIRED_SCORE (1 << 16)
324
325 static void calc_score_r300(struct schedule_instruction * sinst)
326 {
327         unsigned src_idx;
328
329         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
330                 sinst->Score = 0;
331                 return;
332         }
333
334         score_no_output(sinst);
335
336         if (sinst->PairedInst) {
337                 sinst->Score |= PAIRED_SCORE;
338                 return;
339         }
340
341         for (src_idx = 0; src_idx < 4; src_idx++) {
342                 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
343                                 sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
344         }
345 }
346
347 #define NO_READ_TEX_SCORE (1 << 16)
348
349 static void calc_score_readers(struct schedule_instruction * sinst)
350 {
351         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
352                 sinst->Score = 0;
353         } else {
354                 sinst->Score = sinst->NumReadValues;
355                 if (sinst->PairedInst) {
356                         sinst->Score += sinst->PairedInst->NumReadValues;
357                 }
358                 if (get_tex_read_count(sinst) == 0) {
359                         sinst->Score |= NO_READ_TEX_SCORE;
360                 }
361                 score_no_output(sinst);
362         }
363 }
364
365 /**
366  * This function decreases the dependencies of the next instruction that
367  * wants to write to each of sinst's read values.
368  */
369 static void commit_update_reads(struct schedule_state * s,
370                                         struct schedule_instruction * sinst){
371         do {
372                 for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
373                         struct reg_value * v = sinst->ReadValues[i];
374                         assert(v->NumReaders > 0);
375                         v->NumReaders--;
376                         if (!v->NumReaders) {
377                                 if (v->Next) {
378                                         decrease_dependencies(s, v->Next->Writer);
379                                 }
380                         }
381                 }
382         } while ((sinst = sinst->PairedInst));
383 }
384
385 static void commit_update_writes(struct schedule_state * s,
386                                         struct schedule_instruction * sinst){
387         do {
388                 for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
389                         struct reg_value * v = sinst->WriteValues[i];
390                         if (v->NumReaders) {
391                                 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
392                                         decrease_dependencies(s, r->Reader);
393                                 }
394                         } else {
395                                 /* This happens in instruction sequences of the type
396                                  *  OP r.x, ...;
397                                  *  OP r.x, r.x, ...;
398                                  * See also the subtlety in how instructions that both
399                                  * read and write the same register are scanned.
400                                  */
401                                 if (v->Next)
402                                         decrease_dependencies(s, v->Next->Writer);
403                         }
404                 }
405         } while ((sinst = sinst->PairedInst));
406 }
407
408 static void notify_sem_wait(struct schedule_state *s)
409 {
410         struct rc_list * pend_ptr;
411         for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
412                 struct rc_list * read_ptr;
413                 struct schedule_instruction * pending = pend_ptr->Item;
414                 for (read_ptr = pending->TexReaders; read_ptr;
415                                                 read_ptr = read_ptr->Next) {
416                         struct schedule_instruction * reader = read_ptr->Item;
417                         reader->TexReadCount--;
418                 }
419         }
420         s->PendingTEX = NULL;
421 }
422
423 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
424 {
425         DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
426
427         commit_update_reads(s, sinst);
428
429         commit_update_writes(s, sinst);
430
431         if (get_tex_read_count(sinst) > 0) {
432                 sinst->Instruction->U.P.SemWait = 1;
433                 notify_sem_wait(s);
434         }
435 }
436
437 /**
438  * Emit all ready texture instructions in a single block.
439  *
440  * Emit as a single block to (hopefully) sample many textures in parallel,
441  * and to avoid hardware indirections on R300.
442  */
443 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
444 {
445         struct schedule_instruction *readytex;
446         struct rc_instruction * inst_begin;
447
448         assert(s->ReadyTEX);
449         notify_sem_wait(s);
450
451         /* Node marker for R300 */
452         inst_begin = rc_insert_new_instruction(s->C, before->Prev);
453         inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
454
455         /* Link texture instructions back in */
456         readytex = s->ReadyTEX;
457         while(readytex) {
458                 rc_insert_instruction(before->Prev, readytex->Instruction);
459                 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
460
461                 /* All of the TEX instructions in the same TEX block have
462                  * their source registers read from before any of the
463                  * instructions in that block write to their destination
464                  * registers.  This means that when we commit a TEX
465                  * instruction, any other TEX instruction that wants to write
466                  * to one of the committed instruction's source register can be
467                  * marked as ready and should be emitted in the same TEX
468                  * block. This prevents the following sequence from being
469                  * emitted in two different TEX blocks:
470                  * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
471                  * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
472                  */
473                 commit_update_reads(s, readytex);
474                 readytex = readytex->NextReady;
475         }
476         readytex = s->ReadyTEX;
477         s->ReadyTEX = NULL;
478         while(readytex){
479                 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
480                 commit_update_writes(s, readytex);
481                 /* Set semaphore bits for last TEX instruction in the block */
482                 if (!readytex->NextReady) {
483                         readytex->Instruction->U.I.TexSemAcquire = 1;
484                         readytex->Instruction->U.I.TexSemWait = 1;
485                 }
486                 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
487                 readytex = readytex->NextReady;
488         }
489 }
490
491 /* This is a helper function for destructive_merge_instructions().  It helps
492  * merge presubtract sources from two instructions and makes sure the
493  * presubtract sources end up in the correct spot.  This function assumes that
494  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
495  * but no scalar instruction (alpha).
496  * @return 0 if merging the presubtract sources fails.
497  * @return 1 if merging the presubtract sources succeeds.
498  */
499 static int merge_presub_sources(
500         struct rc_pair_instruction * dst_full,
501         struct rc_pair_sub_instruction src,
502         unsigned int type)
503 {
504         unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
505         struct rc_pair_sub_instruction * dst_sub;
506         const struct rc_opcode_info * info;
507
508         assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
509
510         switch(type) {
511         case RC_SOURCE_RGB:
512                 is_rgb = 1;
513                 is_alpha = 0;
514                 dst_sub = &dst_full->RGB;
515                 break;
516         case RC_SOURCE_ALPHA:
517                 is_rgb = 0;
518                 is_alpha = 1;
519                 dst_sub = &dst_full->Alpha;
520                 break;
521         default:
522                 assert(0);
523                 return 0;
524         }
525
526         info = rc_get_opcode_info(dst_full->RGB.Opcode);
527
528         if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
529                 return 0;
530
531         srcp_regs = rc_presubtract_src_reg_count(
532                                         src.Src[RC_PAIR_PRESUB_SRC].Index);
533         for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
534                 unsigned int arg;
535                 int free_source;
536                 unsigned int one_way = 0;
537                 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
538                 struct rc_pair_instruction_source temp;
539
540                 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
541                                                         srcp.File, srcp.Index);
542
543                 /* If free_source < 0 then there are no free source
544                  * slots. */
545                 if (free_source < 0)
546                         return 0;
547
548                 temp = dst_sub->Src[srcp_src];
549                 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
550
551                 /* srcp needs src0 and src1 to be the same */
552                 if (free_source < srcp_src) {
553                         if (!temp.Used)
554                                 continue;
555                         free_source = rc_pair_alloc_source(dst_full, is_rgb,
556                                         is_alpha, temp.File, temp.Index);
557                         if (free_source < 0)
558                                 return 0;
559                         one_way = 1;
560                 } else {
561                         dst_sub->Src[free_source] = temp;
562                 }
563
564                 /* If free_source == srcp_src, then the presubtract
565                  * source is already in the correct place. */
566                 if (free_source == srcp_src)
567                         continue;
568
569                 /* Shuffle the sources, so we can put the
570                  * presubtract source in the correct place. */
571                 for(arg = 0; arg < info->NumSrcRegs; arg++) {
572                         /* If the arg does read both from rgb and alpha, then we need to rewrite
573                          * both sources and the code currently doesn't handle this.
574                          * FIXME: This is definitely solvable, however shader-db shows it is
575                          * not worth the effort.
576                          */
577                         if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
578                                 rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
579                                 return 0;
580
581                         /*If this arg does not read from an rgb source,
582                          * do nothing. */
583                         if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
584                                                                 & type)) {
585                                 continue;
586                         }
587
588                         if (dst_full->RGB.Arg[arg].Source == srcp_src)
589                                 dst_full->RGB.Arg[arg].Source = free_source;
590                         /* We need to do this just in case register
591                          * is one of the sources already, but in the
592                          * wrong spot. */
593                         else if(dst_full->RGB.Arg[arg].Source == free_source
594                                                         && !one_way) {
595                                 dst_full->RGB.Arg[arg].Source = srcp_src;
596                         }
597                 }
598         }
599         return 1;
600 }
601
602
603 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
604 static int destructive_merge_instructions(
605                 struct rc_pair_instruction * rgb,
606                 struct rc_pair_instruction * alpha)
607 {
608         const struct rc_opcode_info * opcode;
609
610         assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
611         assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
612
613         /* Presubtract registers need to be merged first so that registers
614          * needed by the presubtract operation can be placed in src0 and/or
615          * src1. */
616
617         /* Merge the rgb presubtract registers. */
618         if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
619                 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
620                         return 0;
621                 }
622         }
623         /* Merge the alpha presubtract registers */
624         if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
625                 if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
626                         return 0;
627                 }
628         }
629
630         /* Copy alpha args into rgb */
631         opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
632
633         for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
634                 unsigned int srcrgb = 0;
635                 unsigned int srcalpha = 0;
636                 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
637                 rc_register_file file = 0;
638                 unsigned int index = 0;
639                 int source;
640
641                 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
642                         srcrgb = 1;
643                         file = alpha->RGB.Src[oldsrc].File;
644                         index = alpha->RGB.Src[oldsrc].Index;
645                 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
646                         srcalpha = 1;
647                         file = alpha->Alpha.Src[oldsrc].File;
648                         index = alpha->Alpha.Src[oldsrc].Index;
649                 }
650
651                 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
652                 if (source < 0)
653                         return 0;
654
655                 rgb->Alpha.Arg[arg].Source = source;
656                 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
657                 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
658                 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
659         }
660
661         /* Copy alpha opcode into rgb */
662         rgb->Alpha.Opcode = alpha->Alpha.Opcode;
663         rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
664         rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
665         rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
666         rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
667         rgb->Alpha.Saturate = alpha->Alpha.Saturate;
668         rgb->Alpha.Omod = alpha->Alpha.Omod;
669
670         /* Merge ALU result writing */
671         if (alpha->WriteALUResult) {
672                 if (rgb->WriteALUResult)
673                         return 0;
674
675                 rgb->WriteALUResult = alpha->WriteALUResult;
676                 rgb->ALUResultCompare = alpha->ALUResultCompare;
677         }
678
679         /* Copy SemWait */
680         rgb->SemWait |= alpha->SemWait;
681
682         return 1;
683 }
684
685 /**
686  * Try to merge the given instructions into the rgb instructions.
687  *
688  * Return true on success; on failure, return false, and keep
689  * the instructions untouched.
690  */
691 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
692 {
693         struct rc_pair_instruction backup;
694
695         /*Instructions can't write output registers and ALU result at the
696          * same time. */
697         if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
698                 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
699                 return 0;
700         }
701
702         /* Writing output registers in the middle of shaders is slow, so
703          * we don't want to pair output writes with temp writes. */
704         if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
705                 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
706                 return 0;
707         }
708
709         memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
710
711         if (destructive_merge_instructions(rgb, alpha))
712                 return 1;
713
714         memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
715         return 0;
716 }
717
718 static void presub_nop(struct rc_instruction * emitted) {
719         int prev_rgb_index, prev_alpha_index, i, num_src;
720
721         /* We don't need a nop if the previous instruction is a TEX. */
722         if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
723                 return;
724         }
725         if (emitted->Prev->U.P.RGB.WriteMask)
726                 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
727         else
728                 prev_rgb_index = -1;
729         if (emitted->Prev->U.P.Alpha.WriteMask)
730                 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
731         else
732                 prev_alpha_index = 1;
733
734         /* Check the previous rgb instruction */
735         if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
736                 num_src = rc_presubtract_src_reg_count(
737                                 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
738                 for (i = 0; i < num_src; i++) {
739                         unsigned int index = emitted->U.P.RGB.Src[i].Index;
740                         if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
741                             && (index  == prev_rgb_index
742                                 || index == prev_alpha_index)) {
743                                 emitted->Prev->U.P.Nop = 1;
744                                 return;
745                         }
746                 }
747         }
748
749         /* Check the previous alpha instruction. */
750         if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
751                 return;
752
753         num_src = rc_presubtract_src_reg_count(
754                                 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
755         for (i = 0; i < num_src; i++) {
756                 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
757                 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
758                    && (index == prev_rgb_index || index == prev_alpha_index)) {
759                         emitted->Prev->U.P.Nop = 1;
760                         return;
761                 }
762         }
763 }
764
765 static void rgb_to_alpha_remap (
766         struct schedule_state * s,
767         struct rc_instruction * inst,
768         struct rc_pair_instruction_arg * arg,
769         rc_register_file old_file,
770         rc_swizzle old_swz,
771         unsigned int new_index)
772 {
773         int new_src_index;
774         unsigned int i;
775
776         for (i = 0; i < 3; i++) {
777                 if (get_swz(arg->Swizzle, i) == old_swz) {
778                         SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
779                 }
780         }
781         new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
782                                                         old_file, new_index);
783         /* This conversion is not possible, we must have made a mistake in
784          * is_rgb_to_alpha_possible. */
785         if (new_src_index < 0) {
786         rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
787                 return;
788         }
789
790         arg->Source = new_src_index;
791 }
792
793 static int can_remap(unsigned int opcode)
794 {
795         switch(opcode) {
796         case RC_OPCODE_DDX:
797         case RC_OPCODE_DDY:
798                 return 0;
799         default:
800                 return 1;
801         }
802 }
803
804 static int can_convert_opcode_to_alpha(unsigned int opcode)
805 {
806         switch(opcode) {
807         case RC_OPCODE_DDX:
808         case RC_OPCODE_DDY:
809         case RC_OPCODE_DP2:
810         case RC_OPCODE_DP3:
811         case RC_OPCODE_DP4:
812                 return 0;
813         default:
814                 return 1;
815         }
816 }
817
818 static void is_rgb_to_alpha_possible(
819         void * userdata,
820         struct rc_instruction * inst,
821         struct rc_pair_instruction_arg * arg,
822         struct rc_pair_instruction_source * src)
823 {
824         unsigned int read_chan = RC_SWIZZLE_UNUSED;
825         unsigned int alpha_sources = 0;
826         unsigned int i;
827         struct rc_reader_data * reader_data = userdata;
828
829         if (!can_remap(inst->U.P.RGB.Opcode)
830             || !can_remap(inst->U.P.Alpha.Opcode)) {
831                 reader_data->Abort = 1;
832                 return;
833         }
834
835         if (!src)
836                 return;
837
838         /* XXX There are some cases where we can still do the conversion if
839          * a reader reads from a presubtract source, but for now we'll prevent
840          * it. */
841         if (arg->Source == RC_PAIR_PRESUB_SRC) {
842                 reader_data->Abort = 1;
843                 return;
844         }
845
846         /* Make sure the source only reads the register component that we
847          * are going to be converting from.  It is OK if the instruction uses
848          * this component more than once.
849          * XXX If the index we will be converting to is the same as the
850          * current index, then it is OK to read from more than one component.
851          */
852         for (i = 0; i < 3; i++) {
853                 rc_swizzle swz = get_swz(arg->Swizzle, i);
854                 switch(swz) {
855                 case RC_SWIZZLE_X:
856                 case RC_SWIZZLE_Y:
857                 case RC_SWIZZLE_Z:
858                 case RC_SWIZZLE_W:
859                         if (read_chan == RC_SWIZZLE_UNUSED) {
860                                 read_chan = swz;
861                         } else if (read_chan != swz) {
862                                 reader_data->Abort = 1;
863                                 return;
864                         }
865                         break;
866                 default:
867                         break;
868                 }
869         }
870
871         /* Make sure there are enough alpha sources.
872          * XXX If we know what register all the readers are going
873          * to be remapped to, then in some situations we can still do
874          * the substitution, even if all 3 alpha sources are being used.*/
875         for (i = 0; i < 3; i++) {
876                 if (inst->U.P.Alpha.Src[i].Used) {
877                         alpha_sources++;
878                 }
879         }
880         if (alpha_sources > 2) {
881                 reader_data->Abort = 1;
882                 return;
883         }
884 }
885
886 static int convert_rgb_to_alpha(
887         struct schedule_state * s,
888         struct schedule_instruction * sched_inst)
889 {
890         struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
891         unsigned int old_mask = pair_inst->RGB.WriteMask;
892         unsigned int old_swz = rc_mask_to_swizzle(old_mask);
893         const struct rc_opcode_info * info =
894                                 rc_get_opcode_info(pair_inst->RGB.Opcode);
895         int new_index = -1;
896         unsigned int i;
897
898         if (sched_inst->GlobalReaders.Abort)
899                 return 0;
900
901         /* Even though we checked that we can convert to alpha previously, it is
902          * possible that another rgb source of the reader instructions was already
903          * converted to alpha and we thus have no longer free alpha sources.
904          */
905         for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
906                 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
907                 if (reader.Inst->U.P.Alpha.Src[2].Used)
908                         return 0;
909         }
910
911         if (!pair_inst->RGB.WriteMask)
912                 return 0;
913
914         if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
915             || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
916                 return 0;
917         }
918
919         assert(sched_inst->NumWriteValues == 1);
920
921         if (!sched_inst->WriteValues[0]) {
922                 assert(0);
923                 return 0;
924         }
925
926         /* We start at the old index, because if we can reuse the same
927          * register and just change the swizzle then it is more likely we
928          * will be able to convert all the readers. */
929         for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
930                 struct reg_value ** new_regvalp = get_reg_valuep(
931                                                 s, RC_FILE_TEMPORARY, i, 3);
932                 if (!*new_regvalp) {
933                         struct reg_value ** old_regvalp =
934                                 get_reg_valuep(s,
935                                         RC_FILE_TEMPORARY,
936                                         pair_inst->RGB.DestIndex,
937                                         rc_mask_to_swizzle(old_mask));
938                         new_index = i;
939                         *new_regvalp = *old_regvalp;
940                         break;
941                 }
942         }
943         if (new_index < 0) {
944                 return 0;
945         }
946
947         /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
948          * as the RGB opcode, then the Alpha instruction will already contain
949          * the correct opcode and instruction args, so we do not want to
950          * overwrite them.
951          */
952         if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
953                 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
954                 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
955                                                 sizeof(pair_inst->Alpha.Arg));
956         }
957         pair_inst->Alpha.DestIndex = new_index;
958         pair_inst->Alpha.WriteMask = RC_MASK_W;
959         pair_inst->Alpha.Target = pair_inst->RGB.Target;
960         pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
961         pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
962         pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
963         pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
964         /* Move the swizzles into the first chan */
965         for (i = 0; i < info->NumSrcRegs; i++) {
966                 unsigned int j;
967                 for (j = 0; j < 3; j++) {
968                         unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
969                         if (swz != RC_SWIZZLE_UNUSED) {
970                                 pair_inst->Alpha.Arg[i].Swizzle =
971                                                         rc_init_swizzle(swz, 1);
972                                 break;
973                         }
974                 }
975         }
976         pair_inst->RGB.Opcode = RC_OPCODE_NOP;
977         pair_inst->RGB.DestIndex = 0;
978         pair_inst->RGB.WriteMask = 0;
979         pair_inst->RGB.Target = 0;
980         pair_inst->RGB.OutputWriteMask = 0;
981         pair_inst->RGB.DepthWriteMask = 0;
982         pair_inst->RGB.Saturate = 0;
983         memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
984
985         for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
986                 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
987                 rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg,
988                                         RC_FILE_TEMPORARY, old_swz, new_index);
989         }
990         return 1;
991 }
992
993 static void try_convert_and_pair(
994         struct schedule_state *s,
995         struct schedule_instruction ** inst_list)
996 {
997         struct schedule_instruction * list_ptr = *inst_list;
998         while (list_ptr && *inst_list && (*inst_list)->NextReady) {
999                 int paired = 0;
1000                 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
1001                         && list_ptr->Instruction->U.P.RGB.Opcode
1002                                                 != RC_OPCODE_REPL_ALPHA) {
1003                                 goto next;
1004                 }
1005                 if (list_ptr->NumWriteValues == 1
1006                                         && convert_rgb_to_alpha(s, list_ptr)) {
1007
1008                         struct schedule_instruction * pair_ptr;
1009                         remove_inst_from_list(inst_list, list_ptr);
1010                         add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
1011
1012                         for (pair_ptr = s->ReadyRGB; pair_ptr;
1013                                         pair_ptr = pair_ptr->NextReady) {
1014                                 if (merge_instructions(&pair_ptr->Instruction->U.P,
1015                                                 &list_ptr->Instruction->U.P)) {
1016                                         remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1017                                         remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1018                                         pair_ptr->PairedInst = list_ptr;
1019
1020                                         add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1021                                         list_ptr = *inst_list;
1022                                         paired = 1;
1023                                         break;
1024                                 }
1025
1026                         }
1027                 }
1028                 if (!paired) {
1029 next:
1030                         list_ptr = list_ptr->NextReady;
1031                 }
1032         }
1033 }
1034
1035 /**
1036  * This function attempts to merge RGB and Alpha instructions together.
1037  */
1038 static void pair_instructions(struct schedule_state * s)
1039 {
1040         struct schedule_instruction *rgb_ptr;
1041         struct schedule_instruction *alpha_ptr;
1042
1043         /* Some pairings might fail because they require too
1044          * many source slots; try all possible pairings if necessary */
1045         rgb_ptr = s->ReadyRGB;
1046         while(rgb_ptr) {
1047                 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1048                 alpha_ptr = s->ReadyAlpha;
1049                 while(alpha_ptr) {
1050                         struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1051                         if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1052                                 /* Remove RGB and Alpha from their ready lists.
1053                                  */
1054                                 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1055                                 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1056                                 rgb_ptr->PairedInst = alpha_ptr;
1057                                 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1058                                 break;
1059                         }
1060                         alpha_ptr = alpha_next;
1061                 }
1062                 rgb_ptr = rgb_next;
1063         }
1064
1065         if (!s->Opt) {
1066                 return;
1067         }
1068
1069         /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1070          * slot can be converted into Alpha instructions. */
1071         try_convert_and_pair(s, &s->ReadyFullALU);
1072
1073         /* Try to convert some of the RGB instructions to Alpha and
1074          * try to pair it with another RGB. */
1075         try_convert_and_pair(s, &s->ReadyRGB);
1076 }
1077
1078 static void update_max_score(
1079         struct schedule_state * s,
1080         struct schedule_instruction ** list,
1081         int * max_score,
1082         struct schedule_instruction ** max_inst_out,
1083         struct schedule_instruction *** list_out)
1084 {
1085         struct schedule_instruction * list_ptr;
1086         for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1087                 int score;
1088                 s->CalcScore(list_ptr);
1089                 score = list_ptr->Score;
1090                 if (!*max_inst_out || score > *max_score) {
1091                         *max_score = score;
1092                         *max_inst_out = list_ptr;
1093                         *list_out = list;
1094                 }
1095         }
1096 }
1097
1098 static void emit_instruction(
1099         struct schedule_state * s,
1100         struct rc_instruction * before)
1101 {
1102         int max_score = -1;
1103         struct schedule_instruction * max_inst = NULL;
1104         struct schedule_instruction ** max_list = NULL;
1105         unsigned tex_count = 0;
1106         struct schedule_instruction * tex_ptr;
1107
1108         pair_instructions(s);
1109 #if VERBOSE
1110         fprintf(stderr, "Full:\n");
1111         print_list(s->ReadyFullALU);
1112         fprintf(stderr, "RGB:\n");
1113         print_list(s->ReadyRGB);
1114         fprintf(stderr, "Alpha:\n");
1115         print_list(s->ReadyAlpha);
1116         fprintf(stderr, "TEX:\n");
1117         print_list(s->ReadyTEX);
1118 #endif
1119
1120         for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1121                 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1122                         emit_all_tex(s, before);
1123                         s->PrevBlockHasKil = 1;
1124                         return;
1125                 }
1126                 tex_count++;
1127         }
1128         update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1129         update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1130         update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1131
1132         if (tex_count >= s->max_tex_group || max_score == -1
1133                 || (s->TEXCount > 0 && tex_count == s->TEXCount)
1134                 || (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1135                 emit_all_tex(s, before);
1136         } else {
1137
1138
1139                 remove_inst_from_list(max_list, max_inst);
1140                 rc_insert_instruction(before->Prev, max_inst->Instruction);
1141                 commit_alu_instruction(s, max_inst);
1142
1143                 presub_nop(before->Prev);
1144         }
1145 }
1146
1147 static void add_tex_reader(
1148         struct schedule_state * s,
1149         struct schedule_instruction * writer,
1150         struct schedule_instruction * reader)
1151 {
1152         if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1153                 /*Not a TEX instructions */
1154                 return;
1155         }
1156         reader->TexReadCount++;
1157         rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1158 }
1159
1160 static void scan_read(void * data, struct rc_instruction * inst,
1161                 rc_register_file file, unsigned int index, unsigned int chan)
1162 {
1163         struct schedule_state * s = data;
1164         struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1165         struct reg_value_reader * reader;
1166
1167         if (!v)
1168                 return;
1169
1170         if (*v && (*v)->Writer == s->Current) {
1171                 /* The instruction reads and writes to a register component.
1172                  * In this case, we only want to increment dependencies by one.
1173                  * Why?
1174                  * Because each instruction depends on the writers of its source
1175                  * registers _and_ the most recent writer of its destination
1176                  * register.  In this case, the current instruction (s->Current)
1177                  * has a dependency that both writes to one of its source
1178                  * registers and was the most recent writer to its destination
1179                  * register.  We have already marked this dependency in
1180                  * scan_write(), so we don't need to do it again.
1181                  */
1182
1183                 /* We need to make sure we are adding s->Current to the
1184                  * previous writer's list of TexReaders, if the previous writer
1185                  * was a TEX instruction.
1186                  */
1187                 add_tex_reader(s, s->PrevWriter[chan], s->Current);
1188
1189                 return;
1190         }
1191
1192         DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1193
1194         reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1195         reader->Reader = s->Current;
1196         if (!*v) {
1197                 /* In this situation, the instruction reads from a register
1198                  * that hasn't been written to or read from in the current
1199                  * block. */
1200                 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1201                 memset(*v, 0, sizeof(struct reg_value));
1202                 (*v)->Readers = reader;
1203         } else {
1204                 reader->Next = (*v)->Readers;
1205                 (*v)->Readers = reader;
1206                 /* Only update the current instruction's dependencies if the
1207                  * register it reads from has been written to in this block. */
1208                 if ((*v)->Writer) {
1209                         add_tex_reader(s, (*v)->Writer, s->Current);
1210                         s->Current->NumDependencies++;
1211                 }
1212         }
1213         (*v)->NumReaders++;
1214
1215         if (s->Current->NumReadValues >= 12) {
1216                 rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
1217         } else {
1218                 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1219         }
1220 }
1221
1222 static void scan_write(void * data, struct rc_instruction * inst,
1223                 rc_register_file file, unsigned int index, unsigned int chan)
1224 {
1225         struct schedule_state * s = data;
1226         struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1227         struct reg_value * newv;
1228
1229         if (!pv)
1230                 return;
1231
1232         DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1233
1234         newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1235         memset(newv, 0, sizeof(*newv));
1236
1237         newv->Writer = s->Current;
1238
1239         if (*pv) {
1240                 (*pv)->Next = newv;
1241                 s->Current->NumDependencies++;
1242                 /* Keep track of the previous writer to s->Current's destination
1243                  * register */
1244                 s->PrevWriter[chan] = (*pv)->Writer;
1245         }
1246
1247         *pv = newv;
1248
1249         if (s->Current->NumWriteValues >= 4) {
1250                 rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
1251         } else {
1252                 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1253         }
1254 }
1255
1256 static void is_rgb_to_alpha_possible_normal(
1257         void * userdata,
1258         struct rc_instruction * inst,
1259         struct rc_src_register * src)
1260 {
1261         struct rc_reader_data * reader_data = userdata;
1262         reader_data->Abort = 1;
1263
1264 }
1265
1266 static void schedule_block(struct schedule_state * s,
1267                 struct rc_instruction * begin, struct rc_instruction * end)
1268 {
1269         unsigned int ip;
1270
1271         /* Scan instructions for data dependencies */
1272         ip = 0;
1273         for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1274                 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1275                 memset(s->Current, 0, sizeof(struct schedule_instruction));
1276
1277                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1278                         const struct rc_opcode_info * info =
1279                                         rc_get_opcode_info(inst->U.I.Opcode);
1280                         if (info->HasTexture) {
1281                                 s->TEXCount++;
1282                         }
1283                 }
1284
1285                 /* XXX: This causes SemWait to be set for all instructions in
1286                  * a block if the previous block contained a TEX instruction.
1287                  * We can do better here, but it will take a lot of work. */
1288                 if (s->PrevBlockHasTex) {
1289                         s->Current->TexReadCount = 1;
1290                 }
1291
1292                 s->Current->Instruction = inst;
1293                 inst->IP = ip++;
1294
1295                 DBG("%i: Scanning\n", inst->IP);
1296
1297                 /* The order of things here is subtle and maybe slightly
1298                  * counter-intuitive, to account for the case where an
1299                  * instruction writes to the same register as it reads
1300                  * from. */
1301                 rc_for_all_writes_chan(inst, &scan_write, s);
1302                 rc_for_all_reads_chan(inst, &scan_read, s);
1303
1304                 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1305
1306                 if (!s->Current->NumDependencies) {
1307                         instruction_ready(s, s->Current);
1308                 }
1309
1310                 /* Get global readers for possible RGB->Alpha conversion. */
1311                 s->Current->GlobalReaders.ExitOnAbort = 1;
1312                 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1313                                 is_rgb_to_alpha_possible_normal,
1314                                 is_rgb_to_alpha_possible, NULL);
1315         }
1316
1317         /* Temporarily unlink all instructions */
1318         begin->Prev->Next = end;
1319         end->Prev = begin->Prev;
1320
1321         /* Schedule instructions back */
1322         while(!s->C->Error &&
1323               (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1324                 emit_instruction(s, end);
1325         }
1326 }
1327
1328 static int is_controlflow(struct rc_instruction * inst)
1329 {
1330         if (inst->Type == RC_INSTRUCTION_NORMAL) {
1331                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1332                 return opcode->IsFlowControl;
1333         }
1334         return 0;
1335 }
1336
1337 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1338 {
1339         struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1340         struct schedule_state s;
1341         struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1342         unsigned int * opt = user;
1343
1344         memset(&s, 0, sizeof(s));
1345         s.Opt = *opt;
1346         s.C = &c->Base;
1347         if (s.C->is_r500) {
1348                 s.CalcScore = calc_score_readers;
1349         } else {
1350                 s.CalcScore = calc_score_r300;
1351         }
1352         s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1353         while(inst != &c->Base.Program.Instructions) {
1354                 struct rc_instruction * first;
1355
1356                 if (is_controlflow(inst)) {
1357                         /* The TexSemWait flag is already properly set for ALU
1358                          * instructions using the results of normal TEX lookup,
1359                          * however it was found empirically that TEXKIL also needs
1360                          * synchronization with the control flow. This might not be optimal,
1361                          * however the docs don't offer any guidance in this matter.
1362                          */
1363                         if (s.PrevBlockHasKil) {
1364                                 inst->U.I.TexSemWait = 1;
1365                                 s.PrevBlockHasKil = 0;
1366                         }
1367                         inst = inst->Next;
1368                         continue;
1369                 }
1370
1371                 first = inst;
1372
1373                 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1374                         inst = inst->Next;
1375
1376                 DBG("Schedule one block\n");
1377                 memset(s.Temporary, 0, sizeof(s.Temporary));
1378                 s.TEXCount = 0;
1379                 schedule_block(&s, first, inst);
1380                 if (s.PendingTEX) {
1381                         s.PrevBlockHasTex = 1;
1382                 }
1383         }
1384 }