2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_program_pair.h"
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
38 #include "util/u_debug.h"
42 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
44 struct schedule_instruction {
45 struct rc_instruction * Instruction;
47 /** Next instruction in the linked list of ready instructions. */
48 struct schedule_instruction *NextReady;
50 /** Values that this instruction reads and writes */
51 struct reg_value * WriteValues[4];
52 struct reg_value * ReadValues[12];
53 unsigned int NumWriteValues:3;
54 unsigned int NumReadValues:4;
57 * Number of (read and write) dependencies that must be resolved before
58 * this instruction can be scheduled.
60 unsigned int NumDependencies:5;
62 /** List of all readers (see rc_get_readers() for the definition of
63 * "all readers"), even those outside the basic block this instruction
65 struct rc_reader_data GlobalReaders;
67 /** If the scheduler has paired an RGB and an Alpha instruction together,
68 * PairedInst references the alpha insturction's dependency information.
70 struct schedule_instruction * PairedInst;
72 /** This scheduler uses the value of Score to determine which
73 * instruction to schedule. Instructions with a higher value of Score
74 * will be scheduled first. */
77 /** The number of components that read from a TEX instruction. */
78 unsigned TexReadCount;
80 /** For TEX instructions a list of readers */
81 struct rc_list * TexReaders;
/**
 * Used to keep track of which instructions read a value.
 * One node of the singly linked reader list hanging off a reg_value.
 */
struct reg_value_reader {
	struct schedule_instruction *Reader;
	struct reg_value_reader *Next;
};
/**
 * Used to keep track of which values are stored in each component of a
 * temporary register.
 */
struct reg_value {
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), every
	 * reader has its dependency count decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
/** Current value of each of the four channels of one temporary register. */
struct register_state {
	struct reg_value * Values[4];
};
123 struct rc_instruciont * Inst;
124 unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125 unsigned int OldSwizzle:3;
126 unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127 unsigned int NewSwizzle:3;
128 unsigned int OnlyTexReads:1;
129 struct remap_reg * Next;
132 struct schedule_state {
133 struct radeon_compiler * C;
134 struct schedule_instruction * Current;
135 /** Array of the previous writers of Current's destination register
136 * indexed by channel. */
137 struct schedule_instruction * PrevWriter[4];
139 struct register_state Temporary[RC_REGISTER_MAX_INDEX];
142 * Linked lists of instructions that can be scheduled right now,
143 * based on which ALU/TEX resources they require.
146 struct schedule_instruction *ReadyFullALU;
147 struct schedule_instruction *ReadyRGB;
148 struct schedule_instruction *ReadyAlpha;
149 struct schedule_instruction *ReadyTEX;
151 struct rc_list *PendingTEX;
153 void (*CalcScore)(struct schedule_instruction *);
155 unsigned PrevBlockHasTex:1;
160 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
161 rc_register_file file, unsigned int index, unsigned int chan)
163 if (file != RC_FILE_TEMPORARY)
166 if (index >= RC_REGISTER_MAX_INDEX) {
167 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
171 return &s->Temporary[index].Values[chan];
174 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
176 unsigned tex_read_count = sinst->TexReadCount;
177 if (sinst->PairedInst) {
178 tex_read_count += sinst->PairedInst->TexReadCount;
180 return tex_read_count;
184 static void print_list(struct schedule_instruction * sinst)
186 struct schedule_instruction * ptr;
187 for (ptr = sinst; ptr; ptr=ptr->NextReady) {
188 unsigned tex_read_count = get_tex_read_count(ptr);
189 unsigned score = sinst->Score;
190 fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
193 fprintf(stderr, "\n");
197 static void remove_inst_from_list(struct schedule_instruction ** list,
198 struct schedule_instruction * inst)
200 struct schedule_instruction * prev = NULL;
201 struct schedule_instruction * list_ptr;
202 for (list_ptr = *list; list_ptr; prev = list_ptr,
203 list_ptr = list_ptr->NextReady) {
204 if (list_ptr == inst) {
206 prev->NextReady = inst->NextReady;
208 *list = inst->NextReady;
210 inst->NextReady = NULL;
216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
218 inst->NextReady = *list;
222 static void add_inst_to_list_score(struct schedule_instruction ** list,
223 struct schedule_instruction * inst)
225 struct schedule_instruction * temp;
226 struct schedule_instruction * prev;
233 while(temp && inst->Score <= temp->Score) {
235 temp = temp->NextReady;
239 inst->NextReady = temp;
242 prev->NextReady = inst;
243 inst->NextReady = temp;
247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
249 DBG("%i is now ready\n", sinst->Instruction->IP);
251 /* Adding Ready TEX instructions to the end of the "Ready List" helps
252 * us emit TEX instructions in blocks without losing our place. */
253 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254 add_inst_to_list_score(&s->ReadyTEX, sinst);
255 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256 add_inst_to_list_score(&s->ReadyRGB, sinst);
257 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258 add_inst_to_list_score(&s->ReadyAlpha, sinst);
260 add_inst_to_list_score(&s->ReadyFullALU, sinst);
263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
265 assert(sinst->NumDependencies > 0);
266 sinst->NumDependencies--;
267 if (!sinst->NumDependencies)
268 instruction_ready(s, sinst);
271 /* These functions provide different heuristics for scheduling instructions.
272 * The default is calc_score_readers. */
276 static void calc_score_zero(struct schedule_instruction * sinst)
281 static void calc_score_deps(struct schedule_instruction * sinst)
285 for (i = 0; i < sinst->NumWriteValues; i++) {
286 struct reg_value * v = sinst->WriteValues[i];
288 struct reg_value_reader * r;
289 for (r = v->Readers; r; r = r->Next) {
290 if (r->Reader->NumDependencies == 1) {
293 sinst->Score += r->Reader->NumDependencies;
301 #define NO_READ_TEX_SCORE (1 << 16)
302 #define NO_OUTPUT_SCORE (1 << 24)
304 static void calc_score_readers(struct schedule_instruction * sinst)
306 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
309 sinst->Score = sinst->NumReadValues;
310 if (sinst->PairedInst) {
311 sinst->Score += sinst->PairedInst->NumReadValues;
313 if (get_tex_read_count(sinst) == 0) {
314 sinst->Score |= NO_READ_TEX_SCORE;
316 if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
317 !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
318 if (sinst->PairedInst) {
319 if (!sinst->PairedInst->Instruction->U.P.
321 && !sinst->PairedInst->Instruction->U.P.
322 Alpha.OutputWriteMask) {
323 sinst->Score |= NO_OUTPUT_SCORE;
327 sinst->Score |= NO_OUTPUT_SCORE;
334 * This function decreases the dependencies of the next instruction that
335 * wants to write to each of sinst's read values.
337 static void commit_update_reads(struct schedule_state * s,
338 struct schedule_instruction * sinst){
340 for(i = 0; i < sinst->NumReadValues; ++i) {
341 struct reg_value * v = sinst->ReadValues[i];
342 assert(v->NumReaders > 0);
344 if (!v->NumReaders) {
346 decrease_dependencies(s, v->Next->Writer);
350 if (sinst->PairedInst) {
351 commit_update_reads(s, sinst->PairedInst);
355 static void commit_update_writes(struct schedule_state * s,
356 struct schedule_instruction * sinst){
358 for(i = 0; i < sinst->NumWriteValues; ++i) {
359 struct reg_value * v = sinst->WriteValues[i];
361 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
362 decrease_dependencies(s, r->Reader);
365 /* This happens in instruction sequences of the type
368 * See also the subtlety in how instructions that both
369 * read and write the same register are scanned.
372 decrease_dependencies(s, v->Next->Writer);
375 if (sinst->PairedInst) {
376 commit_update_writes(s, sinst->PairedInst);
380 static void notify_sem_wait(struct schedule_state *s)
382 struct rc_list * pend_ptr;
383 for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
384 struct rc_list * read_ptr;
385 struct schedule_instruction * pending = pend_ptr->Item;
386 for (read_ptr = pending->TexReaders; read_ptr;
387 read_ptr = read_ptr->Next) {
388 struct schedule_instruction * reader = read_ptr->Item;
389 reader->TexReadCount--;
392 s->PendingTEX = NULL;
395 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
397 DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
399 commit_update_reads(s, sinst);
401 commit_update_writes(s, sinst);
403 if (get_tex_read_count(sinst) > 0) {
404 sinst->Instruction->U.P.SemWait = 1;
410 * Emit all ready texture instructions in a single block.
412 * Emit as a single block to (hopefully) sample many textures in parallel,
413 * and to avoid hardware indirections on R300.
415 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
417 struct schedule_instruction *readytex;
418 struct rc_instruction * inst_begin;
423 /* Node marker for R300 */
424 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
425 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
427 /* Link texture instructions back in */
428 readytex = s->ReadyTEX;
430 rc_insert_instruction(before->Prev, readytex->Instruction);
431 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
433 /* All of the TEX instructions in the same TEX block have
434 * their source registers read from before any of the
435 * instructions in that block write to their destination
436 * registers. This means that when we commit a TEX
437 * instruction, any other TEX instruction that wants to write
438 * to one of the committed instruction's source register can be
439 * marked as ready and should be emitted in the same TEX
440 * block. This prevents the following sequence from being
441 * emitted in two different TEX blocks:
442 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
443 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
445 commit_update_reads(s, readytex);
446 readytex = readytex->NextReady;
448 readytex = s->ReadyTEX;
451 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
452 commit_update_writes(s, readytex);
453 /* Set semaphore bits for last TEX instruction in the block */
454 if (!readytex->NextReady) {
455 readytex->Instruction->U.I.TexSemAcquire = 1;
456 readytex->Instruction->U.I.TexSemWait = 1;
458 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
459 readytex = readytex->NextReady;
463 /* This is a helper function for destructive_merge_instructions(). It helps
464 * merge presubtract sources from two instructions and makes sure the
465 * presubtract sources end up in the correct spot. This function assumes that
466 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
467 * but no scalar instruction (alpha).
468 * @return 0 if merging the presubtract sources fails.
469 * @retrun 1 if merging the presubtract sources succeeds.
471 static int merge_presub_sources(
472 struct rc_pair_instruction * dst_full,
473 struct rc_pair_sub_instruction src,
476 unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
477 struct rc_pair_sub_instruction * dst_sub;
478 const struct rc_opcode_info * info;
480 assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
486 dst_sub = &dst_full->RGB;
488 case RC_SOURCE_ALPHA:
491 dst_sub = &dst_full->Alpha;
498 info = rc_get_opcode_info(dst_full->RGB.Opcode);
500 if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
503 srcp_regs = rc_presubtract_src_reg_count(
504 src.Src[RC_PAIR_PRESUB_SRC].Index);
505 for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
508 unsigned int one_way = 0;
509 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
510 struct rc_pair_instruction_source temp;
512 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
513 srcp.File, srcp.Index);
515 /* If free_source < 0 then there are no free source
520 temp = dst_sub->Src[srcp_src];
521 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
523 /* srcp needs src0 and src1 to be the same */
524 if (free_source < srcp_src) {
527 free_source = rc_pair_alloc_source(dst_full, is_rgb,
528 is_alpha, temp.File, temp.Index);
533 dst_sub->Src[free_source] = temp;
536 /* If free_source == srcp_src, then the presubtract
537 * source is already in the correct place. */
538 if (free_source == srcp_src)
541 /* Shuffle the sources, so we can put the
542 * presubtract source in the correct place. */
543 for(arg = 0; arg < info->NumSrcRegs; arg++) {
544 /*If this arg does not read from an rgb source,
546 if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
551 if (dst_full->RGB.Arg[arg].Source == srcp_src)
552 dst_full->RGB.Arg[arg].Source = free_source;
553 /* We need to do this just in case register
554 * is one of the sources already, but in the
556 else if(dst_full->RGB.Arg[arg].Source == free_source
558 dst_full->RGB.Arg[arg].Source = srcp_src;
566 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
567 static int destructive_merge_instructions(
568 struct rc_pair_instruction * rgb,
569 struct rc_pair_instruction * alpha)
571 const struct rc_opcode_info * opcode;
573 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
574 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
576 /* Presubtract registers need to be merged first so that registers
577 * needed by the presubtract operation can be placed in src0 and/or
580 /* Merge the rgb presubtract registers. */
581 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
582 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
586 /* Merge the alpha presubtract registers */
587 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
588 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
593 /* Copy alpha args into rgb */
594 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
596 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
597 unsigned int srcrgb = 0;
598 unsigned int srcalpha = 0;
599 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
600 rc_register_file file = 0;
601 unsigned int index = 0;
604 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
606 file = alpha->RGB.Src[oldsrc].File;
607 index = alpha->RGB.Src[oldsrc].Index;
608 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
610 file = alpha->Alpha.Src[oldsrc].File;
611 index = alpha->Alpha.Src[oldsrc].Index;
614 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
618 rgb->Alpha.Arg[arg].Source = source;
619 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
620 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
621 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
624 /* Copy alpha opcode into rgb */
625 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
626 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
627 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
628 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
629 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
630 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
631 rgb->Alpha.Omod = alpha->Alpha.Omod;
633 /* Merge ALU result writing */
634 if (alpha->WriteALUResult) {
635 if (rgb->WriteALUResult)
638 rgb->WriteALUResult = alpha->WriteALUResult;
639 rgb->ALUResultCompare = alpha->ALUResultCompare;
643 rgb->SemWait |= alpha->SemWait;
649 * Try to merge the given instructions into the rgb instructions.
651 * Return true on success; on failure, return false, and keep
652 * the instructions untouched.
654 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
656 struct rc_pair_instruction backup;
658 /*Instructions can't write output registers and ALU result at the
660 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
661 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
665 /* Writing output registers in the middle of shaders is slow, so
666 * we don't want to pair output writes with temp writes. */
667 if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
668 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
672 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
674 if (destructive_merge_instructions(rgb, alpha))
677 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
681 static void presub_nop(struct rc_instruction * emitted) {
682 int prev_rgb_index, prev_alpha_index, i, num_src;
684 /* We don't need a nop if the previous instruction is a TEX. */
685 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
688 if (emitted->Prev->U.P.RGB.WriteMask)
689 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
692 if (emitted->Prev->U.P.Alpha.WriteMask)
693 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
695 prev_alpha_index = 1;
697 /* Check the previous rgb instruction */
698 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
699 num_src = rc_presubtract_src_reg_count(
700 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
701 for (i = 0; i < num_src; i++) {
702 unsigned int index = emitted->U.P.RGB.Src[i].Index;
703 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
704 && (index == prev_rgb_index
705 || index == prev_alpha_index)) {
706 emitted->Prev->U.P.Nop = 1;
712 /* Check the previous alpha instruction. */
713 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
716 num_src = rc_presubtract_src_reg_count(
717 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
718 for (i = 0; i < num_src; i++) {
719 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
720 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
721 && (index == prev_rgb_index || index == prev_alpha_index)) {
722 emitted->Prev->U.P.Nop = 1;
728 static void rgb_to_alpha_remap (
729 struct rc_instruction * inst,
730 struct rc_pair_instruction_arg * arg,
731 rc_register_file old_file,
733 unsigned int new_index)
738 for (i = 0; i < 3; i++) {
739 if (get_swz(arg->Swizzle, i) == old_swz) {
740 SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
743 new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
744 old_file, new_index);
745 /* This conversion is not possible, we must have made a mistake in
746 * is_rgb_to_alpha_possible. */
747 if (new_src_index < 0) {
752 arg->Source = new_src_index;
/* Predicate on an opcode. is_rgb_to_alpha_possible() calls it on both the
 * RGB and the Alpha opcode of a reader and aborts the rgb-to-alpha
 * conversion when either call returns zero.
 * NOTE(review): the body of this function is not visible in the reviewed
 * listing, so which opcodes are rejected cannot be stated here. */
755 static int can_remap(unsigned int opcode)
/* Predicate on an opcode. convert_rgb_to_alpha() calls it on both the RGB
 * and the Alpha opcode of the instruction it is asked to convert.
 * NOTE(review): the body of this function is not visible in the reviewed
 * listing, so which opcodes are rejected cannot be stated here. */
766 static int can_convert_opcode_to_alpha(unsigned int opcode)
781 static void is_rgb_to_alpha_possible(
783 struct rc_instruction * inst,
784 struct rc_pair_instruction_arg * arg,
785 struct rc_pair_instruction_source * src)
787 unsigned int read_chan = RC_SWIZZLE_UNUSED;
788 unsigned int alpha_sources = 0;
790 struct rc_reader_data * reader_data = userdata;
792 if (!can_remap(inst->U.P.RGB.Opcode)
793 || !can_remap(inst->U.P.Alpha.Opcode)) {
794 reader_data->Abort = 1;
801 /* XXX There are some cases where we can still do the conversion if
802 * a reader reads from a presubtract source, but for now we'll prevent
804 if (arg->Source == RC_PAIR_PRESUB_SRC) {
805 reader_data->Abort = 1;
809 /* Make sure the source only reads the register component that we
810 * are going to be convering from. It is OK if the instruction uses
811 * this component more than once.
812 * XXX If the index we will be converting to is the same as the
813 * current index, then it is OK to read from more than one component.
815 for (i = 0; i < 3; i++) {
816 rc_swizzle swz = get_swz(arg->Swizzle, i);
822 if (read_chan == RC_SWIZZLE_UNUSED) {
824 } else if (read_chan != swz) {
825 reader_data->Abort = 1;
834 /* Make sure there are enough alpha sources.
835 * XXX If we know what register all the readers are going
836 * to be remapped to, then in some situations we can still do
837 * the subsitution, even if all 3 alpha sources are being used.*/
838 for (i = 0; i < 3; i++) {
839 if (inst->U.P.Alpha.Src[i].Used) {
843 if (alpha_sources > 2) {
844 reader_data->Abort = 1;
849 static int convert_rgb_to_alpha(
850 struct schedule_state * s,
851 struct schedule_instruction * sched_inst)
853 struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
854 unsigned int old_mask = pair_inst->RGB.WriteMask;
855 unsigned int old_swz = rc_mask_to_swizzle(old_mask);
856 const struct rc_opcode_info * info =
857 rc_get_opcode_info(pair_inst->RGB.Opcode);
861 if (sched_inst->GlobalReaders.Abort)
864 if (!pair_inst->RGB.WriteMask)
867 if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
868 || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
872 assert(sched_inst->NumWriteValues == 1);
874 if (!sched_inst->WriteValues[0]) {
879 /* We start at the old index, because if we can reuse the same
880 * register and just change the swizzle then it is more likely we
881 * will be able to convert all the readers. */
882 for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
883 struct reg_value ** new_regvalp = get_reg_valuep(
884 s, RC_FILE_TEMPORARY, i, 3);
886 struct reg_value ** old_regvalp =
889 pair_inst->RGB.DestIndex,
890 rc_mask_to_swizzle(old_mask));
892 *new_regvalp = *old_regvalp;
894 new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
902 /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
903 * as the RGB opcode, then the Alpha instruction will already contain
904 * the correct opcode and instruction args, so we do not want to
907 if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
908 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
909 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
910 sizeof(pair_inst->Alpha.Arg));
912 pair_inst->Alpha.DestIndex = new_index;
913 pair_inst->Alpha.WriteMask = RC_MASK_W;
914 pair_inst->Alpha.Target = pair_inst->RGB.Target;
915 pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
916 pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
917 pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
918 pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
919 /* Move the swizzles into the first chan */
920 for (i = 0; i < info->NumSrcRegs; i++) {
922 for (j = 0; j < 3; j++) {
923 unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
924 if (swz != RC_SWIZZLE_UNUSED) {
925 pair_inst->Alpha.Arg[i].Swizzle =
926 rc_init_swizzle(swz, 1);
931 pair_inst->RGB.Opcode = RC_OPCODE_NOP;
932 pair_inst->RGB.DestIndex = 0;
933 pair_inst->RGB.WriteMask = 0;
934 pair_inst->RGB.Target = 0;
935 pair_inst->RGB.OutputWriteMask = 0;
936 pair_inst->RGB.DepthWriteMask = 0;
937 pair_inst->RGB.Saturate = 0;
938 memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
940 for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
941 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
942 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
943 RC_FILE_TEMPORARY, old_swz, new_index);
948 static void try_convert_and_pair(
949 struct schedule_state *s,
950 struct schedule_instruction ** inst_list)
952 struct schedule_instruction * list_ptr = *inst_list;
953 while (list_ptr && *inst_list && (*inst_list)->NextReady) {
955 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
956 && list_ptr->Instruction->U.P.RGB.Opcode
957 != RC_OPCODE_REPL_ALPHA) {
960 if (list_ptr->NumWriteValues == 1
961 && convert_rgb_to_alpha(s, list_ptr)) {
963 struct schedule_instruction * pair_ptr;
964 remove_inst_from_list(inst_list, list_ptr);
965 add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
967 for (pair_ptr = s->ReadyRGB; pair_ptr;
968 pair_ptr = pair_ptr->NextReady) {
969 if (merge_instructions(&pair_ptr->Instruction->U.P,
970 &list_ptr->Instruction->U.P)) {
971 remove_inst_from_list(&s->ReadyAlpha, list_ptr);
972 remove_inst_from_list(&s->ReadyRGB, pair_ptr);
973 pair_ptr->PairedInst = list_ptr;
975 add_inst_to_list(&s->ReadyFullALU, pair_ptr);
976 list_ptr = *inst_list;
985 list_ptr = list_ptr->NextReady;
991 * This function attempts to merge RGB and Alpha instructions together.
993 static void pair_instructions(struct schedule_state * s)
995 struct schedule_instruction *rgb_ptr;
996 struct schedule_instruction *alpha_ptr;
998 /* Some pairings might fail because they require too
999 * many source slots; try all possible pairings if necessary */
1000 rgb_ptr = s->ReadyRGB;
1002 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1003 alpha_ptr = s->ReadyAlpha;
1005 struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1006 if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1007 /* Remove RGB and Alpha from their ready lists.
1009 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1010 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1011 rgb_ptr->PairedInst = alpha_ptr;
1012 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1015 alpha_ptr = alpha_next;
1024 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1025 * slot can be converted into Alpha instructions. */
1026 try_convert_and_pair(s, &s->ReadyFullALU);
1028 /* Try to convert some of the RGB instructions to Alpha and
1029 * try to pair it with another RGB. */
1030 try_convert_and_pair(s, &s->ReadyRGB);
1033 static void update_max_score(
1034 struct schedule_state * s,
1035 struct schedule_instruction ** list,
1037 struct schedule_instruction ** max_inst_out,
1038 struct schedule_instruction *** list_out)
1040 struct schedule_instruction * list_ptr;
1041 for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1043 s->CalcScore(list_ptr);
1044 score = list_ptr->Score;
1045 if (!*max_inst_out || score > *max_score) {
1047 *max_inst_out = list_ptr;
1053 static void emit_instruction(
1054 struct schedule_state * s,
1055 struct rc_instruction * before)
1058 struct schedule_instruction * max_inst = NULL;
1059 struct schedule_instruction ** max_list = NULL;
1060 unsigned tex_count = 0;
1061 struct schedule_instruction * tex_ptr;
1063 pair_instructions(s);
1065 fprintf(stderr, "Full:\n");
1066 print_list(s->ReadyFullALU);
1067 fprintf(stderr, "RGB:\n");
1068 print_list(s->ReadyRGB);
1069 fprintf(stderr, "Alpha:\n");
1070 print_list(s->ReadyAlpha);
1071 fprintf(stderr, "TEX:\n");
1072 print_list(s->ReadyTEX);
1075 for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1078 update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1079 update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1080 update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1082 if (tex_count >= s->max_tex_group || max_score == -1
1083 || (s->TEXCount > 0 && tex_count == s->TEXCount)) {
1084 emit_all_tex(s, before);
1088 remove_inst_from_list(max_list, max_inst);
1089 rc_insert_instruction(before->Prev, max_inst->Instruction);
1090 commit_alu_instruction(s, max_inst);
1092 presub_nop(before->Prev);
1097 * Find a good ALU instruction or pair of ALU instruction and emit it.
1099 * Prefer emitting full ALU instructions, so that when we reach a point
1100 * where no full ALU instruction can be emitted, we have more candidates
1101 * for RGB/Alpha pairing.
1103 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
1105 struct schedule_instruction * sinst;
1106 int rgb_score = -1, alpha_score = -1;
1108 /* Try to merge RGB and Alpha instructions together. */
1109 pair_instructions(s);
1111 if (s->ReadyFullALU) {
1112 sinst = s->ReadyFullALU;
1113 s->ReadyFullALU = s->ReadyFullALU->NextReady;
1114 rc_insert_instruction(before->Prev, sinst->Instruction);
1115 commit_alu_instruction(s, sinst);
1118 rgb_score = s->ReadyRGB->Score;
1120 if (s->ReadyAlpha) {
1121 alpha_score = s->ReadyAlpha->Score;
1123 if (rgb_score > alpha_score) {
1124 sinst = s->ReadyRGB;
1125 s->ReadyRGB = s->ReadyRGB->NextReady;
1126 } else if (s->ReadyAlpha) {
1127 sinst = s->ReadyAlpha;
1128 s->ReadyAlpha = s->ReadyAlpha->NextReady;
1130 /*XXX Something real bad has happened. */
1134 rc_insert_instruction(before->Prev, sinst->Instruction);
1135 commit_alu_instruction(s, sinst);
1137 /* If the instruction we just emitted uses a presubtract value, and
1138 * the presubtract sources were written by the previous intstruction,
1139 * the previous instruction needs a nop. */
1140 presub_nop(before->Prev);
1143 static void add_tex_reader(
1144 struct schedule_state * s,
1145 struct schedule_instruction * writer,
1146 struct schedule_instruction * reader)
1148 if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1149 /*Not a TEX instructions */
1152 reader->TexReadCount++;
1153 rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
/**
 * Per-channel read callback: records that s->Current reads channel `chan`
 * of register file[index]. schedule_block() passes this function to
 * rc_for_all_reads_chan() after the writes of the same instruction have
 * been scanned with scan_write().
 *
 * What the visible lines do:
 *  - When the tracked value for this channel was written by s->Current
 *    itself, the dependency has already been counted in scan_write(); only
 *    add_tex_reader() is called, with s->PrevWriter[chan] as the writer.
 *  - Otherwise a reg_value_reader for s->Current is allocated from the
 *    compiler's memory pool and put at the head of the value's Readers
 *    list. A fresh, zeroed reg_value is allocated for a register that has
 *    not been written to or read from in the current block.
 *  - For a register that has been written to in this block,
 *    add_tex_reader() is called with the value's Writer and
 *    s->Current->NumDependencies is incremented.
 *  - The value is stored in s->Current->ReadValues; rc_error() is reported
 *    when NumReadValues has already reached 12.
 *
 * data:  the struct schedule_state, passed as an opaque pointer.
 * inst:  instruction being scanned (not referenced in the visible lines;
 *        s->Current is used instead).
 * file, index, chan: the register channel being read.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * line numbers have gaps), so the exact conditions guarding the branches
 * are not all visible here -- confirm against the complete file.
 */
1156 static void scan_read(void * data, struct rc_instruction * inst,
1157 rc_register_file file, unsigned int index, unsigned int chan)
1159 struct schedule_state * s = data;
1160 struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1161 struct reg_value_reader * reader;
1166 if (*v && (*v)->Writer == s->Current) {
1167 /* The instruction reads and writes to a register component.
1168 * In this case, we only want to increment dependencies by one.
1170 * Because each instruction depends on the writers of its source
1171 * registers _and_ the most recent writer of its destination
1172 * register. In this case, the current instruction (s->Current)
1173 * has a dependency that both writes to one of its source
1174 * registers and was the most recent writer to its destination
1175 * register. We have already marked this dependency in
1176 * scan_write(), so we don't need to do it again.
1179 /* We need to make sure we are adding s->Current to the
1180 * previous writer's list of TexReaders, if the previous writer
1181 * was a TEX instruction.
1183 add_tex_reader(s, s->PrevWriter[chan], s->Current);
1188 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1190 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1191 reader->Reader = s->Current;
1193 /* In this situation, the instruction reads from a register
1194 * that hasn't been written to or read from in the current
1196 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1197 memset(*v, 0, sizeof(struct reg_value));
1198 (*v)->Readers = reader;
1200 reader->Next = (*v)->Readers;
1201 (*v)->Readers = reader;
1202 /* Only update the current instruction's dependencies if the
1203 * register it reads from has been written to in this block. */
1205 add_tex_reader(s, (*v)->Writer, s->Current);
1206 s->Current->NumDependencies++;
/* ReadValues is declared with 12 entries in struct schedule_instruction,
 * so a full array is reported through rc_error(). */
1211 if (s->Current->NumReadValues >= 12) {
1212 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
1214 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
/**
 * Per-channel write callback: records that s->Current writes channel `chan`
 * of register file[index]. schedule_block() passes this function to
 * rc_for_all_writes_chan(), before the reads of the same instruction are
 * scanned with scan_read().
 *
 * What the visible lines do:
 *  - Allocate a new, zero-initialised reg_value from the compiler's memory
 *    pool and set its Writer to s->Current.
 *  - Increment s->Current->NumDependencies and remember the Writer of the
 *    value already tracked for this channel in s->PrevWriter[chan];
 *    scan_read() later hands that writer to add_tex_reader().
 *  - Store the new value in s->Current->WriteValues; rc_error() is reported
 *    when NumWriteValues has already reached 4, the declared size of
 *    WriteValues in struct schedule_instruction.
 *
 * data:  the struct schedule_state, passed as an opaque pointer.
 * inst:  instruction being scanned (not referenced in the visible lines;
 *        s->Current is used instead).
 * file, index, chan: the register channel being written.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * line numbers have gaps), including the guards around the dependency
 * update and the store of the new value into the tracking slot -- confirm
 * against the complete file.
 */
1218 static void scan_write(void * data, struct rc_instruction * inst,
1219 rc_register_file file, unsigned int index, unsigned int chan)
1221 struct schedule_state * s = data;
1222 struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1223 struct reg_value * newv;
1228 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1230 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1231 memset(newv, 0, sizeof(*newv));
1233 newv->Writer = s->Current;
1237 s->Current->NumDependencies++;
1238 /* Keep track of the previous writer to s->Current's destination
1240 s->PrevWriter[chan] = (*pv)->Writer;
1245 if (s->Current->NumWriteValues >= 4) {
1246 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
1248 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
/**
 * Reader callback that schedule_block() passes to rc_get_readers() while
 * collecting the global readers used for the possible RGB->Alpha
 * conversion.
 *
 * It does not inspect the instruction or the source register: it simply
 * sets Abort on the rc_reader_data it receives.
 *
 * NOTE(review): presumably rc_get_readers() invokes this for readers that
 * are normal (non-pair) instructions, which would rule out the conversion
 * for them -- confirm against rc_get_readers(). One parameter line of the
 * signature is missing from this listing; `userdata` is that parameter.
 */
1252 static void is_rgb_to_alpha_possible_normal(
1254 struct rc_instruction * inst,
1255 struct rc_src_register * src)
1257 struct rc_reader_data * reader_data = userdata;
/* Unconditional: the reader itself is never examined. */
1258 reader_data->Abort = 1;
/**
 * Schedule one block of instructions, from begin up to (not including) end.
 *
 * Visible steps:
 *  1. Dependency scan. For each instruction a zeroed schedule_instruction
 *     is allocated from the compiler's memory pool and made s->Current.
 *     When the previous block contained a TEX instruction
 *     (s->PrevBlockHasTex) its TexReadCount starts at 1. scan_write() and
 *     then scan_read() are run over the instruction's register channels;
 *     an instruction left with no dependencies is handed to
 *     instruction_ready(). rc_get_readers() fills GlobalReaders for the
 *     possible RGB->Alpha conversion.
 *  2. The whole range is unlinked from the instruction list by joining
 *     begin->Prev directly to end.
 *  3. While no compiler error is set and any of the ReadyTEX, ReadyRGB,
 *     ReadyAlpha or ReadyFullALU lists is non-empty, instructions are
 *     emitted back, with end passed to the emit helpers.
 *
 * s:     scheduler state; s->C is the compiler whose Pool and Error are used.
 * begin: first instruction of the block.
 * end:   instruction following the block; it is not scheduled itself.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * line numbers have gaps), e.g. the body of the HasTexture branch and the
 * branch structure of the emit loop -- confirm against the complete file.
 */
1262 static void schedule_block(struct schedule_state * s,
1263 struct rc_instruction * begin, struct rc_instruction * end)
1267 /* Scan instructions for data dependencies */
1269 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
/* One zero-initialised scheduling record per program instruction. */
1270 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1271 memset(s->Current, 0, sizeof(struct schedule_instruction));
1273 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1274 const struct rc_opcode_info * info =
1275 rc_get_opcode_info(inst->U.I.Opcode);
1276 if (info->HasTexture) {
1281 /* XXX: This causes SemWait to be set for all instructions in
1282 * a block if the previous block contained a TEX instruction.
1283 * We can do better here, but it will take a lot of work. */
1284 if (s->PrevBlockHasTex) {
1285 s->Current->TexReadCount = 1;
1288 s->Current->Instruction = inst;
1291 DBG("%i: Scanning\n", inst->IP);
1293 /* The order of things here is subtle and maybe slightly
1294 * counter-intuitive, to account for the case where an
1295 * instruction writes to the same register as it reads
1297 rc_for_all_writes_chan(inst, &scan_write, s);
1298 rc_for_all_reads_chan(inst, &scan_read, s);
1300 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1302 if (!s->Current->NumDependencies) {
1303 instruction_ready(s, s->Current);
1306 /* Get global readers for possible RGB->Alpha conversion. */
1307 s->Current->GlobalReaders.ExitOnAbort = 1;
1308 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1309 is_rgb_to_alpha_possible_normal,
1310 is_rgb_to_alpha_possible, NULL);
1313 /* Temporarily unlink all instructions */
1314 begin->Prev->Next = end;
1315 end->Prev = begin->Prev;
1317 /* Schedule instructions back */
1318 while(!s->C->Error &&
1319 (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
/* r500 goes through emit_instruction(); the remaining visible calls emit
 * TEX instructions with emit_all_tex() and then drain the ALU ready lists
 * one instruction at a time with emit_one_alu().
 * NOTE(review): the lines separating the two paths are not visible here. */
1320 if (s->C->is_r500) {
1321 emit_instruction(s, end);
1324 emit_all_tex(s, end);
1326 while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha))
1327 emit_one_alu(s, end);
/**
 * Tell whether an instruction is a flow-control instruction.
 *
 * For an RC_INSTRUCTION_NORMAL instruction this returns the IsFlowControl
 * field of the opcode info looked up with rc_get_opcode_info().
 * rc_pair_schedule() uses the result to find the boundaries of the blocks
 * it passes to schedule_block().
 *
 * NOTE(review): the value returned for other instruction types is on a
 * line that is missing from this listing -- confirm against the complete
 * file.
 */
1332 static int is_controlflow(struct rc_instruction * inst)
1334 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1335 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1336 return opcode->IsFlowControl;
1341 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1343 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1344 struct schedule_state s;
1345 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1346 unsigned int * opt = user;
1348 memset(&s, 0, sizeof(s));
1351 s.CalcScore = calc_score_readers;
1352 s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1353 while(inst != &c->Base.Program.Instructions) {
1354 struct rc_instruction * first;
1356 if (is_controlflow(inst)) {
1363 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1366 DBG("Schedule one block\n");
1367 memset(s.Temporary, 0, sizeof(s.Temporary));
1369 schedule_block(&s, first, inst);
1371 s.PrevBlockHasTex = 1;