Tizen 2.1 base
[sdk/emulator/qemu.git] / gl / mesa / src / gallium / drivers / r300 / compiler / radeon_pair_regalloc.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28
29 #include "radeon_program_pair.h"
30
31 #include <stdio.h>
32
33 #include "main/glheader.h"
34 #include "program/register_allocate.h"
35 #include "ralloc.h"
36
37 #include "r300_fragprog_swizzle.h"
38 #include "radeon_compiler.h"
39 #include "radeon_compiler_util.h"
40 #include "radeon_dataflow.h"
41 #include "radeon_list.h"
42 #include "radeon_variable.h"
43
/* Compile-time switch for the debug trace below; 0 disables all output. */
#define VERBOSE 0

/* printf-style trace to stderr.  The call is only executed when VERBOSE is
 * non-zero. */
#define DBG(...) \
        do { \
                if (VERBOSE) \
                        fprintf(stderr, __VA_ARGS__); \
        } while (0)
47
48
49
50 struct register_info {
51         struct live_intervals Live[4];
52
53         unsigned int Used:1;
54         unsigned int Allocated:1;
55         unsigned int File:3;
56         unsigned int Index:RC_REGISTER_INDEX_BITS;
57         unsigned int Writemask;
58 };
59
/* State shared by the allocation passes and their callbacks. */
struct regalloc_state {
        struct radeon_compiler *C;

        /* Array of NumInputs entries, indexed by input register index. */
        struct register_info *Input;
        unsigned NumInputs;

        /* Array of NumTemporaries entries, indexed by temporary index. */
        struct register_info *Temporary;
        unsigned NumTemporaries;

        /* Non-zero when only the inputs-only allocation is performed; in that
         * mode remap_register() also rewrites temporaries. */
        unsigned Simple;
        /* Largest IP of a loop end seen so far while scanning the program. */
        int LoopEnd;
};
72
/* Register classes known to the allocator.  The numeric values are used as
 * indices into the class table built in do_advanced_regalloc(), so the order
 * here must match the order of that table. */
enum rc_reg_class {
        /* Classes that list several alternative writemasks. */
        RC_REG_CLASS_SINGLE = 0,
        RC_REG_CLASS_DOUBLE,
        RC_REG_CLASS_TRIPLE,
        RC_REG_CLASS_ALPHA,
        RC_REG_CLASS_SINGLE_PLUS_ALPHA,
        RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
        RC_REG_CLASS_TRIPLE_PLUS_ALPHA,

        /* Classes that pin one exact writemask. */
        RC_REG_CLASS_X,
        RC_REG_CLASS_Y,
        RC_REG_CLASS_Z,
        RC_REG_CLASS_XY,
        RC_REG_CLASS_YZ,
        RC_REG_CLASS_XZ,
        RC_REG_CLASS_XW,
        RC_REG_CLASS_YW,
        RC_REG_CLASS_ZW,
        RC_REG_CLASS_XYW,
        RC_REG_CLASS_YZW,
        RC_REG_CLASS_XZW,

        /* Number of classes; not a class itself. */
        RC_REG_CLASS_COUNT
};
95
96 struct rc_class {
97         enum rc_reg_class Class;
98
99         unsigned int WritemaskCount;
100
101         /** This is 1 if this class is being used by the register allocator
102          * and 0 otherwise */
103         unsigned int Used;
104
105         /** This is the ID number assigned to this class by ra. */
106         unsigned int Id;
107
108         /** List of writemasks that belong to this class */
109         unsigned int Writemasks[3];
110
111
112 };
113
114 static void print_live_intervals(struct live_intervals * src)
115 {
116         if (!src || !src->Used) {
117                 DBG("(null)");
118                 return;
119         }
120
121         DBG("(%i,%i)", src->Start, src->End);
122 }
123
124 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
125 {
126         if (VERBOSE) {
127                 DBG("overlap_live_intervals: ");
128                 print_live_intervals(a);
129                 DBG(" to ");
130                 print_live_intervals(b);
131                 DBG("\n");
132         }
133
134         if (!a->Used || !b->Used) {
135                 DBG("    unused interval\n");
136                 return 0;
137         }
138
139         if (a->Start > b->Start) {
140                 if (a->Start < b->End) {
141                         DBG("    overlap\n");
142                         return 1;
143                 }
144         } else if (b->Start > a->Start) {
145                 if (b->Start < a->End) {
146                         DBG("    overlap\n");
147                         return 1;
148                 }
149         } else { /* a->Start == b->Start */
150                 if (a->Start != a->End && b->Start != b->End) {
151                         DBG("    overlap\n");
152                         return 1;
153                 }
154         }
155
156         DBG("    no overlap\n");
157
158         return 0;
159 }
160
161 static void scan_read_callback(void * data, struct rc_instruction * inst,
162                 rc_register_file file, unsigned int index, unsigned int mask)
163 {
164         struct regalloc_state * s = data;
165         struct register_info * reg;
166         unsigned int i;
167
168         if (file != RC_FILE_INPUT)
169                 return;
170
171         s->Input[index].Used = 1;
172         reg = &s->Input[index];
173
174         for (i = 0; i < 4; i++) {
175                 if (!((mask >> i) & 0x1)) {
176                         continue;
177                 }
178                 reg->Live[i].Used = 1;
179                 reg->Live[i].Start = 0;
180                 reg->Live[i].End =
181                         s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
182         }
183 }
184
185 static void remap_register(void * data, struct rc_instruction * inst,
186                 rc_register_file * file, unsigned int * index)
187 {
188         struct regalloc_state * s = data;
189         const struct register_info * reg;
190
191         if (*file == RC_FILE_TEMPORARY && s->Simple)
192                 reg = &s->Temporary[*index];
193         else if (*file == RC_FILE_INPUT)
194                 reg = &s->Input[*index];
195         else
196                 return;
197
198         if (reg->Allocated) {
199                 *index = reg->Index;
200         }
201 }
202
203 static void alloc_input_simple(void * data, unsigned int input,
204                                                         unsigned int hwreg)
205 {
206         struct regalloc_state * s = data;
207
208         if (input >= s->NumInputs)
209                 return;
210
211         s->Input[input].Allocated = 1;
212         s->Input[input].File = RC_FILE_TEMPORARY;
213         s->Input[input].Index = hwreg;
214 }
215
216 /* This functions offsets the temporary register indices by the number
217  * of input registers, because input registers are actually temporaries and
218  * should not occupy the same space.
219  *
220  * This pass is supposed to be used to maintain correct allocation of inputs
221  * if the standard register allocation is disabled. */
222 static void do_regalloc_inputs_only(struct regalloc_state * s)
223 {
224         for (unsigned i = 0; i < s->NumTemporaries; i++) {
225                 s->Temporary[i].Allocated = 1;
226                 s->Temporary[i].File = RC_FILE_TEMPORARY;
227                 s->Temporary[i].Index = i + s->NumInputs;
228         }
229 }
230
231 static unsigned int is_derivative(rc_opcode op)
232 {
233         return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
234 }
235
236 static int find_class(
237         struct rc_class * classes,
238         unsigned int writemask,
239         unsigned int max_writemask_count)
240 {
241         unsigned int i;
242         for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
243                 unsigned int j;
244                 if (classes[i].WritemaskCount > max_writemask_count) {
245                         continue;
246                 }
247                 for (j = 0; j < 3; j++) {
248                         if (classes[i].Writemasks[j] == writemask) {
249                                 return i;
250                         }
251                 }
252         }
253         return -1;
254 }
255
/* User data passed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
        /* Flag cleared by the callback when a swizzle would stop being
         * native after the conversion. */
        unsigned *can_change_writemask;
        /* Swizzle that maps the old channel layout to the candidate one. */
        unsigned conversion_swizzle;
};
260
261 static void variable_get_class_read_cb(
262         void * userdata,
263         struct rc_instruction * inst,
264         struct rc_pair_instruction_arg * arg,
265         struct rc_pair_instruction_source * src)
266 {
267         struct variable_get_class_cb_data * d = userdata;
268         unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
269                                                         d->conversion_swizzle);
270         if (!r300_swizzle_is_native_basic(new_swizzle)) {
271                 *d->can_change_writemask = 0;
272         }
273 }
274
275 static enum rc_reg_class variable_get_class(
276         struct rc_variable * variable,
277         struct rc_class * classes)
278 {
279         unsigned int i;
280         unsigned int can_change_writemask= 1;
281         unsigned int writemask = rc_variable_writemask_sum(variable);
282         struct rc_list * readers = rc_variable_readers_union(variable);
283         int class_index;
284
285         if (!variable->C->is_r500) {
286                 struct rc_class c;
287                 struct rc_variable * var_ptr;
288                 /* The assumption here is that if an instruction has type
289                  * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
290                  * r300 and r400 can't swizzle the result of a TEX lookup. */
291                 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
292                         if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
293                                 writemask = RC_MASK_XYZW;
294                         }
295                 }
296
297                 /* Check if it is possible to do swizzle packing for r300/r400
298                  * without creating non-native swizzles. */
299                 class_index = find_class(classes, writemask, 3);
300                 if (class_index < 0) {
301                         goto error;
302                 }
303                 c = classes[class_index];
304                 if (c.WritemaskCount == 1) {
305                         goto done;
306                 }
307                 for (i = 0; i < c.WritemaskCount; i++) {
308                         struct rc_variable * var_ptr;
309                         for (var_ptr = variable; var_ptr;
310                                                 var_ptr = var_ptr->Friend) {
311                                 int j;
312                                 unsigned int conversion_swizzle =
313                                                 rc_make_conversion_swizzle(
314                                                 writemask, c.Writemasks[i]);
315                                 struct variable_get_class_cb_data d;
316                                 d.can_change_writemask = &can_change_writemask;
317                                 d.conversion_swizzle = conversion_swizzle;
318                                 /* If we get this far var_ptr->Inst has to
319                                  * be a pair instruction.  If variable or any
320                                  * of its friends are normal instructions,
321                                  * then the writemask will be set to RC_MASK_XYZW
322                                  * and the function will return before it gets
323                                  * here. */
324                                 rc_pair_for_all_reads_arg(var_ptr->Inst,
325                                         variable_get_class_read_cb, &d);
326
327                                 for (j = 0; j < var_ptr->ReaderCount; j++) {
328                                         unsigned int old_swizzle;
329                                         unsigned int new_swizzle;
330                                         struct rc_reader r = var_ptr->Readers[j];
331                                         if (r.Inst->Type ==
332                                                         RC_INSTRUCTION_PAIR ) {
333                                                 old_swizzle = r.U.P.Arg->Swizzle;
334                                         } else {
335                                                 old_swizzle = r.U.I.Src->Swizzle;
336                                         }
337                                         new_swizzle = rc_adjust_channels(
338                                                 old_swizzle, conversion_swizzle);
339                                         if (!r300_swizzle_is_native_basic(
340                                                                 new_swizzle)) {
341                                                 can_change_writemask = 0;
342                                                 break;
343                                         }
344                                 }
345                                 if (!can_change_writemask) {
346                                         break;
347                                 }
348                         }
349                         if (!can_change_writemask) {
350                                 break;
351                         }
352                 }
353         }
354
355         if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
356                 /* DDX/DDY seem to always fail when their writemasks are
357                  * changed.*/
358                 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
359                     || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
360                         can_change_writemask = 0;
361                 }
362         }
363         for ( ; readers; readers = readers->Next) {
364                 struct rc_reader * r = readers->Item;
365                 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
366                         if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
367                                 can_change_writemask = 0;
368                                 break;
369                         }
370                         /* DDX/DDY also fail when their swizzles are changed. */
371                         if (is_derivative(r->Inst->U.P.RGB.Opcode)
372                             || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
373                                 can_change_writemask = 0;
374                                 break;
375                         }
376                 }
377         }
378
379         class_index = find_class(classes, writemask,
380                                                 can_change_writemask ? 3 : 1);
381 done:
382         if (class_index > -1) {
383                 return classes[class_index].Class;
384         } else {
385 error:
386                 rc_error(variable->C,
387                                 "Could not find class for index=%u mask=%u\n",
388                                 variable->Dst.Index, writemask);
389                 return 0;
390         }
391 }
392
393 static unsigned int overlap_live_intervals_array(
394         struct live_intervals * a,
395         struct live_intervals * b)
396 {
397         unsigned int a_chan, b_chan;
398         for (a_chan = 0; a_chan < 4; a_chan++) {
399                 for (b_chan = 0; b_chan < 4; b_chan++) {
400                         if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
401                                         return 1;
402                         }
403                 }
404         }
405         return 0;
406 }
407
408 static unsigned int reg_get_index(int reg)
409 {
410         return reg / RC_MASK_XYZW;
411 }
412
413 static unsigned int reg_get_writemask(int reg)
414 {
415         return (reg % RC_MASK_XYZW) + 1;
416 }
417
418 static int get_reg_id(unsigned int index, unsigned int writemask)
419 {
420         assert(writemask);
421         if (writemask == 0) {
422                 return 0;
423         }
424         return (index * RC_MASK_XYZW) + (writemask - 1);
425 }
426
#if VERBOSE
/* Debug helper: print a register id as "Temp[index].xyzw", with '_' in
 * place of every channel that is not part of its writemask. */
static void print_reg(int reg)
{
        const unsigned int mask = reg_get_writemask(reg);
        const char x = (mask & RC_MASK_X) ? 'x' : '_';
        const char y = (mask & RC_MASK_Y) ? 'y' : '_';
        const char z = (mask & RC_MASK_Z) ? 'z' : '_';
        const char w = (mask & RC_MASK_W) ? 'w' : '_';

        fprintf(stderr, "Temp[%u].%c%c%c%c", reg_get_index(reg), x, y, z, w);
}
#endif
439
440 static void add_register_conflicts(
441         struct ra_regs * regs,
442         unsigned int max_temp_regs)
443 {
444         unsigned int index, a_mask, b_mask;
445         for (index = 0; index < max_temp_regs; index++) {
446                 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
447                         for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
448                                                                 b_mask++) {
449                                 if (a_mask & b_mask) {
450                                         ra_add_reg_conflict(regs,
451                                                 get_reg_id(index, a_mask),
452                                                 get_reg_id(index, b_mask));
453                                 }
454                         }
455                 }
456         }
457 }
458
459 static void do_advanced_regalloc(struct regalloc_state * s)
460 {
461         struct rc_class rc_class_list [] = {
462                 {RC_REG_CLASS_SINGLE, 3, 0, 0,
463                         {RC_MASK_X,
464                          RC_MASK_Y,
465                          RC_MASK_Z}},
466                 {RC_REG_CLASS_DOUBLE, 3, 0, 0,
467                         {RC_MASK_X | RC_MASK_Y,
468                          RC_MASK_X | RC_MASK_Z,
469                          RC_MASK_Y | RC_MASK_Z}},
470                 {RC_REG_CLASS_TRIPLE, 1, 0, 0,
471                         {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
472                          RC_MASK_NONE,
473                          RC_MASK_NONE}},
474                 {RC_REG_CLASS_ALPHA, 1, 0, 0,
475                         {RC_MASK_W,
476                          RC_MASK_NONE,
477                          RC_MASK_NONE}},
478                 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
479                         {RC_MASK_X | RC_MASK_W,
480                          RC_MASK_Y | RC_MASK_W,
481                          RC_MASK_Z | RC_MASK_W}},
482                 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
483                         {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
484                          RC_MASK_X | RC_MASK_Z | RC_MASK_W,
485                          RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
486                 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
487                         {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
488                         RC_MASK_NONE,
489                         RC_MASK_NONE}},
490                 {RC_REG_CLASS_X, 1, 0, 0,
491                         {RC_MASK_X,
492                         RC_MASK_NONE,
493                         RC_MASK_NONE}},
494                 {RC_REG_CLASS_Y, 1, 0, 0,
495                         {RC_MASK_Y,
496                         RC_MASK_NONE,
497                         RC_MASK_NONE}},
498                 {RC_REG_CLASS_Z, 1, 0, 0,
499                         {RC_MASK_Z,
500                         RC_MASK_NONE,
501                         RC_MASK_NONE}},
502                 {RC_REG_CLASS_XY, 1, 0, 0,
503                         {RC_MASK_X | RC_MASK_Y,
504                         RC_MASK_NONE,
505                         RC_MASK_NONE}},
506                 {RC_REG_CLASS_YZ, 1, 0, 0,
507                         {RC_MASK_Y | RC_MASK_Z,
508                         RC_MASK_NONE,
509                         RC_MASK_NONE}},
510                 {RC_REG_CLASS_XZ, 1, 0, 0,
511                         {RC_MASK_X | RC_MASK_Z,
512                         RC_MASK_NONE,
513                         RC_MASK_NONE}},
514                 {RC_REG_CLASS_XW, 1, 0, 0,
515                         {RC_MASK_X | RC_MASK_W,
516                         RC_MASK_NONE,
517                         RC_MASK_NONE}},
518                 {RC_REG_CLASS_YW, 1, 0, 0,
519                         {RC_MASK_Y | RC_MASK_W,
520                         RC_MASK_NONE,
521                         RC_MASK_NONE}},
522                 {RC_REG_CLASS_ZW, 1, 0, 0,
523                         {RC_MASK_Z | RC_MASK_W,
524                         RC_MASK_NONE,
525                         RC_MASK_NONE}},
526                 {RC_REG_CLASS_XYW, 1, 0, 0,
527                         {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
528                         RC_MASK_NONE,
529                         RC_MASK_NONE}},
530                 {RC_REG_CLASS_YZW, 1, 0, 0,
531                         {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
532                         RC_MASK_NONE,
533                         RC_MASK_NONE}},
534                 {RC_REG_CLASS_XZW, 1, 0, 0,
535                         {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
536                         RC_MASK_NONE,
537                         RC_MASK_NONE}}
538         };
539
540         unsigned int i, j, index, input_node, node_count, node_index;
541         unsigned int * node_classes;
542         unsigned int * input_classes;
543         struct rc_instruction * inst;
544         struct rc_list * var_ptr;
545         struct rc_list * variables;
546         struct ra_regs * regs;
547         struct ra_graph * graph;
548
549         /* Allocate the main ra data structure */
550         regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW);
551
552         /* Get list of program variables */
553         variables = rc_get_variables(s->C);
554         node_count = rc_list_count(variables);
555         node_classes = memory_pool_malloc(&s->C->Pool,
556                         node_count * sizeof(unsigned int));
557         input_classes = memory_pool_malloc(&s->C->Pool,
558                         s->NumInputs * sizeof(unsigned int));
559
560         for (var_ptr = variables, node_index = 0; var_ptr;
561                                         var_ptr = var_ptr->Next, node_index++) {
562                 unsigned int class_index;
563                 /* Compute the live intervals */
564                 rc_variable_compute_live_intervals(var_ptr->Item);
565
566                 class_index = variable_get_class(var_ptr->Item, rc_class_list);
567
568                 /* If we haven't used this register class yet, mark it
569                  * as used and allocate space for it. */
570                 if (!rc_class_list[class_index].Used) {
571                         rc_class_list[class_index].Used = 1;
572                         rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
573                 }
574
575                 node_classes[node_index] = rc_class_list[class_index].Id;
576         }
577
578
579         /* Assign registers to the classes */
580         for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
581                 struct rc_class class = rc_class_list[i];
582                 if (!class.Used) {
583                         continue;
584                 }
585
586                 for (index = 0; index < s->C->max_temp_regs; index++) {
587                         for (j = 0; j < class.WritemaskCount; j++) {
588                                 int reg_id = get_reg_id(index,
589                                                         class.Writemasks[j]);
590                                 ra_class_add_reg(regs, class.Id, reg_id);
591                         }
592                 }
593         }
594
595         /* Add register conflicts */
596         add_register_conflicts(regs, s->C->max_temp_regs);
597
598         /* Calculate live intervals for input registers */
599         for (inst = s->C->Program.Instructions.Next;
600                                         inst != &s->C->Program.Instructions;
601                                         inst = inst->Next) {
602                 rc_opcode op = rc_get_flow_control_inst(inst);
603                 if (op == RC_OPCODE_BGNLOOP) {
604                         struct rc_instruction * endloop =
605                                                         rc_match_bgnloop(inst);
606                         if (endloop->IP > s->LoopEnd) {
607                                 s->LoopEnd = endloop->IP;
608                         }
609                 }
610                 rc_for_all_reads_mask(inst, scan_read_callback, s);
611         }
612
613         /* Create classes for input registers */
614         for (i = 0; i < s->NumInputs; i++) {
615                 unsigned int chan, class_id, writemask = 0;
616                 for (chan = 0; chan < 4; chan++) {
617                         if (s->Input[i].Live[chan].Used) {
618                                 writemask |= (1 << chan);
619                         }
620                 }
621                 s->Input[i].Writemask = writemask;
622                 if (!writemask) {
623                         continue;
624                 }
625
626                 class_id = ra_alloc_reg_class(regs);
627                 input_classes[i] = class_id;
628                 ra_class_add_reg(regs, class_id,
629                                 get_reg_id(s->Input[i].Index, writemask));
630         }
631
632         ra_set_finalize(regs);
633
634         graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
635
636         /* Build the interference graph */
637         for (var_ptr = variables, node_index = 0; var_ptr;
638                                         var_ptr = var_ptr->Next,node_index++) {
639                 struct rc_list * a, * b;
640                 unsigned int b_index;
641
642                 ra_set_node_class(graph, node_index, node_classes[node_index]);
643
644                 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
645                                                 b; b = b->Next, b_index++) {
646                         struct rc_variable * var_a = a->Item;
647                         while (var_a) {
648                                 struct rc_variable * var_b = b->Item;
649                                 while (var_b) {
650                                         if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
651                                                 ra_add_node_interference(graph,
652                                                         node_index, b_index);
653                                         }
654                                         var_b = var_b->Friend;
655                                 }
656                                 var_a = var_a->Friend;
657                         }
658                 }
659         }
660
661         /* Add input registers to the interference graph */
662         for (i = 0, input_node = 0; i< s->NumInputs; i++) {
663                 if (!s->Input[i].Writemask) {
664                         continue;
665                 }
666                 ra_set_node_class(graph, node_count + input_node,
667                                                         input_classes[i]);
668                 for (var_ptr = variables, node_index = 0;
669                                 var_ptr; var_ptr = var_ptr->Next, node_index++) {
670                         struct rc_variable * var = var_ptr->Item;
671                         if (overlap_live_intervals_array(s->Input[i].Live,
672                                                                 var->Live)) {
673                                 ra_add_node_interference(graph, node_index,
674                                                 node_count + input_node);
675                         }
676                 }
677                 /* Manually allocate a register for this input */
678                 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
679                                 s->Input[i].Index, s->Input[i].Writemask));
680                 input_node++;
681         }
682
683         if (!ra_allocate_no_spills(graph)) {
684                 rc_error(s->C, "Ran out of hardware temporaries\n");
685                 return;
686         }
687
688         /* Rewrite the registers */
689         for (var_ptr = variables, node_index = 0; var_ptr;
690                                 var_ptr = var_ptr->Next, node_index++) {
691                 int reg = ra_get_node_reg(graph, node_index);
692                 unsigned int writemask = reg_get_writemask(reg);
693                 unsigned int index = reg_get_index(reg);
694                 struct rc_variable * var = var_ptr->Item;
695
696                 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
697                         writemask = rc_variable_writemask_sum(var);
698                 }
699
700                 if (var->Dst.File == RC_FILE_INPUT) {
701                         continue;
702                 }
703                 rc_variable_change_dst(var, index, writemask);
704         }
705
706         ralloc_free(graph);
707         ralloc_free(regs);
708 }
709
710 /**
711  * @param user This parameter should be a pointer to an integer value.  If this
712  * integer value is zero, then a simple register allocator will be used that
713  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
714  * user is non-zero, then the regular register allocator will be used
715  * (\sa do_regalloc).
716   */
717 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
718 {
719         struct r300_fragment_program_compiler *c =
720                                 (struct r300_fragment_program_compiler*)cc;
721         struct regalloc_state s;
722         int * do_full_regalloc = (int*)user;
723
724         memset(&s, 0, sizeof(s));
725         s.C = cc;
726         s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
727         s.Input = memory_pool_malloc(&cc->Pool,
728                         s.NumInputs * sizeof(struct register_info));
729         memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
730
731         s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
732         s.Temporary = memory_pool_malloc(&cc->Pool,
733                         s.NumTemporaries * sizeof(struct register_info));
734         memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
735
736         rc_recompute_ips(s.C);
737
738         c->AllocateHwInputs(c, &alloc_input_simple, &s);
739         if (*do_full_regalloc) {
740                 do_advanced_regalloc(&s);
741         } else {
742                 s.Simple = 1;
743                 do_regalloc_inputs_only(&s);
744         }
745
746         /* Rewrite inputs and if we are doing the simple allocation, rewrite
747          * temporaries too. */
748         for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
749                                         inst != &s.C->Program.Instructions;
750                                         inst = inst->Next) {
751                 rc_remap_registers(inst, &remap_register, &s);
752         }
753 }