Fix utc_ecore_evas_pointer_xy_get timeout issue
[platform/upstream/mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_regalloc.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28
29 #include "radeon_program_pair.h"
30
31 #include <stdio.h>
32
33 #include "util/glheader.h"
34 #include "util/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
37
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
45
46 static void scan_read_callback(void * data, struct rc_instruction * inst,
47                 rc_register_file file, unsigned int index, unsigned int mask)
48 {
49         struct regalloc_state * s = data;
50         struct register_info * reg;
51         unsigned int i;
52
53         if (file != RC_FILE_INPUT)
54                 return;
55
56         s->Input[index].Used = 1;
57         reg = &s->Input[index];
58
59         for (i = 0; i < 4; i++) {
60                 if (!((mask >> i) & 0x1)) {
61                         continue;
62                 }
63                 reg->Live[i].Used = 1;
64                 reg->Live[i].Start = 0;
65                 reg->Live[i].End =
66                         s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
67         }
68 }
69
70 static void remap_register(void * data, struct rc_instruction * inst,
71                 rc_register_file * file, unsigned int * index)
72 {
73         struct regalloc_state * s = data;
74         const struct register_info * reg;
75
76         if (*file == RC_FILE_TEMPORARY && s->Simple)
77                 reg = &s->Temporary[*index];
78         else if (*file == RC_FILE_INPUT)
79                 reg = &s->Input[*index];
80         else
81                 return;
82
83         if (reg->Allocated) {
84                 *index = reg->Index;
85         }
86 }
87
88 static void alloc_input_simple(void * data, unsigned int input,
89                                                         unsigned int hwreg)
90 {
91         struct regalloc_state * s = data;
92
93         if (input >= s->NumInputs)
94                 return;
95
96         s->Input[input].Allocated = 1;
97         s->Input[input].File = RC_FILE_TEMPORARY;
98         s->Input[input].Index = hwreg;
99 }
100
101 /* This functions offsets the temporary register indices by the number
102  * of input registers, because input registers are actually temporaries and
103  * should not occupy the same space.
104  *
105  * This pass is supposed to be used to maintain correct allocation of inputs
106  * if the standard register allocation is disabled. */
107 static void do_regalloc_inputs_only(struct regalloc_state * s)
108 {
109         for (unsigned i = 0; i < s->NumTemporaries; i++) {
110                 s->Temporary[i].Allocated = 1;
111                 s->Temporary[i].File = RC_FILE_TEMPORARY;
112                 s->Temporary[i].Index = i + s->NumInputs;
113         }
114 }
115
116 static unsigned int is_derivative(rc_opcode op)
117 {
118         return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
119 }
120
/* User data passed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
        /* Flag owned by the caller; the callback stores 0 through this
         * pointer when it finds a swizzle that would not be native. */
        unsigned int *can_change_writemask;

        /* Conversion swizzle the callback applies to each argument
         * swizzle (via rc_adjust_channels) before testing it. */
        unsigned int conversion_swizzle;

        /* Compiler whose SwizzleCaps->IsNative hook is queried. */
        struct radeon_compiler *c;
};
126
127 static void variable_get_class_read_cb(
128         void * userdata,
129         struct rc_instruction * inst,
130         struct rc_pair_instruction_arg * arg,
131         struct rc_pair_instruction_source * src)
132 {
133         struct variable_get_class_cb_data * d = userdata;
134         unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
135                                                         d->conversion_swizzle);
136         /* We can't just call r300_swizzle_is_native basic here, because it ignores the
137          * extra requirements for presubtract. However, after pair translation we no longer
138          * have the rc_src_register required for the native swizzle, so we have to
139          * reconstruct it. */
140         struct rc_src_register reg = {};
141         reg.Swizzle = new_swizzle;
142         reg.File = src->File;
143
144         assert(inst->Type == RC_INSTRUCTION_PAIR);
145         /* The opcode is unimportant, we can't have TEX here. */
146         if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
147                 *d->can_change_writemask = 0;
148         }
149 }
150
151 static unsigned variable_get_class(
152         struct rc_variable * variable,
153         const struct rc_class * classes)
154 {
155         unsigned int i;
156         unsigned int can_change_writemask= 1;
157         unsigned int writemask = rc_variable_writemask_sum(variable);
158         struct rc_list * readers = rc_variable_readers_union(variable);
159         int class_index;
160
161         if (!variable->C->is_r500) {
162                 struct rc_class c;
163                 struct rc_variable * var_ptr;
164                 /* The assumption here is that if an instruction has type
165                  * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
166                  * r300 and r400 can't swizzle the result of a TEX lookup. */
167                 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
168                         if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
169                                 writemask = RC_MASK_XYZW;
170                         }
171                 }
172
173                 /* Check if it is possible to do swizzle packing for r300/r400
174                  * without creating non-native swizzles. */
175                 class_index = rc_find_class(classes, writemask, 3);
176                 if (class_index < 0) {
177                         goto error;
178                 }
179                 c = classes[class_index];
180                 if (c.WritemaskCount == 1) {
181                         goto done;
182                 }
183                 for (i = 0; i < c.WritemaskCount; i++) {
184                         struct rc_variable * var_ptr;
185                         for (var_ptr = variable; var_ptr;
186                                                 var_ptr = var_ptr->Friend) {
187                                 int j;
188                                 unsigned int conversion_swizzle =
189                                                 rc_make_conversion_swizzle(
190                                                 writemask, c.Writemasks[i]);
191                                 struct variable_get_class_cb_data d;
192                                 d.can_change_writemask = &can_change_writemask;
193                                 d.conversion_swizzle = conversion_swizzle;
194                                 d.c = variable->C;
195                                 /* If we get this far var_ptr->Inst has to
196                                  * be a pair instruction.  If variable or any
197                                  * of its friends are normal instructions,
198                                  * then the writemask will be set to RC_MASK_XYZW
199                                  * and the function will return before it gets
200                                  * here. */
201                                 rc_pair_for_all_reads_arg(var_ptr->Inst,
202                                         variable_get_class_read_cb, &d);
203
204                                 for (j = 0; j < var_ptr->ReaderCount; j++) {
205                                         unsigned int old_swizzle;
206                                         unsigned int new_swizzle;
207                                         struct rc_reader r = var_ptr->Readers[j];
208                                         if (r.Inst->Type ==
209                                                         RC_INSTRUCTION_PAIR ) {
210                                                 old_swizzle = r.U.P.Arg->Swizzle;
211                                         } else {
212                                                 /* Source operands of TEX
213                                                  * instructions can't be
214                                                  * swizzle on r300/r400 GPUs.
215                                                  */
216                                                 can_change_writemask = 0;
217                                                 break;
218                                         }
219                                         new_swizzle = rc_rewrite_swizzle(
220                                                 old_swizzle, conversion_swizzle);
221                                         if (!r300_swizzle_is_native_basic(
222                                                                 new_swizzle)) {
223                                                 can_change_writemask = 0;
224                                                 break;
225                                         }
226                                 }
227                                 if (!can_change_writemask) {
228                                         break;
229                                 }
230                         }
231                         if (!can_change_writemask) {
232                                 break;
233                         }
234                 }
235         }
236
237         if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
238                 /* DDX/DDY seem to always fail when their writemasks are
239                  * changed.*/
240                 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
241                     || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
242                         can_change_writemask = 0;
243                 }
244         }
245         for ( ; readers; readers = readers->Next) {
246                 struct rc_reader * r = readers->Item;
247                 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
248                         if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
249                                 can_change_writemask = 0;
250                                 break;
251                         }
252                         /* DDX/DDY also fail when their swizzles are changed. */
253                         if (is_derivative(r->Inst->U.P.RGB.Opcode)
254                             || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
255                                 can_change_writemask = 0;
256                                 break;
257                         }
258                 }
259         }
260
261         class_index = rc_find_class(classes, writemask,
262                                                 can_change_writemask ? 3 : 1);
263 done:
264         if (class_index > -1) {
265                 return classes[class_index].ID;
266         } else {
267 error:
268                 rc_error(variable->C,
269                                 "Could not find class for index=%u mask=%u\n",
270                                 variable->Dst.Index, writemask);
271                 return 0;
272         }
273 }
274
275 static void do_advanced_regalloc(struct regalloc_state * s)
276 {
277
278         unsigned int i, input_node, node_count, node_index;
279         struct ra_class ** node_classes;
280         struct rc_instruction * inst;
281         struct rc_list * var_ptr;
282         struct rc_list * variables;
283         struct ra_graph * graph;
284         const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
285
286         /* Get list of program variables */
287         variables = rc_get_variables(s->C);
288         node_count = rc_list_count(variables);
289         node_classes = memory_pool_malloc(&s->C->Pool,
290                         node_count * sizeof(struct ra_class *));
291
292         for (var_ptr = variables, node_index = 0; var_ptr;
293                                         var_ptr = var_ptr->Next, node_index++) {
294                 unsigned int class_index;
295                 /* Compute the live intervals */
296                 rc_variable_compute_live_intervals(var_ptr->Item);
297
298                 class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
299                 node_classes[node_index] = ra_state->classes[class_index];
300         }
301
302
303         /* Calculate live intervals for input registers */
304         for (inst = s->C->Program.Instructions.Next;
305                                         inst != &s->C->Program.Instructions;
306                                         inst = inst->Next) {
307                 rc_opcode op = rc_get_flow_control_inst(inst);
308                 if (op == RC_OPCODE_BGNLOOP) {
309                         struct rc_instruction * endloop =
310                                                         rc_match_bgnloop(inst);
311                         if (endloop->IP > s->LoopEnd) {
312                                 s->LoopEnd = endloop->IP;
313                         }
314                 }
315                 rc_for_all_reads_mask(inst, scan_read_callback, s);
316         }
317
318         /* Compute the writemask for inputs. */
319         for (i = 0; i < s->NumInputs; i++) {
320                 unsigned int chan, writemask = 0;
321                 for (chan = 0; chan < 4; chan++) {
322                         if (s->Input[i].Live[chan].Used) {
323                                 writemask |= (1 << chan);
324                         }
325                 }
326                 s->Input[i].Writemask = writemask;
327         }
328
329         graph = ra_alloc_interference_graph(ra_state->regs,
330                                                 node_count + s->NumInputs);
331
332         for (node_index = 0; node_index < node_count; node_index++) {
333                 ra_set_node_class(graph, node_index, node_classes[node_index]);
334         }
335
336         rc_build_interference_graph(graph, variables);
337
338         /* Add input registers to the interference graph */
339         for (i = 0, input_node = 0; i< s->NumInputs; i++) {
340                 if (!s->Input[i].Writemask) {
341                         continue;
342                 }
343                 for (var_ptr = variables, node_index = 0;
344                                 var_ptr; var_ptr = var_ptr->Next, node_index++) {
345                         struct rc_variable * var = var_ptr->Item;
346                         if (rc_overlap_live_intervals_array(s->Input[i].Live,
347                                                                 var->Live)) {
348                                 ra_add_node_interference(graph, node_index,
349                                                 node_count + input_node);
350                         }
351                 }
352                 /* Manually allocate a register for this input */
353                 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
354                                 s->Input[i].Index, s->Input[i].Writemask));
355                 input_node++;
356         }
357
358         if (!ra_allocate(graph)) {
359                 rc_error(s->C, "Ran out of hardware temporaries\n");
360                 return;
361         }
362
363         /* Rewrite the registers */
364         for (var_ptr = variables, node_index = 0; var_ptr;
365                                 var_ptr = var_ptr->Next, node_index++) {
366                 int reg = ra_get_node_reg(graph, node_index);
367                 unsigned int writemask = reg_get_writemask(reg);
368                 unsigned int index = reg_get_index(reg);
369                 struct rc_variable * var = var_ptr->Item;
370
371                 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
372                         writemask = rc_variable_writemask_sum(var);
373                 }
374
375                 if (var->Dst.File == RC_FILE_INPUT) {
376                         continue;
377                 }
378                 rc_variable_change_dst(var, index, writemask);
379         }
380
381         ralloc_free(graph);
382 }
383
384 /**
385  * @param user This parameter should be a pointer to an integer value.  If this
386  * integer value is zero, then a simple register allocator will be used that
387  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
388  * user is non-zero, then the regular register allocator will be used
389  * (\sa do_regalloc).
390   */
391 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
392 {
393         struct r300_fragment_program_compiler *c =
394                                 (struct r300_fragment_program_compiler*)cc;
395         struct regalloc_state s;
396         int * do_full_regalloc = (int*)user;
397
398         memset(&s, 0, sizeof(s));
399         s.C = cc;
400         s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
401         s.Input = memory_pool_malloc(&cc->Pool,
402                         s.NumInputs * sizeof(struct register_info));
403         memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
404
405         s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
406         s.Temporary = memory_pool_malloc(&cc->Pool,
407                         s.NumTemporaries * sizeof(struct register_info));
408         memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
409
410         rc_recompute_ips(s.C);
411
412         c->AllocateHwInputs(c, &alloc_input_simple, &s);
413         if (*do_full_regalloc) {
414                 do_advanced_regalloc(&s);
415         } else {
416                 s.Simple = 1;
417                 do_regalloc_inputs_only(&s);
418         }
419
420         /* Rewrite inputs and if we are doing the simple allocation, rewrite
421          * temporaries too. */
422         for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
423                                         inst != &s.C->Program.Instructions;
424                                         inst = inst->Next) {
425                 rc_remap_registers(inst, &remap_register, &s);
426         }
427 }