/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "radeon_program_pair.h"

#include <assert.h>
#include <string.h>

#include "util/glheader.h"
#include "util/register_allocate.h"
#include "util/u_memory.h"
#include "util/ralloc.h"

#include "r300_fragprog_swizzle.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_regalloc.h"
#include "radeon_variable.h"
46 static void scan_read_callback(void * data, struct rc_instruction * inst,
47 rc_register_file file, unsigned int index, unsigned int mask)
49 struct regalloc_state * s = data;
50 struct register_info * reg;
53 if (file != RC_FILE_INPUT)
56 s->Input[index].Used = 1;
57 reg = &s->Input[index];
59 for (i = 0; i < 4; i++) {
60 if (!((mask >> i) & 0x1)) {
63 reg->Live[i].Used = 1;
64 reg->Live[i].Start = 0;
66 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
70 static void remap_register(void * data, struct rc_instruction * inst,
71 rc_register_file * file, unsigned int * index)
73 struct regalloc_state * s = data;
74 const struct register_info * reg;
76 if (*file == RC_FILE_TEMPORARY && s->Simple)
77 reg = &s->Temporary[*index];
78 else if (*file == RC_FILE_INPUT)
79 reg = &s->Input[*index];
88 static void alloc_input_simple(void * data, unsigned int input,
91 struct regalloc_state * s = data;
93 if (input >= s->NumInputs)
96 s->Input[input].Allocated = 1;
97 s->Input[input].File = RC_FILE_TEMPORARY;
98 s->Input[input].Index = hwreg;
101 /* This functions offsets the temporary register indices by the number
102 * of input registers, because input registers are actually temporaries and
103 * should not occupy the same space.
105 * This pass is supposed to be used to maintain correct allocation of inputs
106 * if the standard register allocation is disabled. */
107 static void do_regalloc_inputs_only(struct regalloc_state * s)
109 for (unsigned i = 0; i < s->NumTemporaries; i++) {
110 s->Temporary[i].Allocated = 1;
111 s->Temporary[i].File = RC_FILE_TEMPORARY;
112 s->Temporary[i].Index = i + s->NumInputs;
116 static unsigned int is_derivative(rc_opcode op)
118 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
/** State shared between variable_get_class() and its read callback. */
struct variable_get_class_cb_data {
	/* Flag owned by the caller; the callback clears it to 0 when the
	 * candidate swizzle is not native. */
	unsigned int * can_change_writemask;
	/* Channel remapping applied to each source swizzle before it is
	 * checked. */
	unsigned int conversion_swizzle;
	/* Compiler whose SwizzleCaps are used for the native-swizzle check. */
	struct radeon_compiler * c;
};
127 static void variable_get_class_read_cb(
129 struct rc_instruction * inst,
130 struct rc_pair_instruction_arg * arg,
131 struct rc_pair_instruction_source * src)
133 struct variable_get_class_cb_data * d = userdata;
134 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
135 d->conversion_swizzle);
136 /* We can't just call r300_swizzle_is_native basic here, because it ignores the
137 * extra requirements for presubtract. However, after pair translation we no longer
138 * have the rc_src_register required for the native swizzle, so we have to
140 struct rc_src_register reg = {};
141 reg.Swizzle = new_swizzle;
142 reg.File = src->File;
144 assert(inst->Type == RC_INSTRUCTION_PAIR);
145 /* The opcode is unimportant, we can't have TEX here. */
146 if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
147 *d->can_change_writemask = 0;
151 static unsigned variable_get_class(
152 struct rc_variable * variable,
153 const struct rc_class * classes)
156 unsigned int can_change_writemask= 1;
157 unsigned int writemask = rc_variable_writemask_sum(variable);
158 struct rc_list * readers = rc_variable_readers_union(variable);
161 if (!variable->C->is_r500) {
163 struct rc_variable * var_ptr;
164 /* The assumption here is that if an instruction has type
165 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
166 * r300 and r400 can't swizzle the result of a TEX lookup. */
167 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
168 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
169 writemask = RC_MASK_XYZW;
173 /* Check if it is possible to do swizzle packing for r300/r400
174 * without creating non-native swizzles. */
175 class_index = rc_find_class(classes, writemask, 3);
176 if (class_index < 0) {
179 c = classes[class_index];
180 if (c.WritemaskCount == 1) {
183 for (i = 0; i < c.WritemaskCount; i++) {
184 struct rc_variable * var_ptr;
185 for (var_ptr = variable; var_ptr;
186 var_ptr = var_ptr->Friend) {
188 unsigned int conversion_swizzle =
189 rc_make_conversion_swizzle(
190 writemask, c.Writemasks[i]);
191 struct variable_get_class_cb_data d;
192 d.can_change_writemask = &can_change_writemask;
193 d.conversion_swizzle = conversion_swizzle;
195 /* If we get this far var_ptr->Inst has to
196 * be a pair instruction. If variable or any
197 * of its friends are normal instructions,
198 * then the writemask will be set to RC_MASK_XYZW
199 * and the function will return before it gets
201 rc_pair_for_all_reads_arg(var_ptr->Inst,
202 variable_get_class_read_cb, &d);
204 for (j = 0; j < var_ptr->ReaderCount; j++) {
205 unsigned int old_swizzle;
206 unsigned int new_swizzle;
207 struct rc_reader r = var_ptr->Readers[j];
209 RC_INSTRUCTION_PAIR ) {
210 old_swizzle = r.U.P.Arg->Swizzle;
212 /* Source operands of TEX
213 * instructions can't be
214 * swizzle on r300/r400 GPUs.
216 can_change_writemask = 0;
219 new_swizzle = rc_rewrite_swizzle(
220 old_swizzle, conversion_swizzle);
221 if (!r300_swizzle_is_native_basic(
223 can_change_writemask = 0;
227 if (!can_change_writemask) {
231 if (!can_change_writemask) {
237 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
238 /* DDX/DDY seem to always fail when their writemasks are
240 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
241 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
242 can_change_writemask = 0;
245 for ( ; readers; readers = readers->Next) {
246 struct rc_reader * r = readers->Item;
247 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
248 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
249 can_change_writemask = 0;
252 /* DDX/DDY also fail when their swizzles are changed. */
253 if (is_derivative(r->Inst->U.P.RGB.Opcode)
254 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
255 can_change_writemask = 0;
261 class_index = rc_find_class(classes, writemask,
262 can_change_writemask ? 3 : 1);
264 if (class_index > -1) {
265 return classes[class_index].ID;
268 rc_error(variable->C,
269 "Could not find class for index=%u mask=%u\n",
270 variable->Dst.Index, writemask);
275 static void do_advanced_regalloc(struct regalloc_state * s)
278 unsigned int i, input_node, node_count, node_index;
279 struct ra_class ** node_classes;
280 struct rc_instruction * inst;
281 struct rc_list * var_ptr;
282 struct rc_list * variables;
283 struct ra_graph * graph;
284 const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
286 /* Get list of program variables */
287 variables = rc_get_variables(s->C);
288 node_count = rc_list_count(variables);
289 node_classes = memory_pool_malloc(&s->C->Pool,
290 node_count * sizeof(struct ra_class *));
292 for (var_ptr = variables, node_index = 0; var_ptr;
293 var_ptr = var_ptr->Next, node_index++) {
294 unsigned int class_index;
295 /* Compute the live intervals */
296 rc_variable_compute_live_intervals(var_ptr->Item);
298 class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
299 node_classes[node_index] = ra_state->classes[class_index];
303 /* Calculate live intervals for input registers */
304 for (inst = s->C->Program.Instructions.Next;
305 inst != &s->C->Program.Instructions;
307 rc_opcode op = rc_get_flow_control_inst(inst);
308 if (op == RC_OPCODE_BGNLOOP) {
309 struct rc_instruction * endloop =
310 rc_match_bgnloop(inst);
311 if (endloop->IP > s->LoopEnd) {
312 s->LoopEnd = endloop->IP;
315 rc_for_all_reads_mask(inst, scan_read_callback, s);
318 /* Compute the writemask for inputs. */
319 for (i = 0; i < s->NumInputs; i++) {
320 unsigned int chan, writemask = 0;
321 for (chan = 0; chan < 4; chan++) {
322 if (s->Input[i].Live[chan].Used) {
323 writemask |= (1 << chan);
326 s->Input[i].Writemask = writemask;
329 graph = ra_alloc_interference_graph(ra_state->regs,
330 node_count + s->NumInputs);
332 for (node_index = 0; node_index < node_count; node_index++) {
333 ra_set_node_class(graph, node_index, node_classes[node_index]);
336 rc_build_interference_graph(graph, variables);
338 /* Add input registers to the interference graph */
339 for (i = 0, input_node = 0; i< s->NumInputs; i++) {
340 if (!s->Input[i].Writemask) {
343 for (var_ptr = variables, node_index = 0;
344 var_ptr; var_ptr = var_ptr->Next, node_index++) {
345 struct rc_variable * var = var_ptr->Item;
346 if (rc_overlap_live_intervals_array(s->Input[i].Live,
348 ra_add_node_interference(graph, node_index,
349 node_count + input_node);
352 /* Manually allocate a register for this input */
353 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
354 s->Input[i].Index, s->Input[i].Writemask));
358 if (!ra_allocate(graph)) {
359 rc_error(s->C, "Ran out of hardware temporaries\n");
363 /* Rewrite the registers */
364 for (var_ptr = variables, node_index = 0; var_ptr;
365 var_ptr = var_ptr->Next, node_index++) {
366 int reg = ra_get_node_reg(graph, node_index);
367 unsigned int writemask = reg_get_writemask(reg);
368 unsigned int index = reg_get_index(reg);
369 struct rc_variable * var = var_ptr->Item;
371 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
372 writemask = rc_variable_writemask_sum(var);
375 if (var->Dst.File == RC_FILE_INPUT) {
378 rc_variable_change_dst(var, index, writemask);
385 * @param user This parameter should be a pointer to an integer value. If this
386 * integer value is zero, then a simple register allocator will be used that
387 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
388 * user is non-zero, then the regular register allocator will be used
391 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
393 struct r300_fragment_program_compiler *c =
394 (struct r300_fragment_program_compiler*)cc;
395 struct regalloc_state s;
396 int * do_full_regalloc = (int*)user;
398 memset(&s, 0, sizeof(s));
400 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
401 s.Input = memory_pool_malloc(&cc->Pool,
402 s.NumInputs * sizeof(struct register_info));
403 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
405 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
406 s.Temporary = memory_pool_malloc(&cc->Pool,
407 s.NumTemporaries * sizeof(struct register_info));
408 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
410 rc_recompute_ips(s.C);
412 c->AllocateHwInputs(c, &alloc_input_simple, &s);
413 if (*do_full_regalloc) {
414 do_advanced_regalloc(&s);
417 do_regalloc_inputs_only(&s);
420 /* Rewrite inputs and if we are doing the simple allocation, rewrite
421 * temporaries too. */
422 for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
423 inst != &s.C->Program.Instructions;
425 rc_remap_registers(inst, &remap_register, &s);