2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 #include "radeon_compiler_util.h"
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
/**
 * Converts a swizzle into a mask of the channels it names.
 * For each of the four channels of swz, bit (1 << selector) is ORed into
 * the mask, where selector is the value GET_SWZ() yields for that channel.
 * NOTE(review): selectors other than X/Y/Z/W would set bits outside the
 * four channel bits at this point - confirm how the final result treats
 * them.
 */
38 unsigned int rc_swizzle_to_writemask(unsigned int swz)
40 unsigned int mask = 0;
43 for(i = 0; i < 4; i++) {
44 mask |= 1 << GET_SWZ(swz, i);
/**
 * Function wrapper around the GET_SWZ() macro.
 * @param swz Packed swizzle to read from.
 * @param idx Channel of swz to read.
 * @return The visible return statement yields GET_SWZ(swz, idx).
 */
51 rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
55 return GET_SWZ(swz, idx);
59 * The purpose of this function is to standardize the number of channels used by
60 * swizzles. All swizzles regardless of what instruction they are a part of
61 * should have 4 channels initialized with values.
62 * @param channels The number of channels in initial_value that have a
64 * @return An initialized swizzle that has all of the unused channels set to
67 unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
/* Channels below `channels` keep the caller's value; every channel from
 * `channels` up to 3 is overwritten with RC_SWIZZLE_UNUSED. With
 * channels == 0 all four channels end up unused. */
70 for (i = channels; i < 4; i++) {
71 SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);
/**
 * Builds a swizzle from four selectors looked up in src.
 * Each of swz_x, swz_y, swz_z and swz_w is resolved through get_swz(src, ...)
 * and the four results are packed into consecutive 3-bit fields
 * (shifts 0, 3, 6 and 9).
 */
76 unsigned int combine_swizzles4(unsigned int src,
77 rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
81 ret |= get_swz(src, swz_x);
82 ret |= get_swz(src, swz_y) << 3;
83 ret |= get_swz(src, swz_z) << 6;
84 ret |= get_swz(src, swz_w) << 9;
/**
 * Applies swizzle swz on top of swizzle src.
 * For each of the X, Y, Z and W channels of swz, the selector stored there is
 * resolved through get_swz(src, ...) and the result is packed into the 3-bit
 * field of the same channel (shifts 0, 3, 6 and 9).
 */
89 unsigned int combine_swizzles(unsigned int src, unsigned int swz)
93 ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
94 ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
95 ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
96 ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
102 * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
104 rc_swizzle rc_mask_to_swizzle(unsigned int mask)
/* Each single-channel write mask maps to the swizzle selector of the same
 * channel. */
107 case RC_MASK_X: return RC_SWIZZLE_X;
108 case RC_MASK_Y: return RC_SWIZZLE_Y;
109 case RC_MASK_Z: return RC_SWIZZLE_Z;
110 case RC_MASK_W: return RC_SWIZZLE_W;
/* Reached when none of the case labels above returned. */
112 return RC_SWIZZLE_UNUSED;
115 /* Reorder mask bits according to swizzle. */
116 unsigned swizzle_mask(unsigned swizzle, unsigned mask)
119 for (unsigned chan = 0; chan < 4; ++chan) {
120 unsigned swz = GET_SWZ(swizzle, chan);
/* Output bit `chan` receives the mask bit of the channel that `chan`
 * selects through the swizzle. */
122 ret |= GET_BIT(mask, swz) << chan;
/**
 * Tells the writemask rewriting functions in this file whether the source
 * swizzles of an instruction have to be adjusted as well; both callers test
 * the result before touching any source.
 * Texture opcodes (info->HasTexture) are tested for first; the remaining
 * opcodes are dispatched on info->Opcode.
 */
127 static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
129 if (info->HasTexture) {
132 switch (info->Opcode) {
145 * @return A swizzle that results from converting old_swizzle using
148 unsigned int rc_adjust_channels(
149 unsigned int old_swizzle,
150 unsigned int conversion_swizzle)
/* Start from a swizzle whose four channels are all RC_SWIZZLE_UNUSED. */
153 unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
154 for (i = 0; i < 4; i++) {
/* conversion_swizzle gives, for old channel i, the channel it moves to. */
155 unsigned int new_chan = get_swz(conversion_swizzle, i);
156 if (new_chan == RC_SWIZZLE_UNUSED) {
/* The selector old_swizzle had in channel i is stored in channel new_chan. */
159 SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
/**
 * Translates a write mask through a conversion swizzle.
 * Bit i of old_mask is examined together with channel i of
 * conversion_swizzle; the bit numbered by that channel's selector is set in
 * new_mask. Bits that are clear in old_mask, and channels whose selector is
 * RC_SWIZZLE_UNUSED, are tested for before that happens.
 */
164 static unsigned int rewrite_writemask(
165 unsigned int old_mask,
166 unsigned int conversion_swizzle)
168 unsigned int new_mask = 0;
171 for (i = 0; i < 4; i++) {
172 if (!GET_BIT(old_mask, i)
173 || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {
176 new_mask |= (1 << GET_SWZ(conversion_swizzle, i));
183 * This function rewrites the writemask of sub and adjusts the swizzles
184 * of all its source registers based on the conversion_swizzle.
185 * conversion_swizzle represents a mapping of the old writemask to the
186 * new writemask. For a detailed description of how conversion swizzles
187 * work see rc_rewrite_swizzle().
189 void rc_pair_rewrite_writemask(
190 struct rc_pair_sub_instruction * sub,
191 unsigned int conversion_swizzle)
193 const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
/* The destination write mask is converted first. */
196 sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
/* Whether the sources are touched at all depends on the opcode. */
198 if (!srcs_need_rewrite(info)) {
/* Every source argument the opcode uses gets its swizzle re-laid-out by
 * rc_adjust_channels(). */
202 for (i = 0; i < info->NumSrcRegs; i++) {
203 sub->Arg[i].Swizzle =
204 rc_adjust_channels(sub->Arg[i].Swizzle,
/**
 * Per-source callback that rc_normal_rewrite_writemask() hands to
 * rc_for_all_reads_src().
 * userdata is read as a pointer to an unsigned int, and the pointed-to value
 * is passed as the second (conversion) argument of rc_adjust_channels() to
 * rewrite src->Swizzle in place.
 */
209 static void normal_rewrite_writemask_cb(
211 struct rc_instruction * inst,
212 struct rc_src_register * src)
214 unsigned int * new_mask = (unsigned int *)userdata;
215 src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask);
219 * This function is the same as rc_pair_rewrite_writemask() except it
220 * operates on normal instructions.
222 void rc_normal_rewrite_writemask(
223 struct rc_instruction * inst,
224 unsigned int conversion_swizzle)
226 unsigned int new_mask;
227 struct rc_sub_instruction * sub = &inst->U.I;
228 const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
/* The destination write mask is converted first. */
229 sub->DstReg.WriteMask =
230 rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
/* Texture opcodes: TexSwizzle is rebuilt so that the channel named by
 * conversion_swizzle[i] receives i. The assert states that an identity
 * TexSwizzle is expected on entry. */
232 if (info->HasTexture) {
234 assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
235 for (i = 0; i < 4; i++) {
236 unsigned int swz = GET_SWZ(conversion_swizzle, i);
239 SET_SWZ(sub->TexSwizzle, swz, i);
243 if (!srcs_need_rewrite(info)) {
/* NOTE(review): the value handed to the callback is the rewritten write
 * mask, yet the callback forwards it to rc_adjust_channels() as its
 * conversion_swizzle argument, whereas rc_pair_rewrite_writemask() is
 * documented as adjusting sources based on conversion_swizzle - confirm
 * that passing the mask here is intended. */
247 new_mask = sub->DstReg.WriteMask;
248 rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask);
252 * This function replaces each value 'swz' in swizzle with the value of
253 * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's
254 * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want
255 * to change all the Y's in swizzle to X, then conversion_swizzle should be
256 * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then
257 * conversion swizzle should be YX__ (0xfc1).
258 * @param swizzle The swizzle to change
259 * @param conversion_swizzle Describes the conversion to perform on the swizzle
260 * @return A converted swizzle
262 unsigned int rc_rewrite_swizzle(
263 unsigned int swizzle,
264 unsigned int conversion_swizzle)
/* The result starts out as a copy of the input swizzle. */
267 unsigned int out_swizzle = swizzle;
269 for (chan = 0; chan < 4; chan++) {
270 unsigned int swz = GET_SWZ(swizzle, chan);
271 unsigned int new_swz;
273 SET_SWZ(out_swizzle, chan, swz);
/* conversion_swizzle is indexed by the selector value found in this
 * channel, not by the channel number. */
275 new_swz = GET_SWZ(conversion_swizzle, swz);
/* A looked-up value of RC_SWIZZLE_UNUSED is never written to the output. */
276 if (new_swz != RC_SWIZZLE_UNUSED) {
277 SET_SWZ(out_swizzle, chan, new_swz);
279 SET_SWZ(out_swizzle, chan, swz);
287 * Left multiplication of a register with a swizzle
289 struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
291 struct rc_src_register tmp = srcreg;
295 for(i = 0; i < 4; ++i) {
296 rc_swizzle swz = GET_SWZ(swizzle, i);
/* Result channel i takes the selector and the negate bit that srcreg has
 * in channel swz. */
298 tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
299 tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
/* Here the selector from `swizzle` is stored directly, without consulting
 * srcreg. */
301 tmp.Swizzle |= swz << (i*3);
/**
 * Resets a source register description: every byte of *reg is zeroed and
 * the swizzle is then set to RC_SWIZZLE_XYZW.
 */
307 void reset_srcreg(struct rc_src_register* reg)
309 memset(reg, 0, sizeof(struct rc_src_register));
310 reg->Swizzle = RC_SWIZZLE_XYZW;
/**
 * Relates a source operand to a destination write.
 * A source whose register file or index differs from the destination's is
 * tested for first; otherwise the result is dst_mask restricted to the
 * channels named by src_swz (via rc_swizzle_to_writemask()).
 */
313 unsigned int rc_src_reads_dst_mask(
314 rc_register_file src_file,
315 unsigned int src_idx,
316 unsigned int src_swz,
317 rc_register_file dst_file,
318 unsigned int dst_idx,
319 unsigned int dst_mask)
321 if (src_file != dst_file || src_idx != dst_idx) {
324 return dst_mask & rc_swizzle_to_writemask(src_swz);
328 * @return A bit mask specifying whether this swizzle will select from an RGB
329 * source, an Alpha source, or both.
331 unsigned int rc_source_type_swz(unsigned int swizzle)
334 unsigned int swz = RC_SWIZZLE_UNUSED;
335 unsigned int ret = RC_SOURCE_NONE;
/* Classify each of the four channels by the selector stored in it. */
337 for(chan = 0; chan < 4; chan++) {
338 swz = GET_SWZ(swizzle, chan);
/* A W selector marks the alpha source as used. */
339 if (swz == RC_SWIZZLE_W) {
340 ret |= RC_SOURCE_ALPHA;
/* An X, Y or Z selector marks the RGB source as used. */
341 } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
342 || swz == RC_SWIZZLE_Z) {
343 ret |= RC_SOURCE_RGB;
/**
 * Write-mask counterpart of rc_source_type_swz().
 * RC_SOURCE_RGB is set when mask contains any bit of RC_MASK_XYZ and
 * RC_SOURCE_ALPHA when it contains RC_MASK_W; ret starts as RC_SOURCE_NONE.
 */
349 unsigned int rc_source_type_mask(unsigned int mask)
351 unsigned int ret = RC_SOURCE_NONE;
353 if (mask & RC_MASK_XYZ)
354 ret |= RC_SOURCE_RGB;
356 if (mask & RC_MASK_W)
357 ret |= RC_SOURCE_ALPHA;
/* NOTE(review): presumably members of struct src_select, the element type
 * of can_use_presub_data.Selects - confirm. */
363 rc_register_file File;
/* Source-type bits; can_use_presub_data_add_select() stores its src_type
 * argument here. */
365 unsigned int SrcType;
/* Working state that rc_inst_can_use_presub() zeroes, fills in and passes
 * to can_use_presub_read_cb() as userdata. */
368 struct can_use_presub_data {
/* Source selects recorded so far; the array holds at most 5 entries. */
369 struct src_select Selects[5];
/* Index of the next free entry in Selects. */
370 unsigned int SelectCount;
/* Set from the replace_reg argument of rc_inst_can_use_presub(). */
371 const struct rc_src_register * ReplaceReg;
/* Set to 1 by the callback once it has met ReplaceReg. */
372 unsigned int ReplaceRemoved;
/**
 * Takes the next free entry of data->Selects, advancing data->SelectCount,
 * and fills it in; the visible assignments store index and src_type.
 * NOTE(review): no check of SelectCount against the 5-entry Selects array
 * is visible here - confirm callers cannot exceed it.
 */
375 static void can_use_presub_data_add_select(
376 struct can_use_presub_data * data,
377 rc_register_file file,
379 unsigned int src_type)
381 struct src_select * select;
383 select = &data->Selects[data->SelectCount++];
385 select->Index = index;
386 select->SrcType = src_type;
390 * This callback function counts the number of sources in inst that are
391 * different from the source in can_use_presub_data->ReplaceReg.
393 static void can_use_presub_read_cb(
395 struct rc_instruction * inst,
396 struct rc_src_register * src)
398 struct can_use_presub_data * d = userdata;
/* Pointer comparison: only the very register object stored in ReplaceReg
 * matches, and ReplaceRemoved latches so the match is handled once. */
400 if (!d->ReplaceRemoved && src == d->ReplaceReg) {
401 d->ReplaceRemoved = 1;
/* Sources without a register file are tested for separately. */
405 if (src->File == RC_FILE_NONE)
/* Record file, index and RGB/alpha usage of this source. */
408 can_use_presub_data_add_select(d, src->File, src->Index,
409 rc_source_type_swz(src->Swizzle));
/**
 * Decides whether inst can have one of its sources replaced by the result
 * of a presubtract operation, judged by how many RGB and alpha source
 * selects the instruction would then need.
 *
 * Outline of the visible steps:
 *  - presub_op == RC_PRESUB_NONE, texture opcodes, and instructions that
 *    already carry a presubtract opcode are each tested for up front;
 *  - the sources inst reads are gathered through can_use_presub_read_cb(),
 *    which is told about replace_reg via the ReplaceReg member;
 *  - presub_src0 and, when the operation takes more than one source,
 *    presub_src1 are added as further source selects;
 *  - selects are counted per source type, a select not being counted for a
 *    type that a later select of the same file and index also has, and the
 *    RGB and alpha counts are each tested against 3.
 *
 * @param inst Instruction that would use the presubtract result.
 * @param presub_op Presubtract operation under consideration.
 * @param presub_writemask Not referenced in the visible lines.
 * @param replace_reg Source of inst the presubtract result would stand in for.
 * @param presub_src0 First source of the presubtract operation.
 * @param presub_src1 Second source; its swizzle is read only when
 * rc_presubtract_src_reg_count(presub_op) is greater than 1.
 * NOTE(review): the return statements are not among the lines shown here;
 * presumably a non-zero result means the presubtract can be used - confirm.
 */
412 unsigned int rc_inst_can_use_presub(
413 struct rc_instruction * inst,
414 rc_presubtract_op presub_op,
415 unsigned int presub_writemask,
416 const struct rc_src_register * replace_reg,
417 const struct rc_src_register * presub_src0,
418 const struct rc_src_register * presub_src1)
420 struct can_use_presub_data d;
421 unsigned int num_presub_srcs;
423 const struct rc_opcode_info * info =
424 rc_get_opcode_info(inst->U.I.Opcode);
425 int rgb_count = 0, alpha_count = 0;
426 unsigned int src_type0, src_type1;
428 if (presub_op == RC_PRESUB_NONE) {
432 if (info->HasTexture) {
436 /* We can't use more than one presubtract value in an
437 * instruction, unless the two prsubtract operations
438 * are the same and read from the same registers.
439 * XXX For now we will limit instructions to only one presubtract
441 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
445 memset(&d, 0, sizeof(d));
446 d.ReplaceReg = replace_reg;
448 rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
450 num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
452 src_type0 = rc_source_type_swz(presub_src0->Swizzle);
453 can_use_presub_data_add_select(&d,
458 if (num_presub_srcs > 1) {
459 src_type1 = rc_source_type_swz(presub_src1->Swizzle);
460 can_use_presub_data_add_select(&d,
465 /* Even if both of the presub sources read from the same
466 * register, we still need to use 2 different source selects
467 * for them, so we need to increment the count to compensate.
469 if (presub_src0->File == presub_src1->File
470 && presub_src0->Index == presub_src1->Index) {
471 if (src_type0 & src_type1 & RC_SOURCE_RGB) {
474 if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {
480 /* Count the number of source selects for Alpha and RGB. If we
481 * encounter two of the same source selects then we can ignore the
483 for (i = 0; i < d.SelectCount; i++) {
485 unsigned int src_type = d.Selects[i].SrcType;
486 for (j = i + 1; j < d.SelectCount; j++) {
487 if (d.Selects[i].File == d.Selects[j].File
488 && d.Selects[i].Index == d.Selects[j].Index) {
489 src_type &= ~d.Selects[j].SrcType;
492 if (src_type & RC_SOURCE_RGB) {
496 if (src_type & RC_SOURCE_ALPHA) {
501 if (rgb_count > 3 || alpha_count > 3) {
/* NOTE(review): presumably members of struct max_data, the userdata type
 * used by max_callback() - confirm. */
/* Zero until a register of the tracked file has been seen; set to 0 by
 * rc_get_max_index() before scanning. */
510 unsigned int HasFileType;
/* Register file that max_callback() compares each register against. */
511 rc_register_file File;
/**
 * Callback that rc_get_max_index() passes to rc_for_all_reads_mask() and
 * rc_for_all_writes_mask(); userdata is a struct max_data.
 * The condition holds for a register of the tracked file that is either the
 * first one seen (HasFileType still 0) or has an index above d->Max.
 */
514 static void max_callback(
516 struct rc_instruction * inst,
517 rc_register_file file,
521 struct max_data * d = (struct max_data*)userdata;
522 if (file == d->File && (!d->HasFileType || index > d->Max)) {
529 * @return The maximum index of the specified register file used by the
532 int rc_get_max_index(
533 struct radeon_compiler * c,
534 rc_register_file file)
536 struct max_data data;
537 struct rc_instruction * inst;
/* HasFileType starts at 0, which max_callback() treats as "nothing recorded
 * yet". */
539 data.HasFileType = 0;
/* Iteration starts at Program.Instructions.Next and stops on reaching
 * &Program.Instructions itself; both the reads and the writes of each
 * instruction are fed to max_callback(). */
541 for (inst = c->Program.Instructions.Next;
542 inst != &c->Program.Instructions;
544 rc_for_all_reads_mask(inst, max_callback, &data);
545 rc_for_all_writes_mask(inst, max_callback, &data);
/* Still 0 here means no read or write of the requested file was reported. */
547 if (!data.HasFileType) {
/**
 * Collects the channels that the arguments of one pair sub-instruction read
 * through a given source select.
 * Each of the opcode's NumSrcRegs arguments is compared against `source`
 * and src_type; note that the source type of the argument's swizzle is
 * compared for exact equality with src_type, not for overlapping bits.
 * The channels named by an argument's swizzle are ORed into readmask.
 */
554 static unsigned int get_source_readmask(
555 struct rc_pair_sub_instruction * sub,
557 unsigned int src_type)
560 unsigned int readmask = 0;
561 const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
563 for (i = 0; i < info->NumSrcRegs; i++) {
564 if (sub->Arg[i].Source != source
565 || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {
568 readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);
574 * This function attempts to remove a source from a pair instruction.
576 * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
577 * @param source The index of the source to remove
578 * @param new_readmask A mask representing the components that are read by
579 * the source that is intended to replace the one you are removing. If you
580 * want to remove a source only and not replace it, this parameter should be
582 * @return 1 if the source was successfully removed, 0 if it was not
584 unsigned int rc_pair_remove_src(
585 struct rc_instruction * inst,
586 unsigned int src_type,
588 unsigned int new_readmask)
590 unsigned int readmask = 0;
/* Channels read through this source by the RGB and by the alpha half. */
592 readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);
593 readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
/* True when new_readmask lacks a channel that is currently being read. */
595 if ((new_readmask & readmask) != readmask)
/* Zero the source slot in each half selected by src_type. */
598 if (src_type & RC_SOURCE_RGB) {
599 memset(&inst->U.P.RGB.Src[source], 0,
600 sizeof(struct rc_pair_instruction_source));
603 if (src_type & RC_SOURCE_ALPHA) {
604 memset(&inst->U.P.Alpha.Src[source], 0,
605 sizeof(struct rc_pair_instruction_source));
612 * @return RC_OPCODE_NOP if inst is not a flow control instruction.
613 * @return The opcode of inst if it is a flow control instruction.
615 rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
/* Opcode info comes from U.I for RC_INSTRUCTION_NORMAL instructions; the
 * U.P.RGB lookup further down serves the pair form, where the assert also
 * requires the alpha opcode of a flow control instruction to be
 * RC_OPCODE_NOP. RC_OPCODE_NOP is also what the final return yields. */
617 const struct rc_opcode_info * info;
618 if (inst->Type == RC_INSTRUCTION_NORMAL) {
619 info = rc_get_opcode_info(inst->U.I.Opcode);
621 info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
622 /*A flow control instruction shouldn't have an alpha
624 assert(!info->IsFlowControl ||
625 inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
628 if (info->IsFlowControl)
631 return RC_OPCODE_NOP;
636 * @return The BGNLOOP instruction that starts the loop ended by endloop.
638 struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
/* NOTE(review): presumably counts ENDLOOPs of nested loops met during the
 * walk, so that their BGNLOOPs are not mistaken for the match - confirm. */
640 unsigned int endloop_count = 0;
641 struct rc_instruction * inst;
/* Walk backwards from endloop; the walk ends if it arrives back at endloop
 * itself. */
642 for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) {
643 rc_opcode op = rc_get_flow_control_inst(inst);
644 if (op == RC_OPCODE_ENDLOOP) {
646 } else if (op == RC_OPCODE_BGNLOOP) {
/* A BGNLOOP met while the counter is 0 is singled out here. */
647 if (endloop_count == 0) {
658 * @return The ENDLOOP instruction that ends the loop started by bgnloop.
660 struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
/* NOTE(review): presumably counts BGNLOOPs of nested loops met during the
 * walk, so that their ENDLOOPs are not mistaken for the match - confirm. */
662 unsigned int bgnloop_count = 0;
663 struct rc_instruction * inst;
/* Walk forwards from bgnloop; the walk ends if it arrives back at bgnloop
 * itself. */
664 for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) {
665 rc_opcode op = rc_get_flow_control_inst(inst);
666 if (op == RC_OPCODE_BGNLOOP) {
668 } else if (op == RC_OPCODE_ENDLOOP) {
/* An ENDLOOP met while the counter is 0 is singled out here. */
669 if (bgnloop_count == 0) {
680 * @return A conversion swizzle for converting from old_mask->new_mask
682 unsigned int rc_make_conversion_swizzle(
683 unsigned int old_mask,
684 unsigned int new_mask)
/* All four channels start as RC_SWIZZLE_UNUSED. */
686 unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
687 unsigned int old_idx;
688 unsigned int new_idx = 0;
689 for (old_idx = 0; old_idx < 4; old_idx++) {
/* Channels that are not set in old_mask are tested for first. */
690 if (!GET_BIT(old_mask, old_idx))
/* new_idx is not re-initialised by this loop, so each search of new_mask
 * resumes from where the previous one left new_idx. */
692 for ( ; new_idx < 4; new_idx++) {
693 if (GET_BIT(new_mask, new_idx)) {
/* Old channel old_idx is mapped to new channel new_idx. */
694 SET_SWZ(conversion_swizzle, old_idx, new_idx);
700 return conversion_swizzle;