/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

#include "nv50_pc.h"
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
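/* DESCEND_ARBITRARY recurses into both CFG successors of the current
 * block; pc->pass_seq serves as the visited stamp, so each block is
 * handled at most once per pass (callers bump pass_seq before a pass).
 */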
extern unsigned nv50_inst_min_size(struct nv_instruction *);

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
}
/* TRUE if no source of b reads a value that a defines. */
static INLINE boolean
inst_commutation_check(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4; ++di) {
      if (!a->def[di])
         break;
      for (si = 0; si < 5; ++si) {
         if (!b->src[si])
            continue;
         if (values_equal(a->def[di], b->src[si]->value))
            return FALSE;
      }
   }

   if (b->flags_src && b->flags_src->value == a->flags_def)
      return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
   return (!(nvi->is_terminator || nvi->is_join ||
             nvi->target ||
             nvi->fixed ||
             nv_nvi_refcount(nvi)));
}
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   if (nvi->fixed ||
       nvi->flags_src ||
       nvi->flags_def ||
       nvi->is_terminator ||
       nvi->is_join)
      return FALSE;

   if (nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}
struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   /* find the last block emitted so far that already has code */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* remove no-op instructions */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* decide which instructions can use the short (4 byte) encoding;
    * short instructions must come in pairs, so try to commute an unpaired
    * one with a following short instruction
    */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;

         n32 = 0;
         b->bin_size += 1 + nvi->is_long;
      }
   }

   if (!b->entry) {
      NV50_DBGMSG("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have del'd a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   /* bin_size was counted in 32 bit words, convert to bytes */
   pc->bin_size += b->bin_size *= 4;
}
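/* Pass 2 readies the program for emission: flatten short IF/ELSE/ENDIF
 * constructs into predicated instructions, then lay the blocks out in
 * emission order, fixing up encoding sizes and no-op branches.
 */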
int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, pc->root);

   NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));

   pc->num_blocks = 0;
   pc->bin_size = 0;

   nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);

   return 0;
}
static INLINE boolean
is_cmem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_smem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
            nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
}

static INLINE boolean
is_immd_move(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM);
}
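/* For commutative ops, prefer the operand order that encodes best:
 * a c[] (const mem) load goes in src1, an s[] (shared mem) load in src0,
 * and if src1 already holds an immediate the operands are left alone.
 * Swapping the sources of a SET means its condition code must be
 * mirrored: assuming the usual condition bit order (FL, LT, EQ, LE, GT,
 * NE, GE, TR), cc_swapped below exchanges LT<->GT and LE<->GE, so e.g.
 * (a < b) becomes (b > a).
 */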
static void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM)
      return;

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      /* let the instruction write the output directly, drop the move */
      nvi->def[0] = sti->def[0];
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);

         if (!nv_nvi_refcount(ld))
            nv_nvi_delete(ld);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *mi, *next;
   ubyte mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->opcode = NV_OP_ADD;
         nvi->src[1]->mod ^= NV_MOD_NEG;
      }

      /* should not put any modifiers on NEG and ABS */
      assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
      assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);

      for (j = 0; j < 4; ++j) {
         if (!nvi->src[j])
            break;

         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1)
            continue;

         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;

         if (nvi->opcode == NV_OP_ABS)
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         else
         if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
            nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
            continue;

         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
            mi->saturate = 1;
            mi->def[0] = nvi->def[0];
            nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

/* Return the immediate a ref ultimately resolves to, traced through
 * plain MOVs, or NULL.
 */
static struct nv_value *
find_immediate(struct nv_ref *ref)
{
   struct nv_value *src;

   if (!ref)
      return NULL;

   src = ref->value;
   while (src->insn && src->insn->opcode == NV_OP_MOV) {
      assert(!src->insn->src[0]->mod);
      src = src->insn->src[0]->value;
   }
   return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
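/* Apply NEG/ABS modifiers to a raw immediate: for F32 this manipulates
 * the IEEE sign bit directly, for integers it negates in two's
 * complement.
 */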
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}
static INLINE uint
modifiers_opcode(ubyte mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}
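/* Fold an instruction whose sources are both immediates into a MOV of
 * the computed constant. For MAD, only the multiplication is folded and
 * the instruction is rewritten as an ADD of src2 and the folded product.
 */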
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         return;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         return;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         return;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, &nvi->src[1], NULL);
   nv_reference(pc, &nvi->src[0], val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;
   }
}
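/* Simplify an instruction with one constant operand (src s; the other
 * source is t). Rewrites performed include:
 *   x * 1 -> MOV x      x * 2 -> ADD x, x     x * -1 -> NEG x
 *   x * 0 -> MOV 0      x + 0 -> MOV x        RCP/RSQ imm -> MOV imm'
 */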
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }
}
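/* Constant folding, then MUL+ADD -> MAD fusion: an ADD that consumes the
 * single-use result of a MUL absorbs the multiplication, e.g.
 *   t = MUL a, b;  d = ADD t, c   ==>   d = MAD a, b, c
 * A NEG modifier on t is folded by negating one of the MUL operands.
 */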
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = find_immediate(nvi->src[0]);
      src1 = find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else
      if (src0)
         constant_operand(ctx->pc, nvi, src0, 0);
      else
      if (src1)
         constant_operand(ctx->pc, nvi, src1, 1);

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}
/* TODO: redundant store elimination */

struct load_record {
   struct load_record *next;
   uint64_t data;
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};
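/* Eliminate reloads of data already loaded in this block: one record
 * list per memory file (immediates, s[], v[], c[0..15], l[]), keyed by
 * the source location (register id, or the immediate's bit pattern).
 */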
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
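/* TEX instructions define a 4-component vector: compute the mask of
 * components actually referenced and move the live definitions to the
 * front, so only those get written.
 */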
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }
      /* live components first, unused ones last */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_cullable(nvi)) {
         nv_nvi_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}
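/* The accepted shape, with b = bb->out[0] and c = bb->out[1] (in the
 * LOOP_LEAVE case the join is reached through b->out[1] instead):
 *
 *        bb
 *       /  \
 *      b    c
 *       \  /
 *       join
 */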
/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}
/* NOTE: Run this after register allocation: we can then just cut out the
 * cflow instructions and hook the predicates to the conditional OPs if
 * they do not use immediates; this is better than inserting SELECT to
 * join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0;

   if (bb_is_if_else_endif(b)) {

      NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
#ifdef NV50PC_DEBUG
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
#endif
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;

         if ((nvi = b->out[0]->out[i]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *a, *b;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 ||
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 3) {
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               nv_nvi_delete(ir);
               ++reps;
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
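/* Main optimization sequence before register allocation: arithmetic
 * lowering and constant folding first (so later passes need not care
 * about which combinations of memory loads are supported), then load
 * and store folding, reload elimination, CSE, modifier lowering,
 * iterated DCE, and finally TEX mask compaction.
 */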
int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   struct nv_pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_loads(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_stores(&pass, pc->root);
   if (ret)
      return ret;

   reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
   reldelim->pc = pc;
   pc->pass_seq++;
   ret = nv_pass_reload_elim(reldelim, pc->root);
   FREE(reldelim);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, pc->root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, pc->root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, pc->root);
      if (ret)
         return ret;
   } while (dce.removed);

   pc->pass_seq++;
   ret = nv_pass_tex_mask(&pass, pc->root);

   return ret;
}