/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "nvc0_program.h"
#include "nvc0_pc.h"
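
/* Machine-independent optimization passes for the nvc0 shader IR:
 * CSE, algebraic simplification and constant folding, source modifier
 * lowering, load folding/vectorization, reload and dead code elimination,
 * texture result masking, and flattening of IF/ELSE/ENDIF constructs into
 * predicated instructions.
 */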
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
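
/* DESCEND_ARBITRARY recurses into both CFG successors of the current block
 * with pass function @f; pass_seq serves as the visited mark, so each block
 * is processed at most once per pass invocation.
 */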
static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }
   /* same register: they interfere */
   return TRUE;
}
static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;
   else
      return a->join->reg.id == b->join->reg.id;
}
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}
/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static INLINE boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}
/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
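
/* Final IR pass before encoding: lay out the basic blocks in emission
 * order, delete no-op instructions and branches that merely fall through
 * to the next block, and accumulate emit_pos/emit_size (each nvc0
 * instruction is 8 bytes long).
 */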
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;

      if (in->emit_size) /* no more no-op branches to b */
         break;
   }
   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#if NV50_DEBUG & NV50_DEBUG_PROG_IR
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}
static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}
int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}
static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}
static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}
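
/* For commutative operations (and for SET/SLCT, where the condition code
 * can be adjusted), move constant buffer loads and 32-bit immediates out
 * of src0 into src1, where the encoding is more likely to accept them
 * directly; this gives the load folding pass below more opportunities.
 */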
static void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode) &&
       NV_BASEOP(nvi->opcode) != NV_OP_SET &&
       NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (src1->value->reg.file != NV_FILE_GPR)
      return;

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   } else
   if (is_immd32_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn) &&
          !is_immd32_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0) {
      if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
         nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
      else
      if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
         nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
   }
}
static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}
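
/* Fold LD/MOV of constants or immediates directly into the instructions
 * that consume them, where nvc0_insn_can_load allows it, carrying along
 * the address register of indirect loads; the load itself is deleted once
 * its result is no longer referenced.
 */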
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         /* fold it ! */
         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}
/* NOTE: Assumes loads have not yet been folded. */
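/* Turn SUB into ADD with a negated second source, fold NEG/ABS instructions
 * into the source modifiers of their users where supported, and fuse SAT
 * into the ADD/MUL/MAD that computes its operand.
 */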
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
static void
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_SAT) {
      union {
         float f;
         uint32_t u;
         int32_t i;
      } u;
      u.u = *val;
      if (type == NV_TYPE_F32) {
         u.f = CLAMP(u.f, -1.0f, 1.0f);
      } else
      if (type == NV_TYPE_U16) {
         u.u = MIN2(u.u, 0xffff);
      } else
      if (type == NV_TYPE_S16) {
         u.i = CLAMP(u.i, -32768, 32767);
      }
      *val = u.u;
   }
   if (mod & NV_MOD_NOT)
      *val = ~(*val);
}
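
/* Constant folding for the case where both sources of an instruction are
 * immediates: evaluate MUL/ADD/SUB (and the multiply part of MAD) at
 * compile time and rewrite the instruction into a MOV of the result, or
 * into an ADD of the remaining source for MAD.
 */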
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
   apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD_F32:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL_F32:
      u.f32 = u0.f32 * u1.f32;
      break;
   case NV_OP_MUL_B32:
      u.u32 = u0.u32 * u1.u32;
      break;
   case NV_OP_ADD_F32:
      u.f32 = u0.f32 + u1.f32;
      break;
   case NV_OP_ADD_B32:
      u.u32 = u0.u32 + u1.u32;
      break;
   case NV_OP_SUB_F32:
      u.f32 = u0.f32 - u1.f32;
      break;
   case NV_OP_SUB_B32:
      u.u32 = u0.u32 - u1.u32;
      break;
   default:
      return;
   }

   val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->opcode == NV_OP_MAD_F32) {
      /* the folded multiply becomes src0, the third source becomes src1 */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD_F32;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   } else {
      nvi->opcode = NV_OP_MOV;
   }
}
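
/* Simplification for the case where exactly one source is an immediate;
 * s is the index of the immediate source, t of the other one. Handles
 * x * 0, x * +-1, x * +-2, x * 2^n, x + 0, and RCP/RSQ of an immediate.
 */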
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int shift;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   apply_modifiers(&u.u32, type, nvi->src[s]->mod);

   if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, nvi, t, NULL);
      if (s) {
         nvi->src[0] = nvi->src[1];
         nvi->src[1] = NULL;
      }
      nvi->src[0]->mod = 0;
      return;
   }

   switch (nvi->opcode) {
   case NV_OP_MUL_F32:
      if (u.f32 == 1.0f || u.f32 == -1.0f) {
         if (u.f32 == -1.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         nvi->src[0]->mod = 0;
      } else
      if (u.f32 == 2.0f || u.f32 == -2.0f) {
         if (u.f32 == -2.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD_F32;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      }
      break;
   case NV_OP_ADD_F32:
      if (u.u32 == 0) {
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         case NV_MOD_NEG | NV_MOD_ABS:
            op = NV_OP_CVT;
            nvi->ext.cvt.s = nvi->ext.cvt.d = type;
            break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         if (nvi->opcode != NV_OP_CVT)
            nvi->src[0]->mod = 0;
      }
      break;
   case NV_OP_ADD_B32:
      if (u.u32 == 0) {
         assert(nvi->src[t]->mod == 0);
         nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
         nvi->ext.cvt.s = nvi->ext.cvt.d = type;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      }
      break;
   case NV_OP_MUL_B32:
      /* multiplication by 0 already handled above */
      assert(nvi->src[s]->mod == 0);
      shift = ffs(u.s32) - 1;
      if (u.s32 == 1) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      } else
      if (u.s32 > 0 && u.s32 == (1 << shift)) {
         nvi->opcode = NV_OP_SHL;
         (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, val);
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}
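
/* MIN or MAX of a value with itself (without source modifiers) is an
 * identity operation: forward the operand and delete the instruction.
 */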
static void
handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;

   if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
      return;
   if (src0->reg.file != NV_FILE_GPR)
      return;
   nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
   nvc0_insn_delete(nvi);
}
/* check if we can combine MUL + ADD into MAD/FMA */
static void
handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;
   struct nv_value *src;
   int s;
   uint8_t mod[4];

   if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
   else
   if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
   else
      return;

   if ((src0->insn && src0->insn->bb != nvi->bb) ||
       (src1->insn && src1->insn->bb != nvi->bb))
      return;

   /* check for immediates from prior constant folding */
   if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
      return;
   src = nvi->src[s]->value;

   mod[0] = nvi->src[0]->mod;
   mod[1] = nvi->src[1]->mod;
   mod[2] = src->insn->src[0]->mod;
   mod[3] = src->insn->src[1]->mod;

   if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
      return;

   nvi->opcode = NV_OP_MAD_F32;

   nv_reference(ctx->pc, nvi, s, NULL);
   nvi->src[2] = nvi->src[!s];
   nvi->src[!s] = NULL;

   nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
   nvi->src[0]->mod = mod[2] ^ mod[s];
   nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
   nvi->src[1]->mod = mod[3];
}
static int
nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1;
      uint baseop = NV_BASEOP(nvi->opcode);

      next = nvi->next;

      src0 = nvc0_pc_find_immediate(nvi->src[0]);
      src1 = nvc0_pc_find_immediate(nvi->src[1]);

      if (src0 && src1) {
         constant_expression(ctx->pc, nvi, src0, src1);
      } else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
         handle_min_max(ctx, nvi);
      else
      if (nvi->opcode == NV_OP_ADD_F32)
         handle_add_mul(ctx, nvi);
   }
   DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);

   return 0;
}
/* TODO: redundant store elimination */

/* This record type is shared by the load/store vectorizer and by reload
 * elimination: ofst/base/size describe a memory access, data/value cache
 * the key and result of a previously seen load.
 */
struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint64_t data[2];
   struct nv_value *value;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_s;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   uint32_t alloc;
   struct mem_record pool[MEM_RECORD_POOL_SIZE];
};
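
/* Records of previous memory accesses are kept in one list per memory
 * file: one for each constant buffer (mem_c[16]), attributes (mem_a),
 * varyings (mem_v), local memory (mem_l) and immediates (imm), all
 * allocated from a fixed-size pool that is reset per basic block.
 */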
/* Extend the load operation in @rec to also cover the data loaded by @ld.
 * The two loads may not overlap but reference adjacent memory locations.
 */
static void
combine_load(struct nv_pc *pc, struct mem_record *rec,
             struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);
   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;
      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[mem->reg.size / 4 + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   if (fv->src[0]->value->refc > 1)
      nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
   fv->src[0]->value->reg.address = rec->ofst;
   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{

}
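
/* TODO: combining adjacent exports is not implemented yet. */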
static INLINE void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}
/* vectorize and reuse loads from memory or of immediates */
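/* An existing record matches if it has the same base address register,
 * lies in the same aligned 16 byte slot, and is immediately adjacent
 * (directly below or above) to the new access; combined loads also must
 * not start misaligned within that slot.
 */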
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t ofst, base;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }
      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            /* only NV_OP_VFETCH can load exactly 12 bytes */
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }
      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(ctx->pc, it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   ctx->alloc = 0;
   ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
   for (s = 0; s < 16; ++s)
      ctx->mem_c[s] = NULL;

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }

      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, pass_store_elim);
   return 0;
}
/* TODO: properly handle loads from l[] memory in the presence of stores */
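/* XXX: the lookup below still uses nv50-style NV_OP_LDA loads and the
 * NV_FILE_MEM_S file; a load is replaced by a reference to the previously
 * loaded value when the address and the address register (recorded as
 * data[0]/data[1]) match.
 */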
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == MEM_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_v = NULL;
   ctx->mem_a = NULL;
   ctx->mem_s = NULL;
   ctx->mem_l = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
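
/* Determine which components of a texture result are actually used and
 * store them in tex_mask; the defs are compacted so that live components
 * come first.
 */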
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }
      /* reorder defs so that used components come first */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
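
/* Dead code elimination: delete instructions whose results are
 * unreferenced, unless they have side effects (see inst_removable).
 * The caller re-runs this pass until no more instructions are removed.
 */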
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   }
   return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
           !bb->out[0]->out[1] &&
           !bb->out[1]->out[1]);
}
/* Predicate instructions and delete any branch at the end if it is
 * not a break from a loop.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *pred, uint8_t cc)
{
   struct nv_instruction *nvi, *prev;
   int s;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi; nvi = nvi->next) {
      prev = nvi;
      if (inst_is_noop(nvi))
         continue;
      /* append the predicate as the last source */
      for (s = 0; nvi->src[s]; ++s);
      assert(s < 6);
      nvi->predicate = s;
      nvi->cc = cc;
      nv_reference(pc, nvi, nvi->predicate, pred);
   }
   if (prev->opcode == NV_OP_BRA &&
       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
      nvc0_insn_delete(prev);
}
static INLINE boolean
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
{
   if (nvi->def[0] && values_equal(nvi->def[0], pred))
      return FALSE;
   return nvc0_insn_is_predicateable(nvi);
}
/* Transform IF/ELSE/ENDIF constructs into predicated instructions
 * where feasible.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int k;
   int n0, n1; /* instruction counts of outgoing blocks */

   if (bb_is_if_else_endif(b)) {
      assert(b->exit && b->exit->opcode == NV_OP_BRA);

      assert(b->exit->predicate >= 0);
      pred = b->exit->src[b->exit->predicate]->value;

      n0 = n1 = 0;
      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!may_predicate_insn(nvi, pred))
            break;
      if (!nvi) {
         /* we're after register allocation, so there always is an ELSE block */
         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!may_predicate_insn(nvi, pred))
               break;
      }

      /* 12 is an arbitrary limit */
      if (!nvi && n0 < 12 && n1 < 12) {
         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);

         nvc0_insn_delete(b->exit); /* delete the branch */

         /* and a potential joinat before it */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nvc0_insn_delete(b->exit);

         /* remove join operations at the end of the conditional */
         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
         if ((nvi = b->out[0]->out[k]->entry)) {
            nvi->join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nvc0_insn_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(k, nv_pass_flatten);

   return 0;
}
/* Test instructions for equality, independently of their sources. */
static boolean
is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (a->opcode != b->opcode)
      return FALSE;
   if (nv_is_texture_op(a->opcode)) {
      if (a->ext.tex.t != b->ext.tex.t ||
          a->ext.tex.s != b->ext.tex.s)
         return FALSE;
      if (a->tex_dim != b->tex_dim ||
          a->tex_array != b->tex_array ||
          a->tex_cube != b->tex_cube ||
          a->tex_shadow != b->tex_shadow ||
          a->tex_live != b->tex_live)
         return FALSE;
   } else
   if (a->opcode == NV_OP_CVT) {
      if (a->ext.cvt.s != b->ext.cvt.s ||
          a->ext.cvt.d != b->ext.cvt.d)
         return FALSE;
   } else
   if (NV_BASEOP(a->opcode) == NV_OP_SET ||
       NV_BASEOP(a->opcode) == NV_OP_SLCT) {
      if (a->set_cond != b->set_cond)
         return FALSE;
   } else
   if (a->opcode == NV_OP_LINTERP ||
       a->opcode == NV_OP_PINTERP) {
      if (a->centroid != b->centroid ||
          a->flat != b->flat)
         return FALSE;
   }
   if (a->lanes != b->lanes ||
       a->patch != b->patch ||
       a->saturate != b->saturate)
      return FALSE;
   if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
      return FALSE;
   return TRUE;
}
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s, d;

   for (ir = entry; ir; ir = next) {
      next = ir->next;

      for (ik = entry; ik != ir; ik = ik->next) {
         if (!is_operation_equal(ir, ik))
            continue;
         if (!ir->def[0] || !ik->def[0])
            continue;

         if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
            continue;

         for (d = 0; d < 4; ++d) {
            if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
               break;
            if (ir->def[d])
               if (!values_equal(ik->def[0], ir->def[0]))
                  break;
         }
         if (d < 4)
            continue;

         for (s = 0; s < 5; ++s) {
            struct nv_value *a, *b;

            if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
               break;
            if (!ir->src[s]) {
               s = 5;
               break;
            }

            if (ik->src[s]->mod != ir->src[s]->mod)
               break;
            a = ik->src[s]->value;
            b = ir->src[s]->value;
            if (a == b)
               continue;
            if (a->reg.file != b->reg.file ||
                a->reg.id < 0 || /* this excludes memory loads/stores */
                a->reg.id != b->reg.id)
               break;
         }
         if (s == 5) {
            nvc0_insn_delete(ir);
            for (d = 0; d < 4 && ir->def[d]; ++d)
               nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
            break;
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
 * neighbouring registers. CSE might have messed this up.
 * Just generate a MOV for each source to avoid conflicts if they're used in
 * multiple NV_OP_BIND at different positions.
 *
 * Add a dummy use of the pointer source of >= 8 byte loads after the load
 * to prevent it from being assigned a register which overlaps the load's
 * destination, which would produce random corruptions.
 */
static int
nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_value *val;
   struct nv_instruction *fix, *nvi, *next;
   int s;

   for (fix = b->entry; fix; fix = next) {
      next = fix->next;

      if (fix->opcode == NV_OP_LD) {
         if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) {
            nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF);
            nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value);

            nvc0_insn_insert_after(fix, nvi);
         }
      } else
      if (fix->opcode == NV_OP_BIND) {
         for (s = 0; s < 4 && fix->src[s]; ++s) {
            val = fix->src[s]->value;

            nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
            nvi->def[0] = new_value_like(ctx->pc, val);
            nvi->def[0]->insn = nvi;
            nv_reference(ctx->pc, nvi, 0, val);
            nv_reference(ctx->pc, fix, s, nvi->def[0]);

            nvc0_insn_insert_before(fix, nvi);
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_fixups);

   return 0;
}
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim = NULL;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do CSE so we can just compare values by pointer in subsequent passes. */
   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_algebraic_opt(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   /* May run DCE before load-combining since that pass will clean up
    * after itself.
    */
   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fixups(&pass, root);

   return ret;
}
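
/* Entry point of this optimization stage: run pass0 on the main program
 * and on every subroutine.
 */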
int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}