2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* Recursive-descent helper used by the optimization passes: marks block `b`
 * as visited via pc->pass_seq, then invokes pass function `f` on each of the
 * (up to two) successor blocks that have not yet been visited this pass.
 * NOTE(review): some macro continuation lines are missing from this view. */
25 #define DESCEND_ARBITRARY(j, f) \
27 b->pass_seq = ctx->pc->pass_seq; \
29 for (j = 0; j < 2; ++j) \
30 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
34 extern unsigned nv50_inst_min_size(struct nv_instruction *);
/* Two SSA values are considered equal if they live in the same register file
 * and their coalesced ("join") representatives were assigned the same
 * register id. */
41 values_equal(struct nv_value *a, struct nv_value *b)
44 return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
/* One-directional commutation test: scans every value defined by `a`
 * (def[0..3]) against every source read by `b` (src[0..4]); a match means
 * `b` depends on `a` and the two may not be reordered.  The symmetric check
 * is done by inst_commutation_legal(). */
48 inst_commutation_check(struct nv_instruction *a,
49 struct nv_instruction *b)
53 for (di = 0; di < 4; ++di) {
56 for (si = 0; si < 5; ++si) {
59 if (values_equal(a->def[di], b->src[si]->value))
/* b must also not consume the condition flags that a defines */
64 if (b->flags_src && b->flags_src->value == a->flags_def)
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
/* Legal only if neither instruction reads a value the other defines. */
74 inst_commutation_legal(struct nv_instruction *a,
75 struct nv_instruction *b)
77 return inst_commutation_check(a, b) && inst_commutation_check(b, a);
/* Whether dead-code elimination may delete this instruction: it must not be
 * a terminator (and must pass further conditions not visible in this view),
 * and nothing may still reference its results (nv_nvi_refcount == 0). */
81 inst_cullable(struct nv_instruction *nvi)
83 return (!(nvi->is_terminator ||
86 nv_nvi_refcount(nvi)));
/* Detect no-op instructions: a MOV (or SELECT) whose destination and source
 * were coalesced into the same register, making the copy redundant. */
90 nvi_isnop(struct nv_instruction *nvi)
/* exports are handled specially (lines missing from this view) */
92 if (nvi->opcode == NV_OP_EXPORT)
/* destination without an allocated register cannot be judged a nop */
101 if (nvi->def[0]->join->reg.id < 0)
104 if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
/* a cross-file move (e.g. to $oX) is a real transfer, never a nop */
107 if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
/* source never got a register: malformed IR, report and bail */
110 if (nvi->src[0]->value->join->reg.id < 0) {
111 debug_printf("nvi_isnop: orphaned value detected\n");
/* SELECT is only a nop if the second source coalesced with def as well */
115 if (nvi->opcode == NV_OP_SELECT)
116 if (!values_equal(nvi->def[0], nvi->src[1]->value))
119 return values_equal(nvi->def[0], nvi->src[0]->value);
/* Per-basic-block pre-emission pass (callback for nv_pc_pass_in_order):
 * assigns binary positions to blocks, deletes no-op branches to the
 * fall-through block, removes nop instructions, and fixes up the mix of
 * 4-byte (short) and 8-byte (long) instruction encodings so that blocks and
 * exits meet the hardware's alignment rules.  bin_size is counted in
 * 32-bit units here and converted to bytes at the end.
 * NOTE(review): many lines are missing from this view; comments describe
 * only the visible statements. */
123 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
125 struct nv_pc *pc = (struct nv_pc *)priv;
126 struct nv_basic_block *in;
127 struct nv_instruction *nvi, *next;
/* find the last already-emitted block that has a non-zero binary size */
131 for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
135 /* check for no-op branches (BRA $PC+8) */
136 if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
/* deleting the 8-byte branch shifts all later blocks back by 8 */
140 for (++j; j < pc->num_blocks; ++j)
141 pc->bb_list[j]->bin_pos -= 8;
143 nv_nvi_delete(in->exit);
145 b->bin_pos = in->bin_pos + in->bin_size;
148 pc->bb_list[pc->num_blocks++] = b;
/* first sweep: (nop removal — body lines missing from this view) */
152 for (nvi = b->entry; nvi; nvi = next) {
/* second sweep: choose short/long encodings and keep parity constraints */
158 for (nvi = b->entry; nvi; nvi = next) {
161 size = nv50_inst_min_size(nvi);
162 if (nvi->next && size < 8)
/* odd number of 32-bit words so far: try swapping with a short successor
 * instruction (if independent) to restore pairing */
165 if ((n32 & 1) && nvi->next &&
166 nv50_inst_min_size(nvi->next) == 4 &&
167 inst_commutation_legal(nvi, nvi->next)) {
169 debug_printf("permuting: ");
170 nv_print_instruction(nvi);
171 nv_print_instruction(nvi->next);
172 nv_nvi_permute(nvi, nvi->next);
177 b->bin_size += n32 & 1;
179 nvi->prev->is_long = 1;
182 b->bin_size += 1 + nvi->is_long;
186 debug_printf("block %p is now empty\n", b);
/* block exits must use the long (8-byte) encoding */
188 if (!b->exit->is_long) {
190 b->exit->is_long = 1;
193 /* might have deleted a whole tail of instructions */
194 if (!b->exit->prev->is_long && !(n32 & 1)) {
196 b->exit->prev->is_long = 1;
199 assert(!b->entry || (b->exit && b->exit->is_long));
/* convert from 32-bit words to bytes and accumulate the program size */
201 pc->bin_size += b->bin_size *= 4;
/* Pass 2 driver: allocates the emission-ordered block list and walks the CFG
 * in order, running nv_pc_pass_pre_emission on every block.
 * NOTE(review): the CALLOC result check (if any) is not visible here —
 * confirm allocation failure is handled. */
205 nv_pc_exec_pass2(struct nv_pc *pc)
207 debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
209 pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
212 nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
/* True if nvi is a load (LDA) whose source lives in one of the 16 constant
 * memory spaces c0[]..c15[]. */
217 static INLINE boolean
218 is_cmem_load(struct nv_instruction *nvi)
220 return (nvi->opcode == NV_OP_LDA &&
221 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
222 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
/* True if nvi is a load (LDA) from shared memory (NV_FILE_MEM_S) or from
 * NV_FILE_MEM_P.
 * BUGFIX: the second comparison used `<= NV_FILE_MEM_P`, which made the
 * explicit `== NV_FILE_MEM_S` test redundant and matched every register
 * file with a smaller enum value (GPR, IMM, ...); an exact match is what
 * the surrounding swap logic expects. */
225 static INLINE boolean
226 is_smem_load(struct nv_instruction *nvi)
228 return (nvi->opcode == NV_OP_LDA &&
229 (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
230 nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
/* True if nvi is a MOV whose source is an immediate value, i.e. a candidate
 * for folding the immediate directly into the consumer. */
233 static INLINE boolean
234 is_immd_move(struct nv_instruction *nvi)
236 return (nvi->opcode == NV_OP_MOV &&
237 nvi->src[0]->value->reg.file == NV_FILE_IMM);
/* For commutative ops, swap src[0]/src[1] so that operand restrictions can
 * be met: constant-memory loads are moved to source 1, shared-memory loads
 * to source 0.  For SET, a swap also requires remapping the comparison
 * condition (LT <-> GT etc.) via cc_swapped. */
241 check_swap_src_0_1(struct nv_instruction *nvi)
/* condition-code permutation table used when the operands are exchanged */
243 static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
245 struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];
247 if (!nv_op_commutative(nvi->opcode))
249 assert(src0 && src1);
/* cmem loads can only be source 1: swap if src0 is one and src1 is not */
251 if (is_cmem_load(src0->value->insn)) {
252 if (!is_cmem_load(src1->value->insn)) {
255 /* debug_printf("swapping cmem load to 1\n"); */
/* smem loads can only be source 0: swap if src1 is one and src0 is not */
258 if (is_smem_load(src1->value->insn)) {
259 if (!is_smem_load(src0->value->insn)) {
262 /* debug_printf("swapping smem load to 0\n"); */
/* nvi->src[0] != src0 means a swap actually happened above */
266 if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
267 nvi->set_cond = cc_swapped[nvi->set_cond];
/* Fold stores to shader outputs: when a MOV/STA writes a computed value to
 * an $oX output register and that value has no other users, rewrite the
 * producing instruction to define $oX directly and drop the copy.
 * Instructions with immediate sources are excluded (hardware cannot write
 * $oX in that case).  Recurses over the CFG via DESCEND_ARBITRARY. */
277 nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
279 struct nv_instruction *nvi, *sti;
282 for (sti = b->entry; sti; sti = sti->next) {
283 if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
286 /* only handling MOV to $oX here */
287 if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
290 nvi = sti->src[0]->value->insn;
/* PHI results cannot be redirected to an output register */
291 if (nvi->opcode == NV_OP_PHI || !nvi)
293 assert(nvi->def[0] == sti->src[0]->value);
/* value has other users: the copy must stay */
295 if (nvi->def[0]->refc > 1)
298 /* cannot write to $oX when using immediate */
299 for (j = 0; j < 4 && nvi->src[j]; ++j)
300 if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
/* redirect the producer to write the output directly */
305 nvi->def[0] = sti->def[0];
307 nvi->fixed = sti->fixed;
310 DESCEND_ARBITRARY(j, nv_pass_fold_stores);
/* Fold loads into their users: after normalizing operand order with
 * check_swap_src_0_1, replace a source that comes from an immediate MOV or
 * an LDA with a direct reference to the loaded value, when the consuming
 * instruction can encode that operand (nv50_nvi_can_use_imm /
 * nv50_nvi_can_load).  Recurses over the CFG via DESCEND_ARBITRARY. */
316 nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
318 struct nv_instruction *nvi, *ld;
321 for (nvi = b->entry; nvi; nvi = nvi->next) {
322 check_swap_src_0_1(nvi);
324 for (j = 0; j < 3; ++j) {
327 ld = nvi->src[j]->value->insn;
/* immediates can be folded if the op encodes an immediate in slot j */
331 if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
332 nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
336 if (ld->opcode != NV_OP_LDA)
338 if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
/* src[4] is the address register operand of the load */
341 if (j == 0 && ld->src[4]) /* can't load shared mem */
344 /* fold it ! */ /* XXX: ref->insn */
345 nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
/* carry over the load's address register, if it used one */
347 nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
350 DESCEND_ARBITRARY(j, nv_pass_fold_loads);
/* Lower NEG/ABS/SUB/SAT instructions into source modifiers on their users:
 * SUB becomes ADD with a negated second source; single-use NEG/ABS results
 * are folded into the consumer's src modifier when the opcode supports it
 * (nv50_supported_src_mods); SAT is merged into a preceding MAD.
 * Recurses over the CFG via DESCEND_ARBITRARY.
 * NOTE(review): several lines are missing from this view. */
356 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
359 struct nv_instruction *nvi, *mi, *next;
362 for (nvi = b->entry; nvi; nvi = next) {
364 if (nvi->opcode == NV_OP_SUB) {
365 nvi->opcode = NV_OP_ADD;
366 nvi->src[1]->mod ^= NV_MOD_NEG;
369 /* should not put any modifiers on NEG and ABS */
/* BUGFIX: these asserts compared the opcode against the modifier flags
 * NV_MOD_NEG/NV_MOD_ABS instead of the opcodes NV_OP_NEG/NV_OP_ABS, so
 * they never checked what the comment above describes. */
370 assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
371 assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);
373 for (j = 0; j < 4; ++j) {
377 mi = nvi->src[j]->value->insn;
/* only fold producers whose single result is used exactly once */
380 if (mi->def[0]->refc > 1)
383 if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
385 if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
/* ABS discards the sign of its source, so incoming NEG/ABS are moot */
389 if (nvi->opcode == NV_OP_ABS)
390 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
/* NEG of NEG cancels out: the consumer degenerates to a MOV */
392 if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
393 nvi->opcode = NV_OP_MOV;
397 if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
400 nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
402 nvi->src[j]->mod ^= mod;
/* fold SAT into a preceding MAD (only if MAD does not define flags) */
405 if (nvi->opcode == NV_OP_SAT) {
406 mi = nvi->src[0]->value->insn;
408 if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
410 mi->def[0] = nvi->def[0];
415 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
/* True if value `s` is produced by a MUL instruction (used by the MUL+ADD
 * to MAD combining below). */
420 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
/* Follow a (possibly chained) sequence of unmodified MOVs from `ref` back to
 * its origin; return that origin value if it is an immediate, else NULL. */
422 static struct nv_value *
423 find_immediate(struct nv_ref *ref)
425 struct nv_value *src;
431 while (src->insn && src->insn->opcode == NV_OP_MOV) {
/* a MOV with a source modifier would change the value; not expected here */
432 assert(!src->insn->src[0]->mod);
433 src = src->insn->src[0]->value;
435 return (src->reg.file == NV_FILE_IMM) ? src : NULL;
/* Strength-reduce an instruction whose source `s` is the constant `val`
 * (`t` is the other source index).  Visible simplifications:
 *   MUL x, 1  -> MOV x          MUL x, 2  -> ADD x, x
 *   MUL x, -1 -> NEG x          MUL x, -2 -> ADD -x, -x
 *   MUL x, 0  -> MOV 0          ADD x, 0  -> MOV x
 * NOTE(review): the switch-case labels and some statements are missing from
 * this view — the opcode attribution above is inferred and should be
 * confirmed against the full file. */
439 constant_operand(struct nv_pc *pc,
440 struct nv_instruction *nvi, struct nv_value *val, int s)
447 type = nvi->def[0]->reg.type;
449 switch (nvi->opcode) {
/* multiply by 1 (float or int): becomes a plain MOV of the other source */
451 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
452 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
453 nvi->opcode = NV_OP_MOV;
454 nv_reference(pc, &nvi->src[s], NULL);
/* keep the surviving operand in slot 0 */
456 nvi->src[0] = nvi->src[1];
/* multiply by 2: x * 2 == x + x */
460 if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
461 (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
462 nvi->opcode = NV_OP_ADD;
463 nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
/* multiply by -1.0: becomes a negate */
465 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
466 nvi->opcode = NV_OP_NEG;
467 nv_reference(pc, &nvi->src[s], NULL);
468 nvi->src[0] = nvi->src[t];
/* multiply by -2.0: (-x) + (-x) */
471 if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
472 nvi->opcode = NV_OP_ADD;
473 assert(!nvi->src[s]->mod);
474 nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
475 nvi->src[t]->mod ^= NV_MOD_NEG;
476 nvi->src[s]->mod |= NV_MOD_NEG;
/* multiply by 0: result is the constant 0 itself */
478 if (val->reg.imm.u32 == 0) {
479 nvi->opcode = NV_OP_MOV;
480 nv_reference(pc, &nvi->src[t], NULL);
482 nvi->src[0] = nvi->src[1];
/* add of 0: becomes a MOV of the other operand */
488 if (val->reg.imm.u32 == 0) {
489 nvi->opcode = NV_OP_MOV;
490 nv_reference(pc, &nvi->src[s], NULL);
491 nvi->src[0] = nvi->src[t];
/* Arithmetic lowering: fold constant operands via constant_operand(), then
 * combine a single-use MUL feeding an ADD into one MAD.  The MUL's sources
 * (with any NEG modifier from the consumed ADD operand xor'd in) take
 * src[0]/src[1] of the MAD, the remaining ADD operand becomes src[2].
 * Recurses over the CFG via DESCEND_ARBITRARY. */
501 nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
503 struct nv_instruction *nvi, *next;
506 for (nvi = b->entry; nvi; nvi = next) {
507 struct nv_value *src0, *src1, *src;
512 if ((src = find_immediate(nvi->src[0])) != NULL)
513 constant_operand(ctx->pc, nvi, src, 0);
515 if ((src = find_immediate(nvi->src[1])) != NULL)
516 constant_operand(ctx->pc, nvi, src, 1);
518 /* try to combine MUL, ADD into MAD */
519 if (nvi->opcode != NV_OP_ADD)
522 src0 = nvi->src[0]->value;
523 src1 = nvi->src[1]->value;
/* only fold a MUL whose result has no other users */
525 if (SRC_IS_MUL(src0) && src0->refc == 1)
528 if (SRC_IS_MUL(src1) && src1->refc == 1)
533 nvi->opcode = NV_OP_MAD;
534 mod = nvi->src[(src == src0) ? 0 : 1]->mod;
535 nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
536 nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
/* only NEG can be carried over onto the MUL's operands */
538 assert(!(mod & ~NV_MOD_NEG));
539 nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
540 nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
541 nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
542 nvi->src[1]->mod = src->insn->src[1]->mod;
544 DESCEND_ARBITRARY(j, nv_pass_lower_arith);
/* The following three lines are an example instruction sequence quoted from
 * the surrounding comment block in the original file (flag/condition usage
 * that this pass is meant to lower): */
550 set $r2 g f32 $r2 $r3
551 cvt abs rn f32 $r2 s32 $r2
552 cvt f32 $c0 # f32 $r2
/* Condition lowering pass — currently not implemented here (see XXX). */
557 nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
559 /* XXX: easier in IR builder for now */
564 /* TODO: redundant store elimination */
/* Record of a previously seen load/immediate: keyed by `data` (address or
 * immediate bits), holding the value that materialized it; chained per
 * memory space for the reload-elimination pass below. */
567 struct load_record *next;
569 struct nv_value *value;
/* fixed-size record pool; the pass stops recording when it is exhausted */
572 #define LOAD_RECORD_POOL_SIZE 1024
/* State for the reload-elimination pass: one record list per source of
 * loads/immediates, plus a bump-allocated pool of records. */
574 struct nv_pass_reld_elim {
/* immediate MOVs */
577 struct load_record *imm;
/* shared memory ($s) loads */
578 struct load_record *mem_s;
/* vertex/varying memory loads */
579 struct load_record *mem_v;
/* one list per constant buffer c0[]..c15[] */
580 struct load_record *mem_c[16];
/* local memory loads */
581 struct load_record *mem_l;
583 struct load_record pool[LOAD_RECORD_POOL_SIZE];
/* Redundant-load elimination: for each LDA (per memory space) or immediate
 * MOV, look up earlier instructions loading the same `data`; if found,
 * replace this instruction's result with the recorded value, otherwise
 * record it (until the pool runs out).  The constant-buffer lists are reset
 * before descending into successors, since records do not survive control
 * flow here.  NOTE(review): several lines are missing from this view. */
588 nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
590 struct load_record **rec, *it;
591 struct nv_instruction *ld, *next;
593 struct nv_value *val;
596 for (ld = b->entry; ld; ld = next) {
600 val = ld->src[0]->value;
/* interpolants are handled too (lines missing from this view) */
603 if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
607 if (ld->opcode == NV_OP_LDA) {
/* pick the record list matching the load's memory space */
609 if (val->reg.file >= NV_FILE_MEM_C(0) &&
610 val->reg.file <= NV_FILE_MEM_C(15))
611 rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
613 if (val->reg.file == NV_FILE_MEM_S)
616 if (val->reg.file == NV_FILE_MEM_L)
/* immediates are keyed by their raw 32-bit pattern */
619 if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
620 data = val->reg.imm.u32;
/* nothing to do for unrecognized ops or dead results */
624 if (!rec || !ld->def[0]->refc)
627 for (it = *rec; it; it = it->next)
628 if (it->data == data)
/* found a prior load of the same datum */
632 if (ld->def[0]->reg.id >= 0)
633 it->value = ld->def[0];
635 nvcg_replace_value(ctx->pc, ld->def[0], it->value);
/* no match: record this load, unless the pool is exhausted */
637 if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
639 it = &ctx->pool[ctx->alloc++];
642 it->value = ld->def[0];
/* invalidate constant-buffer records before visiting successors */
650 for (j = 0; j < 16; ++j)
651 ctx->mem_c[j] = NULL;
655 DESCEND_ARBITRARY(j, nv_pass_reload_elim);
/* Build the TEX write mask: for every vector op (TEX etc.), mark the
 * components whose defs are actually referenced, then compact the def[]
 * array so used components come first (order preserved), followed by the
 * unused ones.  Iterates over the flat instruction array, not the CFG. */
661 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
665 for (i = 0; i < ctx->pc->num_instructions; ++i) {
666 struct nv_instruction *nvi = &ctx->pc->instructions[i];
667 struct nv_value *def[4];
669 if (!nv_is_vector_op(nvi->opcode))
673 for (c = 0; c < 4; ++c) {
/* referenced components go into the mask */
674 if (nvi->def[c]->refc)
675 nvi->tex_mask |= 1 << c;
676 def[c] = nvi->def[c];
/* compaction: live components first ... */
680 for (c = 0; c < 4; ++c)
681 if (nvi->tex_mask & (1 << c))
682 nvi->def[j++] = def[c];
/* ... then the dead ones */
683 for (c = 0; c < 4; ++c)
684 if (!(nvi->tex_mask & (1 << c)))
685 nvi->def[j++] = def[c];
/* Dead-code elimination: delete every instruction inst_cullable() approves
 * of (no users, not a terminator).  The caller re-runs this pass until
 * ctx->removed stays zero (see the do/while in nv_pc_exec_pass0).
 * Recurses over the CFG via DESCEND_ARBITRARY. */
697 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
700 struct nv_instruction *nvi, *next;
702 for (nvi = b->entry; nvi; nvi = next) {
705 if (inst_cullable(nvi)) {
711 DESCEND_ARBITRARY(j, nv_pass_dce);
/* Recognize a simple IF/ENDIF diamond: bb has two successors, the first of
 * which falls straight through (single exit, no second branch) into the
 * second — i.e. an if-body with no else and no nested control flow. */
716 static INLINE boolean
717 bb_simple_if_endif(struct nv_basic_block *bb)
719 return (bb->out[0] && bb->out[1] &&
720 bb->out[0]->out[0] == bb->out[1] &&
721 !bb->out[0]->out[1]);
/* Count (and eventually predicate/flatten) simple IF/ENDIF constructs found
 * by bb_simple_if_endif; currently only reports the running total via
 * ctx->n.  Recurses over the CFG via DESCEND_ARBITRARY. */
725 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
729 if (bb_simple_if_endif(b)) {
731 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
733 DESCEND_ARBITRARY(j, nv_pass_flatten);
738 /* local common subexpression elimination, stupid O(n^2) implementation */
/* For each instruction ir, scan all earlier instructions ik in the same
 * block; if opcode, source values, and modifiers all match (and the op is
 * not a load/store/move/vector op and touches no flags or address regs),
 * replace ir's result with ik's and delete ir. */
740 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
742 struct nv_instruction *ir, *ik, *next;
743 struct nv_instruction *entry = b->phi ? b->phi : b->entry;
749 for (ir = entry; ir; ir = next) {
751 for (ik = entry; ik != ir; ik = ik->next) {
752 if (ir->opcode != ik->opcode)
755 if (ik->opcode == NV_OP_LDA ||
756 ik->opcode == NV_OP_STA ||
757 ik->opcode == NV_OP_MOV ||
758 nv_is_vector_op(ik->opcode))
759 continue; /* ignore loads, stores & moves */
761 if (ik->src[4] || ir->src[4])
762 continue; /* don't mess with address registers */
764 if (ik->flags_src || ir->flags_src ||
765 ik->flags_def || ir->flags_def)
766 continue; /* and also not with flags, for now */
768 for (s = 0; s < 3; ++s) {
/* NOTE(review): local `b` shadows the basic-block parameter `b` here */
769 struct nv_value *a, *b;
776 if (ik->src[s]->mod != ir->src[s]->mod)
778 a = ik->src[s]->value;
779 b = ir->src[s]->value;
/* sources match if they are the same file/id */
782 if (a->reg.file != b->reg.file ||
784 a->reg.id != b->reg.id)
/* all sources identical: reuse ik's result for ir */
790 nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
797 DESCEND_ARBITRARY(s, nv_pass_cse);
/* Pass 0 driver: runs the optimization pipeline over the whole program in
 * order: flatten, lower_arith, fold_loads, fold_stores, reload_elim, cse,
 * lower_mods, then DCE repeated until it removes nothing, and finally
 * tex_mask.  NOTE(review): this function continues past the end of the
 * visible source; error handling between stages is only partially shown. */
803 nv_pc_exec_pass0(struct nv_pc *pc)
805 struct nv_pass_reld_elim *reldelim;
807 struct nv_pass_dce dce;
814 ret = nv_pass_flatten(&pass, pc->root);
818 /* Do this first, so we don't have to pay attention
819 * to whether sources are supported memory loads.
822 ret = nv_pass_lower_arith(&pass, pc->root);
827 ret = nv_pass_fold_loads(&pass, pc->root);
832 ret = nv_pass_fold_stores(&pass, pc->root);
836 reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
839 ret = nv_pass_reload_elim(reldelim, pc->root);
845 ret = nv_pass_cse(&pass, pc->root);
850 ret = nv_pass_lower_mods(&pass, pc->root);
/* iterate DCE to a fixed point: removals can expose more dead code */
858 ret = nv_pass_dce(&dce, pc->root);
861 } while (dce.removed);
863 ret = nv_pass_tex_mask(&pass, pc->root);