/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "nvc0_program.h"
/* Relocation record kinds. nvc0_relocate_program() adds code_base to the
 * data of a CODE_RELOC record and data_base to the data of a DATA_RELOC
 * record. */
26 #define NVC0_FIXUP_CODE_RELOC 0
27 #define NVC0_FIXUP_DATA_RELOC 1
/*
 * Patch the code words of prog using its relocation records.
 *
 * prog->relocs is viewed as an array of struct nvc0_fixup with
 * prog->num_relocs entries. For each record the visible statements:
 *  - compute a value: code_base + data for NVC0_FIXUP_CODE_RELOC,
 *    data_base + data for NVC0_FIXUP_DATA_RELOC;
 *  - shift it right by -shift when shift is negative, left by shift
 *    otherwise;
 *  - replace the bits selected by mask in prog->code[ofst / 4] with the
 *    corresponding bits of the shifted value.
 *
 * NOTE(review): the remaining parameters, the switch expression and the
 * declarations of i/data are not visible here. ofst is divided by 4 to
 * index code[], so it is presumably a byte offset -- confirm.
 */
38 nvc0_relocate_program(struct nvc0_program *prog,
42 struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
45 for (i = 0; i < prog->num_relocs; ++i) {
49 case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
50 case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
/* a negative shift means "shift right by its magnitude" */
55 data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
/* clear the target field, then merge in the relocated bits */
57 prog->code[f[i].ofst / 4] &= ~f[i].mask;
58 prog->code[f[i].ofst / 4] |= data & f[i].mask;
/*
 * Record a relocation for the instruction currently being emitted.
 *
 * The new record is slot pc->num_relocs of pc->reloc_entries; its ofst is
 * pc->emit_pos + w * 4, i.e. w selects which 32-bit word of the instruction
 * is patched. The array is grown with REALLOC from n to n + 8 records.
 *
 * Parameters ty, data, m and s are not used on any visible line.
 *
 * NOTE(review): the condition guarding the REALLOC is not visible. The
 * REALLOC result is assigned straight back to pc->reloc_entries with no
 * visible failure check -- confirm that allocation failure is handled.
 */
63 create_fixup(struct nv_pc *pc, uint8_t ty,
64 int w, uint32_t data, uint32_t m, int s)
68 const unsigned size = sizeof(struct nvc0_fixup);
69 const unsigned n = pc->num_relocs;
72 pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size);
74 f = (struct nvc0_fixup *)pc->reloc_entries;
76 f[n].ofst = pc->emit_pos + w * 4;
/* Return reg.size of the value referenced by source operand s of nvi.
 * (Other code here compares this against 4/8/16 and divides it by 4, so the
 * unit is presumably bytes -- confirm.) */
86 SSIZE(struct nv_instruction *nvi, int s)
88 return nvi->src[s]->value->reg.size;
/* Return reg.size of destination value d of nvi. */
92 DSIZE(struct nv_instruction *nvi, int d)
94 return nvi->def[d]->reg.size;
/* Return a pointer to the register description of a source reference. The
 * register is taken from the value's join (ref->value->join->reg), not from
 * the value itself.
 * NOTE(review): SID() tests the result for NULL, so a NULL-returning path
 * presumably exists on lines not visible here -- confirm. */
97 static INLINE struct nv_reg *
98 SREG(struct nv_ref *ref)
102 return &ref->value->join->reg;
/* Return a pointer to the register description of a value, taken from the
 * value's join (val->join->reg).
 * NOTE(review): DID() tests the result for NULL, so a NULL-returning path
 * presumably exists on lines not visible here -- confirm. */
105 static INLINE struct nv_reg *
106 DREG(struct nv_value *val)
110 return &val->join->reg;
/* Return reg.file (the register file) of the value referenced by source
 * operand s of nvi. */
114 SFILE(struct nv_instruction *nvi, int s)
116 return nvi->src[s]->value->reg.file;
120 DFILE(struct nv_instruction *nvi, int d)
122 return nvi->def[0]->reg.file;
/* OR the register id of a source reference into the instruction being
 * built, at absolute bit position pos (word pos / 32, bit pos % 32).
 * When SREG(ref) is NULL the id 63 is encoded instead. The field is only
 * ORed, never cleared first. */
126 SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
128 pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32);
/* OR the register id of a value into the instruction being built, at
 * absolute bit position pos (word pos / 32, bit pos % 32). When DREG(val)
 * is NULL the id 63 is encoded instead. */
132 DID(struct nv_pc *pc, struct nv_value *val, int pos)
134 pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32);
/* Return the 32-bit immediate stored in the value a reference points to.
 * Asserts that the value lives in NV_FILE_IMM. */
137 static INLINE uint32_t
138 get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
140 assert(ref->value->reg.file == NV_FILE_IMM);
141 return ref->value->reg.imm.u32;
/* OR an immediate into the instruction, split across both words: the low
 * 6 bits go to bits 26..31 of emit[0], the remaining bits (u32 >> 6) go to
 * emit[1] starting at bit 0. */
145 set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
147 pc->emit[0] |= (u32 & 0x3f) << 26;
148 pc->emit[1] |= u32 >> 6;
/*
 * Encode an immediate operand; the form depends on the low nibble of
 * emit[0], which the caller has already set.
 *
 * Visible cases:
 *  - nibble == 0x2: the full 32-bit value is stored via set_immd_u32_l();
 *  - nibble == 0x3: bits 0xc000 of emit[1] must be clear and are then set;
 *    the value must fit in 20 bits (asserted) and is stored as is;
 *  - remaining statements: bits 0xc000 of emit[1] must be clear and are
 *    then set; the low 12 bits of the value must be zero (asserted) and
 *    u32 >> 12 is stored.
 *
 * NOTE(review): the else keywords linking the three cases are not visible
 * here; the description assumes they are mutually exclusive -- confirm.
 */
152 set_immd_u32(struct nv_pc *pc, uint32_t u32)
154 if ((pc->emit[0] & 0xf) == 0x2) {
155 set_immd_u32_l(pc, u32);
157 if ((pc->emit[0] & 0xf) == 0x3) {
158 assert(!(pc->emit[1] & 0xc000));
159 pc->emit[1] |= 0xc000;
160 assert(!(u32 & 0xfff00000));
161 set_immd_u32_l(pc, u32);
163 assert(!(pc->emit[1] & 0xc000));
164 pc->emit[1] |= 0xc000;
165 assert(!(u32 & 0xfff));
166 set_immd_u32_l(pc, u32 >> 12);
/* Encode source operand s of instruction i as an immediate: fetch its
 * 32-bit value with get_immd_u32() and store it with set_immd_u32(). */
171 set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
173 set_immd_u32(pc, get_immd_u32(i->src[s]));
/* Encode the combined size of the destination values. reg.size of def[0]
 * plus that of every following non-NULL def (up to def[3]) is summed, and
 * (sum / 4) - 1 is ORed into emit[0] starting at bit 5. */
177 DVS(struct nv_pc *pc, struct nv_instruction *i)
179 uint s = i->def[0]->reg.size;
181 for (n = 1; n < 4 && i->def[n]; ++n)
182 s += i->def[n]->reg.size;
183 pc->emit[0] |= ((s / 4) - 1) << 5;
/* Encode the size of a source register: (size / 4) - 1 is ORed into emit[0]
 * starting at bit 5. SREG(src) is dereferenced without a NULL check. */
187 SVS(struct nv_pc *pc, struct nv_ref *src)
189 pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
/*
 * Encode the predicate of instruction i into emit[0].
 *
 * When i->predicate >= 0 it is the index of the source operand holding the
 * predicate, and that operand's register id is ORed in via SID(). Bit 13
 * (0x2000) is the negate flag. 0x1c00 sets bits 10..12.
 *
 * NOTE(review): the conditions guarding the 0x2000 and 0x1c00 statements
 * are not visible; 0x1c00 is presumably the "no predicate" case -- confirm.
 * NOTE(review): SID() is called with bit position 6, while the other two
 * constants sit at bits 10..13. If the predicate id belongs in bits 10..12,
 * position 6 is wrong -- verify against the hardware encoding.
 */
193 set_pred(struct nv_pc *pc, struct nv_instruction *i)
195 if (i->predicate >= 0) {
196 SID(pc, i->src[i->predicate], 6);
198 pc->emit[0] |= 0x2000; /* negate */
200 pc->emit[0] |= 0x1c00;
/* Encode the low 16 bits of a source value's reg.address, split across both
 * words: bits 0..5 go to bits 26..31 of emit[0], bits 6..15 go to emit[1]
 * starting at bit 0. */
205 set_address_16(struct nv_pc *pc, struct nv_ref *src)
207 pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
208 pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
/* Return the index of the constant space that source operand s of i lives
 * in, computed as its register file minus NV_FILE_MEM_C(0). Callers check
 * that the file lies in NV_FILE_MEM_C(0)..NV_FILE_MEM_C(15) first. */
211 static INLINE unsigned
212 const_space_index(struct nv_instruction *i, int s)
214 return SFILE(i, s) - NV_FILE_MEM_C(0);
/*
 * Emit a control-flow instruction.
 *
 * emit[0] starts as 0x00000007 and op is placed in the top byte of emit[1].
 * For op 0x40 and ops in 0x80..0x98, bits 0x1e0 of emit[0] are set as well.
 * The branch offset is relative to the end of this 8-byte instruction:
 * pcrel = target->emit_pos - (pc->emit_pos + 8). Its low 6 bits go to
 * bits 26..31 of emit[0] and the next 18 bits to emit[1] from bit 0.
 *
 * NOTE(review): `pos` is not declared on any visible line; confirm whether
 * the two create_fixup() calls are live code. If they are, their last two
 * arguments (26, 0xfc000000) and (-6, 0x0001ffff) look like shift-then-mask,
 * while create_fixup() declares (uint32_t m, int s), i.e. mask-then-shift.
 * NOTE(review): op is a uint8_t promoted to int, so `op << 24` overflows a
 * signed int for op >= 0x80; a cast to uint32_t would avoid that.
 */
218 emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
220 pc->emit[0] = 0x00000007;
221 pc->emit[1] = op << 24;
223 if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
224 /* bra, exit, ret or kil */
225 pc->emit[0] |= 0x1e0;
230 int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8);
232 /* we will need relocations only for global functions */
234 create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
235 create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
238 pc->emit[0] |= (pcrel & 0x3f) << 26;
239 pc->emit[1] |= (pcrel >> 6) & 0x3ffff;
243 /* doesn't work for vfetch, export, ld, st, mov ... */
/*
 * Encode the operands of a generic instruction with up to three sources.
 *
 * def[0] is placed at bit 14. Each present source (loop stops at the first
 * NULL src or after three) is encoded by register file:
 *  - constant space c0..c15: bits 0xc000 of emit[1] must still be clear;
 *    bit 0x4000 and the constant-space index (<< 10) are set in emit[1] and
 *    the 16-bit address is stored with set_address_16();
 *  - GPR: register id at bit 20 (source 0), 26 (source 1) or 49 (source 2);
 *  - immediate: bits 0xc000 of emit[1] must be clear, and the operand must
 *    be source 1 unless the opcode is NV_OP_MOV (both asserted).
 *
 * NOTE(review): how the immediate value itself is stored, and whether the
 * three file checks are chained with else, is not visible here.
 */
245 emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
251 DID(pc, i->def[0], 14);
253 for (s = 0; s < 3 && i->src[s]; ++s) {
254 if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
255 SFILE(i, s) <= NV_FILE_MEM_C(15)) {
256 assert(!(pc->emit[1] & 0xc000));
258 pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
259 set_address_16(pc, i->src[s]);
261 if (SFILE(i, s) == NV_FILE_GPR) {
262 SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20);
264 if (SFILE(i, s) == NV_FILE_IMM) {
265 assert(!(pc->emit[1] & 0xc000));
266 assert(s == 1 || i->opcode == NV_OP_MOV);
/*
 * Encode the operands of a generic single-source instruction.
 *
 * def[0] is placed at bit 14. Only source 0 is considered (loop bound is
 * s < 1):
 *  - constant space c0..c15: bits 0xc000 of emit[1] must still be clear;
 *    bit 0x4000 and the constant-space index (<< 10) are set in emit[1] and
 *    the 16-bit address is stored with set_address_16();
 *  - GPR: register id at bit 26;
 *  - immediate: bits 0xc000 of emit[1] must be clear.
 *
 * NOTE(review): since s is always 0 here, the assert `s == 1 || opcode ==
 * NV_OP_MOV` only passes for NV_OP_MOV -- confirm this is intended.
 */
273 emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
279 DID(pc, i->def[0], 14);
281 for (s = 0; s < 1 && i->src[s]; ++s) {
282 if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
283 SFILE(i, s) <= NV_FILE_MEM_C(15)) {
284 assert(!(pc->emit[1] & 0xc000));
286 pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
287 set_address_16(pc, i->src[s]);
289 if (SFILE(i, s) == NV_FILE_GPR) {
290 SID(pc, i->src[s], 26);
292 if (SFILE(i, s) == NV_FILE_IMM) {
293 assert(!(pc->emit[1] & 0xc000));
294 assert(s == 1 || i->opcode == NV_OP_MOV);
/* Encode the abs/neg modifiers of sources 0 and 1 into emit[0]:
 *   source 0: ABS -> bit 7, NEG -> bit 9
 *   source 1: ABS -> bit 6, NEG -> bit 8
 * Both src[0] and src[1] are dereferenced unconditionally. */
301 emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
303 if (i->src[0]->mod & NV_MOD_ABS)
304 pc->emit[0] |= 1 << 7;
305 if (i->src[0]->mod & NV_MOD_NEG)
306 pc->emit[0] |= 1 << 9;
307 if (i->src[1]->mod & NV_MOD_ABS)
308 pc->emit[0] |= 1 << 6;
309 if (i->src[1]->mod & NV_MOD_NEG)
310 pc->emit[0] |= 1 << 8;
/* Emit a 32-bit float add: base encoding emit[0] = 0, emit[1] = 0x50000000,
 * with source abs/neg modifiers from emit_neg_abs_1_2().
 * NOTE(review): the condition under which bit 17 of emit[1] is set, and the
 * operand encoding call, are not visible here. */
314 emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
316 pc->emit[0] = 0x00000000;
317 pc->emit[1] = 0x50000000;
321 emit_neg_abs_1_2(pc, i);
324 pc->emit[1] |= 1 << 17;
/* Emit a 32-bit float multiply: base encoding emit[0] = 0,
 * emit[1] = 0x58000000. Bit 25 of emit[1] is set when exactly one of the
 * two sources carries NV_MOD_NEG (the XOR of the modifiers).
 * NOTE(review): the condition under which bit 5 of emit[0] is set is not
 * visible here. */
328 emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
330 pc->emit[0] = 0x00000000;
331 pc->emit[1] = 0x58000000;
335 if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
336 pc->emit[1] |= 1 << 25;
339 pc->emit[0] |= 1 << 5;
/* Emit a 32-bit float multiply-add: base encoding emit[0] = 0,
 * emit[1] = 0x30000000. Bit 9 of emit[0] is set when exactly one of
 * sources 0 and 1 carries NV_MOD_NEG; bit 8 is set when source 2 does.
 * NOTE(review): the condition under which bit 5 of emit[0] is set is not
 * visible here. */
343 emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
345 pc->emit[0] = 0x00000000;
346 pc->emit[1] = 0x30000000;
350 if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
351 pc->emit[0] |= 1 << 9;
353 if (i->src[2]->mod & NV_MOD_NEG)
354 pc->emit[0] |= 1 << 8;
357 pc->emit[0] |= 1 << 5;
/* Emit a min/max instruction: base encoding emit[0] = 0,
 * emit[1] = 0x08000000. When the base opcode is NV_OP_MAX, 0x001e0000 is
 * ORed into emit[1]; 0x000e0000 is the other visible variant. Source
 * abs/neg modifiers come from emit_neg_abs_1_2().
 * NOTE(review): the else joining the two emit[1] variants and the condition
 * guarding `emit[0] |= 3 | (1 << 5)` are not visible here -- confirm. */
361 emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
363 pc->emit[0] = 0x00000000;
364 pc->emit[1] = 0x08000000;
366 if (NV_BASEOP(i->opcode) == NV_OP_MAX)
367 pc->emit[1] |= 0x001e0000;
369 pc->emit[1] |= 0x000e0000; /* predicate ? */
373 emit_neg_abs_1_2(pc, i);
382 pc->emit[0] |= 3 | (1 << 5);
/*
 * Emit a texture instruction.
 *
 * src1 = tex_array + tex_dim + tex_cube is the index of the operand encoded
 * as the second source (bit 26); source 0 goes to bit 20 and def[0] to
 * bit 14. emit[1] is selected per opcode (TEX/TXB/TXL/TXF/TXG) and then
 * receives tex_mask at bit 14, (tex_dim - 1) at bit 20, ext.tex.t at bit 0
 * and ext.tex.s at bit 8 (ext.tex.s is asserted to be < 16).
 *
 * NOTE(review): the conditions guarding the 0x00080000, 0x01000000,
 * `3 << 20` and `emit[0] |= 1 << 9` statements are not visible here. The
 * initial emit[1] = 0x80000000 equals the NV_OP_TEX case value.
 */
392 emit_tex(struct nv_pc *pc, struct nv_instruction *i)
394 int src1 = i->tex_array + i->tex_dim + i->tex_cube;
398 pc->emit[0] = 0x00000086;
399 pc->emit[1] = 0x80000000;
402 case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
403 case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
404 case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
405 case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
406 case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
413 pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
415 pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
419 DID(pc, i->def[0], 14);
420 SID(pc, i->src[0], 20);
421 SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
423 pc->emit[1] |= i->tex_mask << 14;
424 pc->emit[1] |= (i->tex_dim - 1) << 20;
426 pc->emit[1] |= 3 << 20;
428 assert(i->ext.tex.s < 16);
430 pc->emit[1] |= i->ext.tex.t;
431 pc->emit[1] |= i->ext.tex.s << 8;
434 pc->emit[0] |= 1 << 9;
437 /* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
/* Emit a special-function float op. Base encoding emit[0] = 0,
 * emit[1] = 0xc8000000; def[0] at bit 14, source 0 at bit 20, and the
 * sub-operation selector op at bit 26 of emit[0]. NEG on source 0 sets
 * bit 9, ABS sets bit 7.
 * NOTE(review): the condition deciding between honoring the modifiers and
 * asserting that source 0 has none is not visible here. */
439 emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
441 pc->emit[0] = 0x00000000;
442 pc->emit[1] = 0xc8000000;
446 DID(pc, i->def[0], 14);
447 SID(pc, i->src[0], 20);
449 pc->emit[0] |= op << 26;
452 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
453 if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
455 assert(!i->src[0]->mod);
/* Emit a quad operation. Base encoding emit[0] = 0, emit[1] = 0x48000000.
 * Both sources must be GPRs (asserted). def[0] goes to bit 14, i->lanes to
 * bit 6 of emit[0] and i->quadop to the low bits of emit[1].
 * NOTE(review): both SID() calls pass i->src[0] (bit 20 and bit 26), even
 * though the assert also checks source 1. The second call probably should
 * encode i->src[1]; the visible callers emit_ddx/emit_ddy alias src[1] to
 * src[0], which would hide the difference -- confirm. */
460 emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
462 pc->emit[0] = 0x00000000;
463 pc->emit[1] = 0x48000000;
467 assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
469 DID(pc, i->def[0], 14);
470 SID(pc, i->src[0], 20);
471 SID(pc, i->src[0], 26);
473 pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
474 pc->emit[1] |= i->quadop;
/* Emit an x-derivative. The only visible statement aliases source 1 to
 * source 0, which modifies the instruction i in place.
 * NOTE(review): the remaining statements are not visible here. */
478 emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
482 i->src[1] = i->src[0];
/* Emit a y-derivative. The only visible statement aliases source 1 to
 * source 0, which modifies the instruction i in place.
 * NOTE(review): the remaining statements are not visible here. */
487 emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
491 i->src[1] = i->src[0];
495 /* preparation op (preex2, presin / convert to fixed point) */
/* Base encoding emit[0] = 0, emit[1] = 0x60000000. NEG on source 0 sets
 * bit 8 of emit[0], ABS sets bit 6.
 * NOTE(review): the statement selected by the NV_OP_PREEX2 test is not
 * visible here. */
497 emit_preop(struct nv_pc *pc, struct nv_instruction *i)
499 pc->emit[0] = 0x00000000;
500 pc->emit[1] = 0x60000000;
502 if (i->opcode == NV_OP_PREEX2)
507 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8;
508 if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
/* Emit a shift instruction. emit[0] starts as 0x00000003; one case also
 * sets bit 0x20 before falling through. emit[1] is 0x58000000 or
 * 0x60000000 depending on the case.
 * NOTE(review): the case labels that select between these values are not
 * visible here. */
512 emit_shift(struct nv_pc *pc, struct nv_instruction *i)
514 pc->emit[0] = 0x00000003;
518 pc->emit[0] |= 0x20; /* fall through */
520 pc->emit[1] = 0x58000000;
524 pc->emit[1] = 0x60000000;
/* Emit a bitwise operation. With an immediate second source the base
 * encoding is emit[0] = 0x00000002, emit[1] = 0x38000000; the other visible
 * variant is emit[0] = 0x00000003, emit[1] = 0x68000000.
 * NOTE(review): the else joining the two variants and the per-opcode bits
 * are not visible here. */
532 emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
534 if (SFILE(i, 1) == NV_FILE_IMM) {
535 pc->emit[0] = 0x00000002;
536 pc->emit[1] = 0x38000000;
538 pc->emit[0] = 0x00000003;
539 pc->emit[1] = 0x68000000;
/*
 * Emit a comparison (set) instruction.
 *
 * emit[0] starts at 0 and emit[1] is chosen per opcode; the visible values
 * are 0x100e0000, 0x18000000 (SET_F32_AND), 0x18200000 (SET_F32_OR),
 * 0x18400000 (SET_F32_XOR) and 0x180e0000. Two cases set bit 0x20 of
 * emit[0] before falling through. When the destination is a predicate
 * register, 0x1c000 is ORed into emit[0] and 0x08000000 is added to
 * emit[1]. The condition code i->set_cond goes to bit 23 of emit[1] and
 * source modifiers come from emit_neg_abs_1_2().
 *
 * NOTE(review): several case labels and the operand encoding call are not
 * visible here.
 */
558 emit_set(struct nv_pc *pc, struct nv_instruction *i)
560 pc->emit[0] = 0x00000000;
564 pc->emit[0] |= 0x20; /* fall through */
567 pc->emit[1] = 0x100e0000;
569 case NV_OP_SET_F32_AND:
570 pc->emit[1] = 0x18000000;
572 case NV_OP_SET_F32_OR:
573 pc->emit[1] = 0x18200000;
575 case NV_OP_SET_F32_XOR:
576 pc->emit[1] = 0x18400000;
579 pc->emit[0] |= 0x20; /* fall through */
582 pc->emit[1] = 0x180e0000;
586 if (DFILE(i, 0) == NV_FILE_PRED) {
587 pc->emit[0] |= 0x1c000;
588 pc->emit[1] += 0x08000000;
591 pc->emit[1] |= i->set_cond << 23;
595 emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
/* Emit a predicate-driven select. Base encoding emit[0] = 0x00000004,
 * emit[1] = 0x20000000. Bit 20 of emit[1] is set when i->cc is non-zero or
 * source 2 carries NV_MOD_NOT.
 * NOTE(review): the operand encoding call is not visible here. */
599 emit_selp(struct nv_pc *pc, struct nv_instruction *i)
601 pc->emit[0] = 0x00000004;
602 pc->emit[1] = 0x20000000;
606 if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
607 pc->emit[1] |= 1 << 20;
/* Emit a compare-and-select. emit[0] starts at 0 (one case sets bit 0x20
 * before falling through) and emit[1] is 0x30000000 or 0x38000000
 * depending on the case. The condition code starts as i->set_cond, is
 * passed through nvc0_ir_reverse_cc() when source 2 carries NV_MOD_NEG,
 * and goes to bit 23 of emit[1].
 * NOTE(review): the case labels are not visible here. */
611 emit_slct(struct nv_pc *pc, struct nv_instruction *i)
613 uint8_t cc = i->set_cond;
615 pc->emit[0] = 0x00000000;
619 pc->emit[0] |= 0x20; /* fall through */
622 pc->emit[1] = 0x30000000;
626 pc->emit[1] = 0x38000000;
632 if (i->src[2]->mod & NV_MOD_NEG)
633 cc = nvc0_ir_reverse_cc(cc);
635 pc->emit[1] |= cc << 23;
/*
 * Emit a type conversion; also used for FLOOR/CEIL/TRUNC/SAT/ABS/NEG.
 *
 * - For opcodes other than NV_OP_CVT whose ext.cvt.d equals ext.cvt.s,
 *   both are overwritten with NV_OPTYPE(opcode). This modifies i in place.
 * - emit[1] is chosen by destination type (outer switch) and source type
 *   (inner switches); a signed source sets bit 0x200 of emit[0], a signed
 *   destination sets bit 0x80. An unknown type trips an assert.
 * - Visible rounding values ORed into emit[1] at bit 16: FLOOR 2, CEIL 4,
 *   TRUNC 6.
 * - ABS (opcode or source modifier) sets bit 6 of emit[0]; NEG sets bit 8.
 * - log2 of the destination and source register sizes go to bits 20 and 23
 *   of emit[0].
 *
 * NOTE(review): the use of `rint`, the saturate statement, several case
 * labels and the operand encoding call are not visible here.
 */
639 emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
643 pc->emit[0] = 0x00000004;
644 pc->emit[1] = 0x10000000;
646 /* if no type conversion specified, get type from opcode */
647 if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s)
648 i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
650 switch (i->ext.cvt.d) {
652 switch (i->ext.cvt.s) {
653 case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
654 case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
655 case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
658 case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
660 switch (i->ext.cvt.s) {
661 case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
662 case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
663 case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
667 assert(!"cvt: unknown type");
/* rint is bit 7 only when the destination type is F32 */
671 rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0;
673 if (i->opcode == NV_OP_FLOOR) {
675 pc->emit[1] |= 2 << 16;
677 if (i->opcode == NV_OP_CEIL) {
679 pc->emit[1] |= 4 << 16;
681 if (i->opcode == NV_OP_TRUNC) {
683 pc->emit[1] |= 6 << 16;
686 if (i->saturate || i->opcode == NV_OP_SAT)
689 if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
690 pc->emit[0] |= 1 << 6;
691 if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
692 pc->emit[0] |= 1 << 8;
694 pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20;
695 pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23;
/*
 * Emit an input interpolation instruction.
 *
 * Base encoding emit[0] = 0, emit[1] = 0xc07e0000; def[0] at bit 14. The
 * low 16 bits of source 0's reg.address are ORed into emit[1]. For
 * NV_OP_PINTERP, bit 0x040 of emit[0] is set and source 1 is encoded at
 * bit 26. Addresses 0x280..0x29c additionally set bit 0x080 (marked
 * "XXX: ?" by the author).
 *
 * NOTE(review): the conditions guarding the indirect-source SID() call and
 * the trailing 0x100 / 0x080 statements are not visible here.
 */
701 emit_interp(struct nv_pc *pc, struct nv_instruction *i)
703 pc->emit[0] = 0x00000000;
704 pc->emit[1] = 0xc07e0000;
706 DID(pc, i->def[0], 14);
711 SID(pc, i->src[i->indirect], 20);
715 if (i->opcode == NV_OP_PINTERP) {
716 pc->emit[0] |= 0x040;
717 SID(pc, i->src[1], 26);
719 if (i->src[0]->value->reg.address >= 0x280 &&
720 i->src[0]->value->reg.address <= 0x29c)
721 pc->emit[0] |= 0x080; /* XXX: ? */
726 pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
729 pc->emit[0] |= 0x100;
732 pc->emit[0] |= 0x080;
/* Emit a vertex attribute fetch. Base encoding emit[0] = 0x03f00006,
 * emit[1] = 0x06000000 | source 0's reg.address (not masked). def[0] goes
 * to bit 14. The indirect source, when i->indirect >= 0, is encoded at
 * bit 26; otherwise NULL is passed and SID() encodes id 63.
 * NOTE(review): the condition under which bit 0x100 of emit[0] is set is
 * not visible here. */
736 emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
738 pc->emit[0] = 0x03f00006;
739 pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
741 pc->emit[0] |= 0x100;
746 DID(pc, i->def[0], 14);
748 SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26);
/* Emit an output export. Base encoding emit[0] = 0x00000006,
 * emit[1] = 0x0a000000. Source 0 must be in NV_FILE_MEM_V and source 1 a
 * GPR (both asserted). Source 1 is encoded at bit 26, the low 12 bits of
 * source 0's reg.address are ORed into emit[1], and the indirect source (or
 * id 63 when i->indirect < 0) is encoded at bit 20.
 * NOTE(review): the condition under which bit 0x100 of emit[0] is set is
 * not visible here. */
752 emit_export(struct nv_pc *pc, struct nv_instruction *i)
754 pc->emit[0] = 0x00000006;
755 pc->emit[1] = 0x0a000000;
757 pc->emit[0] |= 0x100;
761 assert(SFILE(i, 0) == NV_FILE_MEM_V);
762 assert(SFILE(i, 1) == NV_FILE_GPR);
764 SID(pc, i->src[1], 26); /* register source */
767 pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
769 SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
/* Emit a move. Visible base encodings by source file:
 *   immediate: emit[0] = 0x000001e2, emit[1] = 0x18000000
 *   predicate: emit[0] = 0x1c000004, emit[1] = 0x080e0000
 *   other:     emit[0] = 0x00000004 | (i->lanes << 5), emit[1] = 0x28000000
 * NOTE(review): the statement selected by the NV_OP_MOV test, the else
 * keywords between the three variants and the operand encoding are not
 * visible here. */
773 emit_mov(struct nv_pc *pc, struct nv_instruction *i)
775 if (i->opcode == NV_OP_MOV)
778 if (SFILE(i, 0) == NV_FILE_IMM) {
779 pc->emit[0] = 0x000001e2;
780 pc->emit[1] = 0x18000000;
782 if (SFILE(i, 0) == NV_FILE_PRED) {
783 pc->emit[0] = 0x1c000004;
784 pc->emit[1] = 0x080e0000;
786 pc->emit[0] = 0x00000004 | (i->lanes << 5);
787 pc->emit[1] = 0x28000000;
/* Encode the access size of a load/store into emit[0], based on the size
 * of source 0 (which must be in a memory file, asserted):
 *   4 -> 0x80, 8 -> 0xa0, 16 -> 0xc0
 * Two further cases test NV_TYPE_ISSGD(i->ext.cvt.s). Any other size is
 * reported with NOUVEAU_ERR.
 * NOTE(review): the case labels and statements of the two
 * NV_TYPE_ISSGD cases are not visible here. */
794 emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
796 assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
798 switch (SSIZE(i, 0)) {
800 if (NV_TYPE_ISSGD(i->ext.cvt.s))
805 if (NV_TYPE_ISSGD(i->ext.cvt.s))
808 case 4: pc->emit[0] |= 0x80; break;
809 case 8: pc->emit[0] |= 0xa0; break;
810 case 16: pc->emit[0] |= 0xc0; break;
812 NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
/* Encode the parts shared by all loads: the access size, the 16-bit address
 * of source 0, the indirect address register at bit 20 (id 63 when
 * i->indirect < 0) and the destination register at bit 14. The caller is
 * expected to have set the base encoding in emit[0]/emit[1] first. */
818 emit_ld_common(struct nv_pc *pc, struct nv_instruction *i)
820 emit_ldst_size(pc, i);
823 set_address_16(pc, i->src[0]);
825 SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
826 DID(pc, i->def[0], 14);
/* Emit a load from a constant space: base encoding emit[0] = 0x00000006,
 * emit[1] = 0x14000000 with the constant-space index of source 0 at bit 10,
 * followed by the common load encoding. */
830 emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
832 pc->emit[0] = 0x00000006;
833 pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
835 emit_ld_common(pc, i);
/* Emit a load, dispatching on the file of source 0:
 *  - constant space c0..c15: a 4-byte, non-indirect access is special-cased;
 *    emit_ld_const() is the other visible path;
 *  - NV_FILE_MEM_L: base encoding emit[0] = 0x00000005,
 *    emit[1] = 0xc0000000, then emit_ld_common();
 *  - anything else is reported with NOUVEAU_ERR.
 * NOTE(review): the statement taken for the special-cased constant load and
 * the else keywords between the branches are not visible here. */
839 emit_ld(struct nv_pc *pc, struct nv_instruction *i)
841 if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
842 SFILE(i, 0) <= NV_FILE_MEM_C(15)) {
843 if (SSIZE(i, 0) == 4 && i->indirect < 0) {
847 emit_ld_const(pc, i);
850 if (SFILE(i, 0) == NV_FILE_MEM_L) {
851 pc->emit[0] = 0x00000005;
852 pc->emit[1] = 0xc0000000;
854 emit_ld_common(pc, i);
856 NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
/* Emit a store. A destination file other than NV_FILE_MEM_L is reported
 * with NOUVEAU_ERR. Base encoding emit[0] = 0x00000005,
 * emit[1] = 0xc8000000, then the access size, the 16-bit address of
 * source 0 and the indirect address register at bit 20 (id 63 when
 * i->indirect < 0). The value to store is source 1; its register id goes
 * to bit 14 via DID(), the position used elsewhere for destinations.
 * NOTE(review): no return is visible after the NOUVEAU_ERR, so encoding
 * appears to continue for unsupported files -- confirm. */
862 emit_st(struct nv_pc *pc, struct nv_instruction *i)
864 if (SFILE(i, 0) != NV_FILE_MEM_L)
865 NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0));
867 pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */
868 pc->emit[1] = 0xc8000000;
870 emit_ldst_size(pc, i);
873 set_address_16(pc, i->src[0]);
875 SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
876 DID(pc, i->src[1]->value, 14);
/*
 * Encode one IR instruction into pc->emit[], dispatching on its opcode.
 *
 * Visible behavior: when NV50_DEBUG_SHADER is enabled in NV50_DEBUG the
 * instruction is printed first; control-flow opcodes go through emit_flow()
 * with sub-opcodes 0x40, 0x50, 0x60, 0x80, 0x90 and 0x98; one case writes
 * the fixed words 0x00003de4 / 0x40000000; an unrecognized opcode is
 * reported with NOUVEAU_ERR.
 *
 * NOTE(review): most of the opcode switch, including which opcode maps to
 * which emit_flow() sub-opcode, and the conditions guarding the
 * `!pc->is_fragprog` test and the final `emit[0] |= 0x10`, are not visible
 * here.
 */
880 nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
882 #if NV50_DEBUG & NV50_DEBUG_SHADER
883 debug_printf("EMIT: "); nvc0_print_instruction(i);
891 if (!pc->is_fragprog)
969 case NV_OP_SET_F32_AND:
970 case NV_OP_SET_F32_OR:
971 case NV_OP_SET_F32_XOR:
988 emit_flow(pc, i, 0x40);
991 emit_flow(pc, i, 0x50);
994 emit_flow(pc, i, 0x60);
997 emit_flow(pc, i, 0x80);
1000 emit_flow(pc, i, 0x90);
1003 emit_flow(pc, i, 0x98);
1007 pc->emit[0] = 0x00003de4;
1008 pc->emit[1] = 0x40000000;
1013 case NV_OP_SLCT_F32:
1014 case NV_OP_SLCT_S32:
1015 case NV_OP_SLCT_U32:
1019 NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
1025 pc->emit[0] |= 0x10;