1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "util/u_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_parse.h"
8 #include "tgsi/tgsi_util.h"
10 #include "nvfx_context.h"
11 #include "nvfx_shader.h"
12 #include "nvfx_resource.h"
14 #define MAX_CONSTS 128
/* NOTE(review): fragment of the per-compilation state (struct nvfx_fpc);
 * the enclosing "struct nvfx_fpc {" header and several fields are not
 * visible in this chunk. */
17 struct nvfx_fragment_program *fp;
/* Maps TGSI input indices to hardware input-source selectors
 * (filled by nvfx_fragprog_parse_decl_attrib()). */
19 uint attrib_map[PIPE_MAX_SHADER_INPUTS];
/* Temps allocated for the current instruction only; cleared back out of
 * the in-use mask by release_temps(). */
22 unsigned r_temps_discard;
/* Result registers for TGSI outputs, indexed by output index. */
23 struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
/* Registers backing TGSI temporaries (allocated in nvfx_fragprog_prepare). */
24 struct nvfx_sreg *r_temp;
/* Immediates lowered to constants, indexed by TGSI immediate index. */
37 struct nvfx_sreg imm[MAX_IMM];
/*
 * Allocate a free hardware temporary register.
 *
 * Scans the r_temps in-use bitmask with ffs() for the first free bit and
 * marks the register both in-use and discardable, so release_temps()
 * frees it again after the current instruction.
 */
41 static INLINE struct nvfx_sreg
42 temp(struct nvfx_fpc *fpc)
44 int idx = ffs(~fpc->r_temps) - 1;
/* ffs() found no clear bit: all 32 temps are in use. */
47 NOUVEAU_ERR("out of temps!!\n");
/* NOTE(review): on exhaustion this silently reuses temp 0 instead of
 * failing the compile — callers get a potentially-clobbered register. */
49 return nvfx_sr(NVFXSR_TEMP, 0);
52 fpc->r_temps |= (1 << idx);
53 fpc->r_temps_discard |= (1 << idx);
54 return nvfx_sr(NVFXSR_TEMP, idx);
/* Free all per-instruction temporaries allocated by temp(): clear the
 * discardable bits from the in-use mask and reset the discard mask. */
58 release_temps(struct nvfx_fpc *fpc)
60 fpc->r_temps &= ~fpc->r_temps_discard;
61 fpc->r_temps_discard = 0;
/*
 * Allocate a constant slot.
 *
 * pipe >= 0 refers to an entry in the bound constant buffer (patched in at
 * upload/validate time); pipe == -1 with non-NULL vals stores an immediate
 * value directly.  Fails (not fully visible here) when MAX_CONSTS slots
 * are exhausted.
 */
64 static INLINE struct nvfx_sreg
65 constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
69 if (fpc->nr_consts == MAX_CONSTS)
71 idx = fpc->nr_consts++;
73 fpc->consts[idx].pipe = pipe;
/* NOTE(review): vals is only copied on the immediate path; the visible
 * guard for vals == NULL (constant-buffer case) is not in this chunk. */
75 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
76 return nvfx_sr(NVFXSR_CONST, idx);
/* Convenience wrappers that paste the opcode name into the
 * NVFX_FP_OP_OPCODE_* token before calling the emitters below. */
79 #define arith(cc,s,o,d,m,s0,s1,s2) \
80 nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
81 (d), (m), (s0), (s1), (s2))
/* NOTE(review): tex() accepts s1/s2 for signature symmetry but forwards
 * "none" for both — texture ops only consume a single source here. */
82 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
83 nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
84 (d), (m), (s0), none, none)
/* Grow the instruction store by `size` 32-bit words (insn_len update is
 * not visible in this chunk) and reallocate the backing array.
 * NOTE(review): realloc result is assigned straight back to fp->insn —
 * on OOM the old buffer leaks and insn becomes NULL. */
87 grow_insns(struct nvfx_fpc *fpc, int size)
89 struct nvfx_fragment_program *fp = fpc->fp;
92 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
/*
 * Encode source operand `pos` (0..2) of the instruction at
 * fpc->inst_offset into the hardware instruction words.
 */
96 emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
98 struct nvfx_fragment_program *fp = fpc->fp;
99 uint32_t *hw = &fp->insn[fpc->inst_offset];
/* Input attribute: type goes in the source word, the attribute selector
 * goes into word 0 of the instruction. */
104 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
105 hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
/* Temporary register source (half-precision flag handled here too). */
108 sr |= NVFX_FP_REG_SRC_HALF;
111 sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
112 sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
/* Constant source: constants are embedded inline as 4 extra instruction
 * words immediately after the 4-word opcode (inst_offset + 4). */
115 if (!fpc->have_const) {
/* grow_insns() may have moved the buffer; refresh hw. */
120 hw = &fp->insn[fpc->inst_offset];
121 if (fpc->consts[src.index].pipe >= 0) {
/* Constant-buffer reference: record a patch entry (offset into the
 * program, index into the user constant buffer) for upload time. */
122 struct nvfx_fragment_program_data *fpd;
124 fp->consts = realloc(fp->consts, ++fp->nr_consts *
126 fpd = &fp->consts[fp->nr_consts - 1];
127 fpd->offset = fpc->inst_offset + 4;
128 fpd->index = fpc->consts[src.index].pipe;
129 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
/* Immediate constant: copy the literal value inline right away. */
131 memcpy(&fp->insn[fpc->inst_offset + 4],
132 fpc->consts[src.index].vals,
133 sizeof(uint32_t) * 4);
136 sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
/* NVFXSR_NONE sources are still encoded as INPUT type (hardware ignores
 * them — presumably; see default-case handling not visible here). */
139 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
146 sr |= NVFX_FP_REG_NEGATE;
/* Absolute-value flags for the three sources live at bits 29..31 of
 * instruction word 1. */
149 hw[1] |= (1 << (29 + pos));
151 sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
152 (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
153 (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
154 (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
/*
 * Encode the destination register of the instruction at fpc->inst_offset,
 * tracking the high-water mark of temp usage in fpc->num_regs.
 */
160 emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
162 struct nvfx_fragment_program *fp = fpc->fp;
163 uint32_t *hw = &fp->insn[fpc->inst_offset];
/* Track maximum temp index; used later for NV40 TEMP_COUNT / fp_control. */
167 if (fpc->num_regs < (dst.index + 1))
168 fpc->num_regs = dst.index + 1;
/* Output register 1 (secondary color / depth path): set the magic
 * fp_control bits — NOTE(review): 0xe meaning is hardware-specific,
 * confirm against the NV30/NV40 register database. */
171 if (dst.index == 1) {
172 fp->fp_control |= 0xe;
174 hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
184 hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
/*
 * Emit one 4-word ALU instruction: opcode, write mask, saturation,
 * condition-code update/test, destination and up to three sources.
 */
188 nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
189 struct nvfx_sreg dst, int mask,
190 struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
192 struct nvfx_fragment_program *fp = fpc->fp;
/* Remember where this instruction starts so emit_src/emit_dst can
 * patch the same words. */
195 fpc->inst_offset = fp->insn_len;
198 hw = &fp->insn[fpc->inst_offset];
199 memset(hw, 0, sizeof(uint32_t) * 4);
/* KIL requires a flag in the program control word. */
201 if (op == NVFX_FP_OP_OPCODE_KIL)
202 fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
203 hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
204 hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
205 hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
208 hw[0] |= NVFX_FP_OP_OUT_SAT;
/* Condition-code write enable plus the condition test and its swizzle
 * (used e.g. by the CMP and KIL lowerings below). */
211 hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
212 hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
213 hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
214 (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
215 (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
216 (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
219 emit_src(fpc, 0, s0);
220 emit_src(fpc, 1, s1);
221 emit_src(fpc, 2, s2);
/*
 * Emit a texture instruction: a normal ALU encoding plus the sampler
 * unit in word 0, and record the unit in the program's sampler mask.
 */
225 nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
226 struct nvfx_sreg dst, int mask,
227 struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
229 struct nvfx_fragment_program *fp = fpc->fp;
231 nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
233 fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
234 fp->samplers |= (1 << unit);
/*
 * Translate a TGSI source operand into an nvfx_sreg, resolving the
 * register file and copying the modifier/swizzle bits.
 */
237 static INLINE struct nvfx_sreg
238 tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
240 struct nvfx_sreg src;
242 switch (fsrc->Register.File) {
243 case TGSI_FILE_INPUT:
/* Inputs were remapped to hardware attribute selectors at decl time. */
244 src = nvfx_sr(NVFXSR_INPUT,
245 fpc->attrib_map[fsrc->Register.Index]);
247 case TGSI_FILE_CONSTANT:
/* Constant-buffer reference: vals == NULL, patched at upload. */
248 src = constant(fpc, fsrc->Register.Index, NULL);
250 case TGSI_FILE_IMMEDIATE:
251 assert(fsrc->Register.Index < fpc->nr_imm);
252 src = fpc->imm[fsrc->Register.Index];
254 case TGSI_FILE_TEMPORARY:
255 src = fpc->r_temp[fsrc->Register.Index];
257 /* NV40 fragprog result regs are just temps, so this is simple */
258 case TGSI_FILE_OUTPUT:
259 src = fpc->r_result[fsrc->Register.Index];
262 NOUVEAU_ERR("bad src file\n");
/* Copy TGSI source modifiers and the per-component swizzle. */
266 src.abs = fsrc->Register.Absolute;
267 src.negate = fsrc->Register.Negate;
268 src.swz[0] = fsrc->Register.SwizzleX;
269 src.swz[1] = fsrc->Register.SwizzleY;
270 src.swz[2] = fsrc->Register.SwizzleZ;
271 src.swz[3] = fsrc->Register.SwizzleW;
/* Translate a TGSI destination operand into an nvfx_sreg.  Unknown files
 * report an error and fall back to a NONE register. */
275 static INLINE struct nvfx_sreg
276 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
277 switch (fdst->Register.File) {
278 case TGSI_FILE_OUTPUT:
279 return fpc->r_result[fdst->Register.Index];
280 case TGSI_FILE_TEMPORARY:
281 return fpc->r_temp[fdst->Register.Index];
/* NOTE(review): this NONE return belongs to a case whose label is not
 * visible in this chunk (presumably TGSI_FILE_NULL). */
283 return nvfx_sr(NVFXSR_NONE, 0);
285 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
286 return nvfx_sr(NVFXSR_NONE, 0);
/* NOTE(review): interior of the TGSI->hardware writemask translation
 * helper; its signature/return are not visible in this chunk. */
295 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
296 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
297 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
298 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
/*
 * Translate one TGSI instruction into hardware instructions.
 *
 * First gathers source operands (copying inputs/constants/immediates into
 * temps when the hardware's one-attribute/one-constant-per-instruction
 * limits would otherwise be violated), then lowers the TGSI opcode —
 * either to a single hardware op or to a short sequence for compound
 * opcodes (CMP, DPH, LRP, POW, RFL, RSQ, SCS, XPD, KIL, DDX/DDY).
 */
303 nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
304 const struct tgsi_full_instruction *finst)
306 const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
307 struct nvfx_sreg src[3], dst, tmp;
/* ai/ci/ii remember the single attribute/constant/immediate index this
 * instruction is already reading, so duplicates can share the operand. */
309 int ai = -1, ci = -1, ii = -1;
312 if (finst->Instruction.Opcode == TGSI_OPCODE_END)
/* Pass 1: temporaries can always be read directly. */
315 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
316 const struct tgsi_full_src_register *fsrc;
318 fsrc = &finst->Src[i];
319 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
320 src[i] = tgsi_src(fpc, fsrc);
/* Pass 2: resolve the remaining files, inserting MOV-to-temp copies when
 * a second distinct input or constant/immediate would be needed. */
324 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
325 const struct tgsi_full_src_register *fsrc;
327 fsrc = &finst->Src[i];
329 switch (fsrc->Register.File) {
330 case TGSI_FILE_INPUT:
/* Only one distinct input attribute may be read per instruction. */
331 if (ai == -1 || ai == fsrc->Register.Index) {
332 ai = fsrc->Register.Index;
333 src[i] = tgsi_src(fpc, fsrc);
336 arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
337 tgsi_src(fpc, fsrc), none, none);
340 case TGSI_FILE_CONSTANT:
/* Constants and immediates share the single inline-constant slot. */
341 if ((ci == -1 && ii == -1) ||
342 ci == fsrc->Register.Index) {
343 ci = fsrc->Register.Index;
344 src[i] = tgsi_src(fpc, fsrc);
347 arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
348 tgsi_src(fpc, fsrc), none, none);
351 case TGSI_FILE_IMMEDIATE:
352 if ((ci == -1 && ii == -1) ||
353 ii == fsrc->Register.Index) {
354 ii = fsrc->Register.Index;
355 src[i] = tgsi_src(fpc, fsrc);
358 arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
359 tgsi_src(fpc, fsrc), none, none);
362 case TGSI_FILE_TEMPORARY:
/* Already handled in pass 1 above. */
365 case TGSI_FILE_SAMPLER:
366 unit = fsrc->Register.Index;
368 case TGSI_FILE_OUTPUT:
371 NOUVEAU_ERR("bad src file\n");
376 dst = tgsi_dst(fpc, &finst->Dst[0]);
377 mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
378 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
380 switch (finst->Instruction.Opcode) {
381 case TGSI_OPCODE_ABS:
382 arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
384 case TGSI_OPCODE_ADD:
385 arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
387 case TGSI_OPCODE_CMP:
/* CMP: set CC from src0, then conditionally MOV src2 (>= 0) or
 * src1 (< 0) into the destination. */
388 tmp = nvfx_sr(NVFXSR_NONE, 0);
390 arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
391 dst.cc_test = NVFX_COND_GE;
392 arith(fpc, sat, MOV, dst, mask, src[2], none, none);
393 dst.cc_test = NVFX_COND_LT;
394 arith(fpc, sat, MOV, dst, mask, src[1], none, none);
396 case TGSI_OPCODE_COS:
397 arith(fpc, sat, COS, dst, mask, src[0], none, none);
399 case TGSI_OPCODE_DDX:
/* DDX/DDY: hardware derivative presumably only covers X/Y lanes, so
 * Z/W writes are done via a swizzled second pass — confirm against
 * the NV30/NV40 ISA docs. */
400 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
402 arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
403 swz(src[0], Z, W, Z, W), none, none);
404 arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
405 swz(tmp, X, Y, X, Y), none, none);
406 arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
408 arith(fpc, 0, MOV, dst, mask, tmp, none, none);
410 arith(fpc, sat, DDX, dst, mask, src[0], none, none);
413 case TGSI_OPCODE_DDY:
414 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
416 arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
417 swz(src[0], Z, W, Z, W), none, none);
418 arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
419 swz(tmp, X, Y, X, Y), none, none);
420 arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
422 arith(fpc, 0, MOV, dst, mask, tmp, none, none);
424 arith(fpc, sat, DDY, dst, mask, src[0], none, none);
427 case TGSI_OPCODE_DP3:
428 arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
430 case TGSI_OPCODE_DP4:
431 arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
433 case TGSI_OPCODE_DPH:
/* DPH = DP3(src0, src1) + src1.w */
435 arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
436 arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
437 swz(src[1], W, W, W, W), none);
439 case TGSI_OPCODE_DST:
440 arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
442 case TGSI_OPCODE_EX2:
443 arith(fpc, sat, EX2, dst, mask, src[0], none, none);
445 case TGSI_OPCODE_FLR:
446 arith(fpc, sat, FLR, dst, mask, src[0], none, none);
448 case TGSI_OPCODE_FRC:
449 arith(fpc, sat, FRC, dst, mask, src[0], none, none);
451 case TGSI_OPCODE_KILP:
/* Unconditional kill. */
452 arith(fpc, 0, KIL, none, 0, none, none, none);
454 case TGSI_OPCODE_KIL:
/* Conditional kill: set CC from src0, kill where any component < 0. */
455 dst = nvfx_sr(NVFXSR_NONE, 0);
457 arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
458 dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
459 arith(fpc, 0, KIL, dst, 0, none, none, none);
461 case TGSI_OPCODE_LG2:
462 arith(fpc, sat, LG2, dst, mask, src[0], none, none);
464 // case TGSI_OPCODE_LIT:
465 case TGSI_OPCODE_LRP:
/* NV30 has a native LRP; otherwise expand to MAD(-a,c,c); MAD(a,b,tmp). */
467 arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
470 arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
471 arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
474 case TGSI_OPCODE_MAD:
475 arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
477 case TGSI_OPCODE_MAX:
478 arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
480 case TGSI_OPCODE_MIN:
481 arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
483 case TGSI_OPCODE_MOV:
484 arith(fpc, sat, MOV, dst, mask, src[0], none, none);
486 case TGSI_OPCODE_MUL:
487 arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
489 case TGSI_OPCODE_POW:
/* POW: native on NV30, otherwise EX2(LG2(x) * y) on scalar X. */
491 arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
494 arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
495 swz(src[0], X, X, X, X), none, none);
496 arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
497 swz(src[1], X, X, X, X), none);
498 arith(fpc, sat, EX2, dst, mask,
499 swz(tmp, X, X, X, X), none, none);
502 case TGSI_OPCODE_RCP:
503 arith(fpc, sat, RCP, dst, mask, src[0], none, none);
505 case TGSI_OPCODE_RET:
508 case TGSI_OPCODE_RFL:
/* RFL: native on NV30; otherwise 2*dot(n,i)/dot(n,n)*n - i. */
510 arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
513 arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
514 arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
515 arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
516 swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
517 arith(fpc, sat, MAD, dst, mask,
518 swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
521 case TGSI_OPCODE_RSQ:
/* RSQ: native scalar op on NV30; otherwise EX2(-LG2(|x|)/2). */
523 arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
526 arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
527 abs(swz(src[0], X, X, X, X)), none, none);
528 arith(fpc, sat, EX2, dst, mask,
529 neg(swz(tmp, X, X, X, X)), none, none);
532 case TGSI_OPCODE_SCS:
533 /* avoid overwriting the source */
534 if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
/* Order COS/SIN so the component read second is not clobbered first. */
536 if (mask & NVFX_FP_MASK_X) {
537 arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
538 swz(src[0], X, X, X, X), none, none);
540 if (mask & NVFX_FP_MASK_Y) {
541 arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
542 swz(src[0], X, X, X, X), none, none);
547 if (mask & NVFX_FP_MASK_Y) {
548 arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
549 swz(src[0], X, X, X, X), none, none);
551 if (mask & NVFX_FP_MASK_X) {
552 arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
553 swz(src[0], X, X, X, X), none, none);
557 case TGSI_OPCODE_SEQ:
558 arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
560 case TGSI_OPCODE_SFL:
561 arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
563 case TGSI_OPCODE_SGE:
564 arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
566 case TGSI_OPCODE_SGT:
567 arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
569 case TGSI_OPCODE_SIN:
570 arith(fpc, sat, SIN, dst, mask, src[0], none, none);
572 case TGSI_OPCODE_SLE:
573 arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
575 case TGSI_OPCODE_SLT:
576 arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
578 case TGSI_OPCODE_SNE:
579 arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
581 case TGSI_OPCODE_STR:
582 arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
584 case TGSI_OPCODE_SUB:
/* SUB = ADD(a, -b). */
585 arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
587 case TGSI_OPCODE_TEX:
588 tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
590 case TGSI_OPCODE_TXB:
591 tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
593 case TGSI_OPCODE_TXP:
594 tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
596 case TGSI_OPCODE_XPD:
/* Cross product via swizzled MUL + MAD; W is masked out of the result. */
598 arith(fpc, 0, MUL, tmp, mask,
599 swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
600 arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
601 swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
605 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
/*
 * Map a TGSI input declaration's semantic to a hardware input-source
 * selector and record it in fpc->attrib_map for tgsi_src().
 */
614 nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
615 const struct tgsi_full_declaration *fdec)
619 switch (fdec->Semantic.Name) {
620 case TGSI_SEMANTIC_POSITION:
621 hw = NVFX_FP_OP_INPUT_SRC_POSITION;
623 case TGSI_SEMANTIC_COLOR:
/* Only two color interpolants exist (COL0/COL1). */
624 if (fdec->Semantic.Index == 0) {
625 hw = NVFX_FP_OP_INPUT_SRC_COL0;
627 if (fdec->Semantic.Index == 1) {
628 hw = NVFX_FP_OP_INPUT_SRC_COL1;
630 NOUVEAU_ERR("bad colour semantic index\n");
634 case TGSI_SEMANTIC_FOG:
635 hw = NVFX_FP_OP_INPUT_SRC_FOGC;
637 case TGSI_SEMANTIC_GENERIC:
/* Generic varyings map onto the 8 texcoord interpolators. */
638 if (fdec->Semantic.Index <= 7) {
639 hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
642 NOUVEAU_ERR("bad generic semantic index\n");
647 NOUVEAU_ERR("bad input semantic\n");
651 fpc->attrib_map[fdec->Range.First] = hw;
/*
 * Map a TGSI output declaration to a hardware result register and
 * permanently reserve the corresponding temp (result regs are temps).
 */
656 nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
657 const struct tgsi_full_declaration *fdec)
659 unsigned idx = fdec->Range.First;
662 switch (fdec->Semantic.Name) {
663 case TGSI_SEMANTIC_POSITION:
666 case TGSI_SEMANTIC_COLOR:
/* Color outputs: reg 0 is COLOR0; MRT colors 1..3 use regs 2..4
 * (reg 1 presumably being the depth/secondary slot — see emit_dst). */
668 switch (fdec->Semantic.Index) {
669 case 0: hw = 0; break;
670 case 1: hw = 2; break;
671 case 2: hw = 3; break;
672 case 3: hw = 4; break;
/* Only NV4x supports the extra MRT result registers. */
674 if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
675 NOUVEAU_ERR("bad rcol index\n");
680 NOUVEAU_ERR("bad output semantic\n");
684 fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
/* Reserve the temp so ordinary allocation never clobbers a result. */
685 fpc->r_temps |= (1 << hw);
/*
 * Pre-pass over the TGSI token stream: process input/output/temporary
 * declarations, lower float immediates to inline constants, and
 * pre-allocate one hardware temp per TGSI temporary.
 */
690 nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
692 struct tgsi_parse_context p;
693 int high_temp = -1, i;
695 tgsi_parse_init(&p, fpc->fp->pipe.tokens);
696 while (!tgsi_parse_end_of_tokens(&p)) {
697 const union tgsi_full_token *tok = &p.FullToken;
699 tgsi_parse_token(&p);
700 switch(tok->Token.Type) {
701 case TGSI_TOKEN_TYPE_DECLARATION:
703 const struct tgsi_full_declaration *fdec;
704 fdec = &p.FullToken.FullDeclaration;
705 switch (fdec->Declaration.File) {
706 case TGSI_FILE_INPUT:
707 if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
710 case TGSI_FILE_OUTPUT:
711 if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
714 case TGSI_FILE_TEMPORARY:
/* Track the highest declared temporary index. */
715 if (fdec->Range.Last > high_temp) {
725 case TGSI_TOKEN_TYPE_IMMEDIATE:
727 struct tgsi_full_immediate *imm;
730 imm = &p.FullToken.FullImmediate;
731 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
732 assert(fpc->nr_imm < MAX_IMM);
/* Immediates become inline constants (pipe index -1 = literal). */
734 vals[0] = imm->u[0].Float;
735 vals[1] = imm->u[1].Float;
736 vals[2] = imm->u[2].Float;
737 vals[3] = imm->u[3].Float;
738 fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
/* NOTE(review): the guard converting high_temp from a max index to a
 * count (and the NULL check on CALLOC) is not visible in this chunk. */
748 fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
749 for (i = 0; i < high_temp; i++)
750 fpc->r_temp[i] = temp(fpc);
/* Keep the TGSI temps permanently allocated across instructions. */
751 fpc->r_temps_discard = 0;
/*
 * Full TGSI -> NV30/NV40 fragment-program translation: run the prepare
 * pass, walk all instruction tokens, then finalize the instruction
 * stream (end flag, register count, trailing NOP+END).
 */
764 nvfx_fragprog_translate(struct nvfx_context *nvfx,
765 struct nvfx_fragment_program *fp)
767 struct tgsi_parse_context parse;
768 struct nvfx_fpc *fpc = NULL;
770 fpc = CALLOC(1, sizeof(struct nvfx_fpc));
776 if (!nvfx_fragprog_prepare(nvfx, fpc)) {
781 tgsi_parse_init(&parse, fp->pipe.tokens);
783 while (!tgsi_parse_end_of_tokens(&parse)) {
784 tgsi_parse_token(&parse);
786 switch (parse.FullToken.Token.Type) {
787 case TGSI_TOKEN_TYPE_INSTRUCTION:
789 const struct tgsi_full_instruction *finst;
791 finst = &parse.FullToken.FullInstruction;
792 if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
/* NV30 path: temp count packed into the low fp_control bits;
 * NV40 path: explicit TEMP_COUNT field. */
802 fp->fp_control |= (fpc->num_regs-1)/2;
804 fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
806 /* Terminate final instruction */
808 fp->insn[fpc->inst_offset] |= 0x00000001;
810 /* Append NOP + END instruction, may or may not be necessary. */
811 fpc->inst_offset = fp->insn_len;
813 fp->insn[fpc->inst_offset + 0] = 0x00000001;
814 fp->insn[fpc->inst_offset + 1] = 0x00000000;
815 fp->insn[fpc->inst_offset + 2] = 0x00000000;
816 fp->insn[fpc->inst_offset + 3] = 0x00000000;
818 fp->translated = TRUE;
820 tgsi_parse_free(&parse);
/*
 * Copy the assembled instruction words into the program's GPU buffer.
 * Little-endian hosts can write directly; big-endian hosts must swap
 * the 16-bit halves of each word first.
 */
827 nvfx_fragprog_upload(struct nvfx_context *nvfx,
828 struct nvfx_fragment_program *fp)
830 struct pipe_context *pipe = &nvfx->pipe;
/* Runtime endianness probe: first byte of a uint32_t 1. */
831 const uint32_t le = 1;
/* Debug dump of the raw instruction stream (likely guarded by a
 * disabled #if in the lines not visible here). */
834 for (i = 0; i < fp->insn_len; i++) {
835 fflush(stdout); fflush(stderr);
836 NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
837 fflush(stdout); fflush(stderr);
841 if ((*(const uint8_t *)&le)) {
842 /* Can do this with an inline transfer */
843 pipe_buffer_write(pipe,
846 fp->insn_len * sizeof fp->insn[0],
849 struct pipe_transfer *transfer;
853 map = pipe_buffer_map(pipe, fp->buffer,
857 /* Weird swapping for big-endian chips */
858 for (i = 0; i < fp->insn_len; i++) {
859 map[i] = ((fp->insn[i] & 0xffff) << 16) |
860 ((fp->insn[i] >> 16) & 0xffff);
863 pipe_buffer_unmap(pipe, fp->buffer, transfer);
/*
 * State-tracker validate hook: translate the bound fragment program on
 * first use, upload it, build the hardware state object, and re-patch /
 * re-upload when the user constant buffer changed.
 */
868 nvfx_fragprog_validate(struct nvfx_context *nvfx)
870 struct pipe_context *pipe = &nvfx->pipe;
871 struct nvfx_fragment_program *fp = nvfx->fragprog;
872 struct pipe_resource *constbuf =
873 nvfx->constbuf[PIPE_SHADER_FRAGMENT];
874 struct pipe_screen *pscreen = nvfx->pipe.screen;
875 struct nouveau_stateobj *so;
876 boolean new_consts = FALSE;
/* Already translated: skip straight to constant patching. */
880 goto update_constants;
882 nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;
883 nvfx_fragprog_translate(nvfx, fp);
/* Translation failure falls back to software rasterization. */
884 if (!fp->translated) {
885 nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;
889 fp->buffer = pipe_buffer_create(pscreen,
890 /* XXX: no alignment, maybe use a priv bind flag
893 0, fp->insn_len * 4);
894 nvfx_fragprog_upload(nvfx, fp);
/* Build the state object pointing the 3D engine at the program. */
896 so = so_new(4, 4, 1);
897 so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1);
898 so_reloc (so, nvfx_resource(fp->buffer)->bo, 0, NOUVEAU_BO_VRAM |
899 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
900 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
901 NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
902 so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1);
903 so_data (so, fp->fp_control);
905 so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1);
/* NOTE(review): (1<<16)|0x4 is a magic REG_CONTROL value — confirm
 * against the NV30/NV40 register database. */
906 so_data (so, (1<<16)|0x4);
907 so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1);
908 so_data (so, fp->samplers);
916 struct pipe_transfer *transfer;
919 map = pipe_buffer_map(pipe, constbuf,
923 /* XXX: probably a bad idea to be reading back data
924 * from a buffer the gpu has been using. Not really
925 * sure what this code is doing though, or how to
/* Patch changed constant-buffer values into the inline constant slots
 * recorded by emit_src(); skip slots that are already up to date. */
928 for (i = 0; i < fp->nr_consts; i++) {
929 struct nvfx_fragment_program_data *fpd = &fp->consts[i];
930 uint32_t *p = &fp->insn[fpd->offset];
931 uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
933 if (!memcmp(p, cb, 4 * sizeof(float)))
935 memcpy(p, cb, 4 * sizeof(float));
938 pipe_buffer_unmap(pipe, constbuf, transfer);
/* Constants changed: re-upload the whole program image. */
941 nvfx_fragprog_upload(nvfx, fp);
944 if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) {
945 so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]);
/* Release the program's GPU buffer and drop the state-object reference
 * (freeing of insn/consts is not visible in this chunk). */
953 nvfx_fragprog_destroy(struct nvfx_context *nvfx,
954 struct nvfx_fragment_program *fp)
957 pipe_resource_reference(&fp->buffer, NULL);
960 so_ref(NULL, &fp->so);
966 struct nvfx_state_entry nvfx_state_fragprog = {
967 .validate = nvfx_fragprog_validate,
969 .pipe = NVFX_NEW_FRAGPROG,
970 .hw = NVFX_STATE_FRAGPROG