2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
37 #include "radeon_debug.h"
38 #include "r600_context.h"
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
43 #define USE_CF_FOR_CONTINUE_BREAK 1
44 #define USE_CF_FOR_POP_AFTER 1
/* Pre-encoded Mesa IR (struct prog_instruction) for a 12-instruction
 * subroutine called "noise1".  Read top to bottom the opcodes are:
 * BGNSUB, MOV, MOV, MOV, SGT, IF, MOV, RET, ENDIF, MOV, RET, ENDSUB --
 * i.e. a compare followed by an IF that returns early with one value,
 * otherwise a second MOV/RET pair provides the result.
 * The numeric fields are raw positional initializers of prog_instruction
 * and its nested register structs; see program/prog_instruction.h for the
 * field order before editing any of them by hand. */
46 struct prog_instruction noise1_insts[12] = {
47 {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
52 {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
53 {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
58 {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
/* Float constants that accompany noise1_insts (same "noise1" prefix).
 * The array is declared as 2 rows of 4 floats; one row is spelled out
 * here.  NOTE(review): if no further row is listed, C zero-initializes
 * the remaining row -- confirm that is intended. */
60 float noise1_const[2][4] = {
61 {0.300000f, 0.900000f, 0.500000f, 0.300000f}
/* COMPILED_SUB descriptor for the "noise1" pre-built subroutine.
 * NOTE(review): presumably it ties together noise1_insts and noise1_const
 * -- confirm against the COMPILED_SUB field list. */
64 COMPILED_SUB noise1_presub = {
79 BITS addrmode_PVSDST(PVSDST * pPVSDST)
81 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
84 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
86 pPVSDST->addrmode0 = addrmode & 1;
87 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
90 void nomask_PVSDST(PVSDST * pPVSDST)
92 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
95 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
97 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
100 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
102 pPVSSRC->addrmode0 = addrmode & 1;
103 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
/* Set the swizzle of a source operand from one selector value.
 * The statement below stores swz into swizzlew.
 * NOTE(review): presumably this is the tail of a chained assignment that
 * also sets swizzlex/y/z to the same selector (broadcast) -- confirm. */
107 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
112 pPVSSRC->swizzlew = swz;
115 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
117 pPVSSRC->swizzlex = SQ_SEL_X;
118 pPVSSRC->swizzley = SQ_SEL_Y;
119 pPVSSRC->swizzlez = SQ_SEL_Z;
120 pPVSSRC->swizzlew = SQ_SEL_W;
/* Apply a second swizzle (x, y, z, w) on top of the operand's current one.
 *
 * Each requested selector that names a component (SQ_SEL_X..SQ_SEL_W) is
 * replaced by whatever the operand currently selects for that component;
 * the four resolved selectors are then stored back as the new swizzle.
 * All four look-ups read the operand before any field is written (the
 * stores are the last four statements), so the composition always uses
 * the old swizzle.
 * NOTE(review): selectors outside X..W (e.g. constant selects) have no
 * case listed here and presumably pass through unchanged -- confirm. */
124 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
/* resolve the new x selector */
128 case SQ_SEL_X: x = pPVSSRC->swizzlex;
130 case SQ_SEL_Y: x = pPVSSRC->swizzley;
132 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
134 case SQ_SEL_W: x = pPVSSRC->swizzlew;
/* resolve the new y selector */
141 case SQ_SEL_X: y = pPVSSRC->swizzlex;
143 case SQ_SEL_Y: y = pPVSSRC->swizzley;
145 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
147 case SQ_SEL_W: y = pPVSSRC->swizzlew;
/* resolve the new z selector */
154 case SQ_SEL_X: z = pPVSSRC->swizzlex;
156 case SQ_SEL_Y: z = pPVSSRC->swizzley;
158 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
160 case SQ_SEL_W: z = pPVSSRC->swizzlew;
/* resolve the new w selector */
167 case SQ_SEL_X: w = pPVSSRC->swizzlex;
169 case SQ_SEL_Y: w = pPVSSRC->swizzley;
171 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
173 case SQ_SEL_W: w = pPVSSRC->swizzlew;
/* commit the composed swizzle */
178 pPVSSRC->swizzlex = x;
179 pPVSSRC->swizzley = y;
180 pPVSSRC->swizzlez = z;
181 pPVSSRC->swizzlew = w;
/* NOTE(review): presumably sets the negate flags (negx..negw) of the
 * source operand -- confirm against the body. */
184 void neg_PVSSRC(PVSSRC* pPVSSRC)
/* NOTE(review): presumably clears the negate flags (negx..negw) of the
 * source operand -- confirm against the body. */
192 void noneg_PVSSRC(PVSSRC* pPVSSRC)
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
203 pPVSSRC->negx = !pPVSSRC->negx;
204 pPVSSRC->negy = !pPVSSRC->negy;
205 pPVSSRC->negz = !pPVSSRC->negz;
206 pPVSSRC->negw = !pPVSSRC->negw;
209 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
213 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
214 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
215 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
216 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
221 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
225 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
226 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
227 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
228 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
233 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
235 return (pOutVTXFmt0->point_size |
236 pOutVTXFmt0->edge_flag |
237 pOutVTXFmt0->rta_index |
238 pOutVTXFmt0->kill_flag |
239 pOutVTXFmt0->viewport_index);
/* Tell whether a fragment output format exports a depth-class component.
 * The result is a bitwise OR of output-format flags -- depth, stencil_ref
 * and coverage_to_mask are the terms listed here -- so it is non-zero as
 * soon as any OR-ed flag is set. */
242 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
244 return (pFPOutFmt->depth |
245 pFPOutFmt->stencil_ref |
247 pFPOutFmt->coverage_to_mask);
/* Tell whether the ALU instruction described by dest is a reduction op
 * (R6xx/R7xx opcode numbering).
 * Only two-operand encodings are considered (dst.op3 == 0); among those,
 * DOT4, DOT4_IEEE and CUBE are matched.
 * NOTE(review): presumably returns GL_TRUE on a match and GL_FALSE
 * otherwise -- confirm. */
250 GLboolean is_reduction_opcode(PVSDWORD* dest)
252 if (dest->dst.op3 == 0)
254 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
/* Evergreen counterpart of is_reduction_opcode: same test, but against
 * the EG_OP2_INST_* opcode numbering.
 * Only two-operand encodings are considered (dst.op3 == 0); among those,
 * DOT4, DOT4_IEEE and CUBE are matched.
 * NOTE(review): presumably returns GL_TRUE on a match and GL_FALSE
 * otherwise -- confirm. */
263 GLboolean EG_is_reduction_opcode(PVSDWORD* dest)
265 if (dest->dst.op3 == 0)
267 if ( (dest->dst.opcode == EG_OP2_INST_DOT4 || dest->dst.opcode == EG_OP2_INST_DOT4_IEEE || dest->dst.opcode == EG_OP2_INST_CUBE) )
/* Map a GL vertex element type and channel count to a hardware FMT_*
 * surface format.
 *
 *   eType         GL element type (GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT,
 *                 GL_UNSIGNED_INT, ... )
 *   nChannels     number of components per element
 *   pClient_size  optional out-parameter; when non-NULL it receives
 *                 uiElemSize * nChannels
 *
 * format starts as FMT_INVALID and uiElemSize as 0, so those are the
 * values that remain if no case assigns them.
 * Note that the 8-bit and 16-bit groups each list the 4-component format
 * twice: per the in-line comments, 3-component data is fetched with the
 * 4-component format because that works better on the hardware.  The
 * size written to *pClient_size is still computed from nChannels as
 * passed in, not from the widened format. */
276 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
278 GLuint format = FMT_INVALID;
279 GLuint uiElemSize = 0;
/* 8-bit integer element types */
284 case GL_UNSIGNED_BYTE:
289 format = FMT_8; break;
291 format = FMT_8_8; break;
293 /* for some (small/unaligned) strides using 4 comps works
294 * better, probably same as GL_SHORT below
295 * test piglit/draw-vertices */
296 format = FMT_8_8_8_8; break;
298 format = FMT_8_8_8_8; break;
/* 16-bit integer element types */
304 case GL_UNSIGNED_SHORT:
310 format = FMT_16; break;
312 format = FMT_16_16; break;
314 /* 3 comp GL_SHORT vertex format doesnt work on r700
315 4 somehow works, test - sauerbraten */
316 format = FMT_16_16_16_16; break;
318 format = FMT_16_16_16_16; break;
/* 32-bit integer element types */
324 case GL_UNSIGNED_INT:
330 format = FMT_32; break;
332 format = FMT_32_32; break;
334 format = FMT_32_32_32; break;
336 format = FMT_32_32_32_32; break;
/* 32-bit float formats */
347 format = FMT_32_FLOAT; break;
349 format = FMT_32_32_FLOAT; break;
351 format = FMT_32_32_32_FLOAT; break;
353 format = FMT_32_32_32_32_FLOAT; break;
/* a second element type that is also mapped onto the 32-bit float formats */
363 format = FMT_32_FLOAT; break;
365 format = FMT_32_32_FLOAT; break;
367 format = FMT_32_32_32_FLOAT; break;
369 format = FMT_32_32_32_32_FLOAT; break;
376 //GL_ASSERT_NO_CASE();
/* report the client-side element size only when the caller asked for it */
379 if(NULL != pClient_size)
381 *pClient_size = uiElemSize * nChannels;
/* Return how many source operands an R6xx/R7xx ALU instruction takes.
 *
 *   opcode  SQ_OP2_INST_* opcode value
 *   nIsOp3  NOTE(review): presumably non-zero marks a three-operand (OP3)
 *           encoding, which would short-circuit the opcode lookup --
 *           confirm.
 *
 * The opcodes are listed in two groups.  NOTE(review): presumably the
 * first group (ADD, MUL, SETx, DOT4, ...) reports 2 operands and the
 * second group (MOV, FRACT, SIN, ...) reports 1 -- confirm.
 * An opcode found in neither group is reported through radeon_error(). */
387 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
396 case SQ_OP2_INST_ADD:
397 case SQ_OP2_INST_KILLE:
398 case SQ_OP2_INST_KILLGT:
399 case SQ_OP2_INST_KILLGE:
400 case SQ_OP2_INST_KILLNE:
401 case SQ_OP2_INST_MUL:
402 case SQ_OP2_INST_MAX:
403 case SQ_OP2_INST_MIN:
404 //case SQ_OP2_INST_MAX_DX10:
405 //case SQ_OP2_INST_MIN_DX10:
406 case SQ_OP2_INST_SETE:
407 case SQ_OP2_INST_SETNE:
408 case SQ_OP2_INST_SETGT:
409 case SQ_OP2_INST_SETGE:
410 case SQ_OP2_INST_PRED_SETE:
411 case SQ_OP2_INST_PRED_SETGT:
412 case SQ_OP2_INST_PRED_SETGE:
413 case SQ_OP2_INST_PRED_SETNE:
414 case SQ_OP2_INST_DOT4:
415 case SQ_OP2_INST_DOT4_IEEE:
416 case SQ_OP2_INST_CUBE:
419 case SQ_OP2_INST_MOV:
420 case SQ_OP2_INST_MOVA_FLOOR:
421 case SQ_OP2_INST_FRACT:
422 case SQ_OP2_INST_FLOOR:
423 case SQ_OP2_INST_TRUNC:
424 case SQ_OP2_INST_EXP_IEEE:
425 case SQ_OP2_INST_LOG_CLAMPED:
426 case SQ_OP2_INST_LOG_IEEE:
427 case SQ_OP2_INST_RECIP_IEEE:
428 case SQ_OP2_INST_RECIPSQRT_IEEE:
429 case SQ_OP2_INST_FLT_TO_INT:
430 case SQ_OP2_INST_SIN:
431 case SQ_OP2_INST_COS:
434 default: radeon_error(
435 "Need instruction operand number for %x.\n", opcode);
/* Return how many source operands an Evergreen ALU instruction takes.
 * Same structure as r700GetNumOperands but keyed on EG_OP2_INST_* values;
 * the second group additionally lists FLT_TO_INT_FLOOR and MOVA_INT, and
 * MOVA_FLOOR is commented out.
 *
 *   opcode  EG_OP2_INST_* opcode value
 *   nIsOp3  NOTE(review): presumably non-zero marks a three-operand (OP3)
 *           encoding -- confirm.
 *
 * NOTE(review): presumably the first group reports 2 operands and the
 * second group reports 1 -- confirm.
 * An opcode found in neither group is reported through radeon_error(). */
441 unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3)
450 case EG_OP2_INST_ADD:
451 case EG_OP2_INST_KILLE:
452 case EG_OP2_INST_KILLGT:
453 case EG_OP2_INST_KILLGE:
454 case EG_OP2_INST_KILLNE:
455 case EG_OP2_INST_MUL:
456 case EG_OP2_INST_MAX:
457 case EG_OP2_INST_MIN:
458 //case EG_OP2_INST_MAX_DX10:
459 //case EG_OP2_INST_MIN_DX10:
460 case EG_OP2_INST_SETE:
461 case EG_OP2_INST_SETNE:
462 case EG_OP2_INST_SETGT:
463 case EG_OP2_INST_SETGE:
464 case EG_OP2_INST_PRED_SETE:
465 case EG_OP2_INST_PRED_SETGT:
466 case EG_OP2_INST_PRED_SETGE:
467 case EG_OP2_INST_PRED_SETNE:
468 case EG_OP2_INST_DOT4:
469 case EG_OP2_INST_DOT4_IEEE:
470 case EG_OP2_INST_CUBE:
473 case EG_OP2_INST_MOV:
474 //case SQ_OP2_INST_MOVA_FLOOR:
475 case EG_OP2_INST_FRACT:
476 case EG_OP2_INST_FLOOR:
477 case EG_OP2_INST_TRUNC:
478 case EG_OP2_INST_EXP_IEEE:
479 case EG_OP2_INST_LOG_CLAMPED:
480 case EG_OP2_INST_LOG_IEEE:
481 case EG_OP2_INST_RECIP_IEEE:
482 case EG_OP2_INST_RECIPSQRT_IEEE:
483 case EG_OP2_INST_FLT_TO_INT:
484 case EG_OP2_INST_SIN:
485 case EG_OP2_INST_COS:
486 case EG_OP2_INST_FLT_TO_INT_FLOOR:
487 case EG_OP2_INST_MOVA_INT:
490 default: radeon_error(
491 "Need instruction operand number for %x.\n", opcode);
/* Put an assembler context into its initial state for a new shader.
 *
 *   spt      shader pipe type, stored as pAsm->currentShaderType
 *   pAsm     assembler state to reset
 *   pShader  shader object; it is initialized with Init_R700_Shader() and
 *            then attached to pAsm
 *
 * After the call: no clause is open (all cf_current_* pointers NULL, type
 * CF_EMPTY_CLAUSE), the export/register/constant counters are reset, all
 * export register numbers are -1 ("unassigned"), all input-usage flags
 * are GL_FALSE, the subroutine/caller/presub bookkeeping is empty, and
 * CALLSTACK[0] points at the shader's main CF instruction list, which is
 * also made the active list. */
497 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
501 Init_R700_Shader(pShader);
502 pAsm->pR700Shader = pShader;
503 pAsm->currentShaderType = spt;
505 pAsm->cf_last_export_ptr = NULL;
507 pAsm->cf_current_export_clause_ptr = NULL;
508 pAsm->cf_current_alu_clause_ptr = NULL;
509 pAsm->cf_current_tex_clause_ptr = NULL;
510 pAsm->cf_current_vtx_clause_ptr = NULL;
511 pAsm->cf_current_cf_clause_ptr = NULL;
513 // No clause has been created yet
514 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
516 pAsm->number_of_colorandz_exports = 0;
517 pAsm->number_of_exports = 0;
518 pAsm->number_of_export_opcodes = 0;
520 pAsm->alu_x_opcode = 0;
529 pAsm->uLastPosUpdate = 0;
/* clear the whole fragment output-format word in one store by viewing it as BITS */
531 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
535 pAsm->number_used_registers = 0;
/* NOTE(review): 256 looks like the first constant slot considered free -- confirm */
536 pAsm->uUsedConsts = 256;
540 pAsm->uBoolConsts = 0;
541 pAsm->uIntConsts = 0;
546 pAsm->fc_stack[0].type = FC_NONE;
551 pAsm->aArgSubst[3] = (-1);
/* -1 marks an export register as not yet assigned */
555 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
557 pAsm->color_export_register_number[i] = (-1);
561 pAsm->depth_export_register_number = (-1);
562 pAsm->stencil_export_register_number = (-1);
563 pAsm->coverage_to_mask_export_register_number = (-1);
564 pAsm->mask_export_register_number = (-1);
566 pAsm->starting_export_register_number = 0;
567 pAsm->starting_vfetch_register_number = 0;
568 pAsm->starting_temp_register_number = 0;
569 pAsm->uFirstHelpReg = 0;
571 pAsm->input_position_is_used = GL_FALSE;
572 pAsm->input_normal_is_used = GL_FALSE;
574 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
576 pAsm->input_color_is_used[ i ] = GL_FALSE;
579 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
581 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
/* no vertex fetch instruction has been assembled for any attribute yet */
584 for (i=0; i<VERT_ATTRIB_MAX; i++)
586 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
589 pAsm->number_of_inputs = 0;
591 pAsm->is_tex = GL_FALSE;
592 pAsm->need_tex_barrier = GL_FALSE;
/* subroutine and caller tables start out empty */
595 pAsm->unSubArraySize = 0;
596 pAsm->unSubArrayPointer = 0;
597 pAsm->callers = NULL;
598 pAsm->unCallerArraySize = 0;
599 pAsm->unCallerArrayPointer = 0;
/* call-stack level 0 is the main program: it emits into the shader's own CF list */
602 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
603 pAsm->CALLSTACK[0].plstCFInstructions_local
604 = &(pAsm->pR700Shader->lstCFInstructions);
606 pAsm->CALLSTACK[0].max = 0;
607 pAsm->CALLSTACK[0].current = 0;
609 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
613 pAsm->presubs = NULL;
614 pAsm->unPresubArraySize = 0;
615 pAsm->unNumPresub = 0;
616 pAsm->unCurNumILInsts = 0;
618 pAsm->unVetTexBits = 0;
/* Tell whether a Mesa IR opcode is handled as a texture instruction.
 * The opcodes tested are TEX, TXP, TXB, TXL and the derivative ops
 * DDX / DDY.
 * NOTE(review): presumably returns GL_TRUE for those and GL_FALSE for
 * everything else -- confirm. */
623 GLboolean IsTex(gl_inst_opcode Opcode)
625 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
626 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) || (OPCODE_TXL==Opcode) )
/* Classify a Mesa IR opcode as an ALU instruction.
 * NOTE(review): the existing TODO says flow-control and export opcodes
 * still need handling for higher spec levels; verify which opcodes are
 * currently accepted before relying on this. */
633 GLboolean IsAlu(gl_inst_opcode Opcode)
635 //TODO : more for fc and ex for higher spec.
/* Make new_clause_type the assembler's current clause type.
 *
 * When the requested type differs from pAsm->cf_current_clause_type:
 *   1. the clause pointer belonging to the currently open type (ALU, VTX,
 *      TEX, EXPORT or OTHER) is reset to NULL and the current type becomes
 *      CF_EMPTY_CLAUSE;
 *   2. the current type is then set to the requested one.  For
 *      CF_EXPORT_CLAUSE a fresh R700ControlFlowSXClause is also allocated,
 *      initialized and appended to the shader's CF instruction list; an
 *      allocation failure is reported.
 * When the type is unchanged nothing is done.
 *
 * Callers compare the result against GL_FALSE to detect failure.
 *
 * NOTE(review): the "Unknown CF_CLAUSE_TYPE" message in the first switch
 * prints new_clause_type, although that switch is over the *current*
 * clause type -- the logged value may be misleading. */
643 int check_current_clause(r700_AssemblerBase* pAsm,
644 CF_CLAUSE_TYPE new_clause_type)
646 if (pAsm->cf_current_clause_type != new_clause_type)
647 { //Close last open clause
648 switch (pAsm->cf_current_clause_type)
651 if ( pAsm->cf_current_alu_clause_ptr != NULL)
653 pAsm->cf_current_alu_clause_ptr = NULL;
657 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
659 pAsm->cf_current_vtx_clause_ptr = NULL;
663 if ( pAsm->cf_current_tex_clause_ptr != NULL)
665 pAsm->cf_current_tex_clause_ptr = NULL;
668 case CF_EXPORT_CLAUSE:
669 if ( pAsm->cf_current_export_clause_ptr != NULL)
671 pAsm->cf_current_export_clause_ptr = NULL;
674 case CF_OTHER_CLAUSE:
675 if ( pAsm->cf_current_cf_clause_ptr != NULL)
677 pAsm->cf_current_cf_clause_ptr = NULL;
680 case CF_EMPTY_CLAUSE:
684 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
688 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
/* open the requested clause type */
691 switch (new_clause_type)
694 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
697 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
700 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
702 case CF_EXPORT_CLAUSE:
704 R700ControlFlowSXClause* pR700ControlFlowSXClause
705 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
707 // Add new export instruction to control flow program
708 if (pR700ControlFlowSXClause != 0)
710 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
711 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
712 AddCFInstruction( pAsm->pR700Shader,
713 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
718 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
721 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
724 case CF_EMPTY_CLAUSE:
726 case CF_OTHER_CLAUSE:
727 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
731 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
739 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
741 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
746 pAsm->cf_current_cf_clause_ptr =
747 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
749 if (pAsm->cf_current_cf_clause_ptr != NULL)
751 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
752 AddCFInstruction( pAsm->pR700Shader,
753 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
757 radeon_error("Could not allocate a new VFetch CF instruction.\n");
/* Add a vertex fetch instruction to the shader, opening a new VTX control
 * flow clause when needed.
 *
 *   pAsm                    assembler state
 *   vertex_instruction_ptr  fetch instruction to add
 *
 * Steps:
 *   - switch the current clause type to CF_VTX_CLAUSE;
 *   - if there is no open VTX clause, or its count field has reached
 *     GetCFMaxInstructions(type) - 1, allocate a new generic CF clause,
 *     add it to the shader's CF list and program its word 1 (instruction
 *     VC / VTX, condition ACTIVE, count 0, barrier 1, everything else 0),
 *     then link the clause with the fetch instruction;
 *   - NOTE(review): otherwise (clause re-used, presumably the else arm)
 *     the clause's count field is incremented -- confirm;
 *   - finally the instruction is appended to the shader's VTX list.
 *
 * pAsm->unAsic == 8 selects the EG_* word layout written through
 * SETfield/GETbits; other ASICs use the m_Word1.f bitfield layout.
 * NOTE(review): the "clause full" test reads m_Word1.f.count even on the
 * unAsic == 8 path -- confirm both layouts place COUNT identically. */
764 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
765 R700VertexInstruction* vertex_instruction_ptr)
767 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
772 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
773 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
774 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
777 // Create new Vfetch control flow instruction for this new clause
778 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
780 if (pAsm->cf_current_vtx_clause_ptr != NULL)
782 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
783 AddCFInstruction( pAsm->pR700Shader,
784 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
788 radeon_error("Could not allocate a new VFetch CF instruction.\n");
/* program CF word 1 of the new clause: EG_* layout first, bitfield layout second */
792 if(8 == pAsm->unAsic)
794 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC,
795 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
796 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
797 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
798 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
799 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
800 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
801 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
802 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
803 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
804 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
805 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
806 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
807 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
808 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
809 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
810 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1,
811 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
815 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
816 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
817 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
818 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
819 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
820 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
821 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
822 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
823 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
826 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
/* bump the clause's instruction count, again once per word layout */
830 if(8 == pAsm->unAsic)
832 unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val,
833 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
834 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
835 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
839 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
843 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
/* Add a texture instruction to the shader, opening a new TEX control flow
 * clause when needed.
 *
 *   pAsm                 assembler state
 *   tex_instruction_ptr  texture instruction to add
 *
 * Steps:
 *   - switch the current clause type to CF_TEX_CLAUSE;
 *   - if there is no open TEX clause, or its count field has reached
 *     GetCFMaxInstructions(type) - 1, allocate a new generic CF clause,
 *     add it to the shader's CF list and program its word 1 (instruction
 *     TC / TEX, condition ACTIVE, everything else 0); the barrier bit
 *     starts at 0 unless FORCE_CF_TEX_BARRIER is defined (EG_* layout);
 *   - NOTE(review): otherwise (clause re-used, presumably the else arm)
 *     the clause's count field is incremented -- confirm;
 *   - set the barrier bit when the clause must wait for earlier work:
 *     always for vertex programs, when the current instruction has a
 *     destination dependency recorded, or when need_tex_barrier is set;
 *   - link the first texture instruction of a clause with the clause and
 *     append the instruction to the shader's TEX list.
 *
 * pAsm->unAsic == 8 selects the EG_* word layout written through
 * SETfield/GETbits; other ASICs use the m_Word1.f bitfield layout.
 * NOTE(review): the late barrier update writes m_Word1.f.barrier on both
 * paths -- confirm the bit sits at the same position in the EG layout. */
848 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
849 R700TextureInstruction* tex_instruction_ptr)
851 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
856 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
857 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
858 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
861 // new tex cf instruction for this new clause
862 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
864 if (pAsm->cf_current_tex_clause_ptr != NULL)
866 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
867 AddCFInstruction( pAsm->pR700Shader,
868 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
872 radeon_error("Could not allocate a new TEX CF instruction.\n");
/* program CF word 1 of the new clause: EG_* layout first, bitfield layout second */
876 if(8 == pAsm->unAsic)
878 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC,
879 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
880 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
881 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
882 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
883 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
884 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
885 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
886 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
887 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
888 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
889 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
890 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
891 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
892 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
893 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
894 #ifdef FORCE_CF_TEX_BARRIER
895 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1,
896 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
898 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
899 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
904 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
905 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
906 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
908 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
909 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
910 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
911 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
912 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
/* bump the clause's instruction count, again once per word layout */
917 if(8 == pAsm->unAsic)
919 unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val,
920 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
921 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, count,
922 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
926 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
930 // If this clause constains any TEX instruction that is dependent on a
931 // previous instruction, set the barrier bit, also always set for vert
932 // programs as tex deps are not(yet) computed for them
933 if( pAsm->currentShaderType == SPT_VP || pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
935 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
/* only the first instruction of a clause is linked to the clause itself */
938 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
940 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
941 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
944 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
/* Build (or re-program) the vertex fetch instruction for one client
 * vertex array and register it with the assembler.
 *
 *   pAsm                  assembler state
 *   destination_register  GPR that receives the fetched vertex data
 *   number_of_elements    component count of the array
 *   dataElementType       GL element type, passed to GetSurfaceFormat()
 *   pFetchMethod          fetch policy; bEnableMini is read and, on the
 *                         mega-fetch path, mega_fetch_remainder is written
 * The array slot is identified by gl_client_id, which is used both as the
 * index into pAsm->vfetch_instruction_ptr_array and as the buffer id.
 *
 * If that slot already holds an instruction it is re-used and simply
 * re-programmed; otherwise a new R700VertexGenericFetch is allocated,
 * initialized, added through add_vfetch_instruction() and stored in the
 * slot.
 *
 * Destination selects: components the array does not supply read as 0 for
 * x/y/z and as 1 for w.
 * Mega fetch: with bEnableMini the mega-fetch count and flag are 0;
 * otherwise the count is MEGA_FETCH_BYTES - 1, the flag is 1 and the
 * remainder is MEGA_FETCH_BYTES minus the client element size. */
949 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
951 GLuint destination_register,
952 GLuint number_of_elements,
953 GLenum dataElementType,
954 VTX_FETCH_METHOD* pFetchMethod)
956 GLuint client_size_inbyte;
958 GLuint mega_fetch_count;
959 GLuint is_mega_fetch_flag;
961 R700VertexGenericFetch* vfetch_instruction_ptr;
962 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
/* allocate only when no fetch has been assembled for this slot yet */
964 if (assembled_vfetch_instruction_ptr == NULL)
966 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
967 if (vfetch_instruction_ptr == NULL)
971 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
975 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
978 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
980 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
983 mega_fetch_count = 0;
984 is_mega_fetch_flag = 0;
988 mega_fetch_count = MEGA_FETCH_BYTES - 1;
989 is_mega_fetch_flag = 0x1;
990 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
/* word 0: fetch kind and source */
993 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
994 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
995 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
997 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
998 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
999 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1000 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
1001 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
/* word 1: destination selects; missing components default to (0, 0, 0, 1) */
1003 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
1004 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1005 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1006 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
1008 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1010 // Destination register
1011 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1012 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1014 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1015 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1017 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
/* a newly allocated instruction still has to be added and recorded */
1019 if (assembled_vfetch_instruction_ptr == NULL)
1021 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1026 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
1032 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
/* Evergreen variant: build (or re-program) the vertex fetch instruction
 * for one vertex element and register it with the assembler.  All words
 * are written through SETfield/CLEARbit with the EG_VTX_* layout.
 *
 *   pAsm                  assembler state
 *   destination_register  GPR that receives the fetched vertex data
 *   normalize             GL_TRUE selects SQ_NUM_FORMAT_NORM, otherwise
 *                         SQ_NUM_FORMAT_SCALED
 *   pFetchMethod          fetch policy; bEnableMini is read and, on the
 *                         mega-fetch path, mega_fetch_remainder is written
 * The body also uses element (slot index and buffer id), type and size
 * (passed to GetSurfaceFormat; size also drives the destination selects)
 * and format (GL_BGRA swaps the X and Z selects).
 *
 * If pAsm->vfetch_instruction_ptr_array[element] already holds an
 * instruction it is re-used and re-programmed; otherwise a new
 * R700VertexGenericFetch is allocated, initialized, added through
 * add_vfetch_instruction() and stored in the slot.
 *
 * Destination selects: components the element does not supply read as 0
 * for x/y/z and as 1 for w.
 * Mega fetch: with bEnableMini the mega-fetch count and flag are 0;
 * otherwise the count is MEGA_FETCH_BYTES - 1, the flag is 1 and the
 * remainder is MEGA_FETCH_BYTES minus the client element size.
 * NOTE(review): the destination GPR/REL fields are written into
 * m_Word1.val using the EG_VTX_WORD1_GPR__* shifts -- confirm m_Word1 and
 * the GPR view share storage. */
1039 GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
1040 GLuint destination_register,
1045 GLboolean normalize,
1047 VTX_FETCH_METHOD * pFetchMethod)
1049 GLuint client_size_inbyte;
1051 GLuint mega_fetch_count;
1052 GLuint is_mega_fetch_flag;
1054 GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
1056 R700VertexGenericFetch* vfetch_instruction_ptr;
1057 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1058 = pAsm->vfetch_instruction_ptr_array[element];
/* allocate only when no fetch has been assembled for this element yet */
1060 if (assembled_vfetch_instruction_ptr == NULL)
1062 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1063 if (vfetch_instruction_ptr == NULL)
1067 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1071 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1074 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
1076 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1079 mega_fetch_count = 0;
1080 is_mega_fetch_flag = 0;
1084 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1085 is_mega_fetch_flag = 0x1;
1086 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
/* word 0: fetch kind and source */
1089 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH,
1090 EG_VTX_WORD0__VC_INST_shift,
1091 EG_VTX_WORD0__VC_INST_mask);
1092 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA,
1093 EG_VTX_WORD0__FETCH_TYPE_shift,
1094 EG_VTX_WORD0__FETCH_TYPE_mask);
1095 CLEARbit(vfetch_instruction_ptr->m_Word0.val,
1096 EG_VTX_WORD0__FWQ_bit);
1097 SETfield(vfetch_instruction_ptr->m_Word0.val, element,
1098 EG_VTX_WORD0__BUFFER_ID_shift,
1099 EG_VTX_WORD0__BUFFER_ID_mask);
1100 SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0,
1101 EG_VTX_WORD0__SRC_GPR_shift,
1102 EG_VTX_WORD0__SRC_GPR_mask);
1103 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
1104 EG_VTX_WORD0__SRC_REL_shift,
1105 EG_VTX_WORD0__SRC_REL_bit);
1106 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X,
1107 EG_VTX_WORD0__SRC_SEL_X_shift,
1108 EG_VTX_WORD0__SRC_SEL_X_mask);
1109 SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count,
1110 EG_VTX_WORD0__MFC_shift,
1111 EG_VTX_WORD0__MFC_mask);
/* destination selects: BGRA swaps X and Z; missing components default to (0, 0, 0, 1) */
1113 if(format == GL_BGRA)
1115 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1116 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1117 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1118 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1122 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1123 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1124 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1125 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1128 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x,
1129 EG_VTX_WORD1__DST_SEL_X_shift,
1130 EG_VTX_WORD1__DST_SEL_X_mask);
1131 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y,
1132 EG_VTX_WORD1__DST_SEL_Y_shift,
1133 EG_VTX_WORD1__DST_SEL_Y_mask);
1134 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z,
1135 EG_VTX_WORD1__DST_SEL_Z_shift,
1136 EG_VTX_WORD1__DST_SEL_Z_mask);
1137 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w,
1138 EG_VTX_WORD1__DST_SEL_W_shift,
1139 EG_VTX_WORD1__DST_SEL_W_mask);
1141 SETfield(vfetch_instruction_ptr->m_Word1.val, 1,
1142 EG_VTX_WORD1__UCF_shift,
1143 EG_VTX_WORD1__UCF_bit);
1144 SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
1145 EG_VTX_WORD1__DATA_FORMAT_shift,
1146 EG_VTX_WORD1__DATA_FORMAT_mask);
/* signedness of the fetched components (FCA field); which write applies is
 * selected by conditions around these statements */
1148 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1149 EG_VTX_WORD1__FCA_shift,
1150 EG_VTX_WORD1__FCA_bit);
1154 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1155 EG_VTX_WORD1__FCA_shift,
1156 EG_VTX_WORD1__FCA_bit);
1160 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED,
1161 EG_VTX_WORD1__FCA_shift,
1162 EG_VTX_WORD1__FCA_bit);
1164 #endif /* TEST_VFETCH */
/* numeric interpretation: normalized vs. scaled */
1166 if(GL_TRUE == normalize)
1168 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM,
1169 EG_VTX_WORD1__NFA_shift,
1170 EG_VTX_WORD1__NFA_mask);
1174 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED,
1175 EG_VTX_WORD1__NFA_shift,
1176 EG_VTX_WORD1__NFA_mask);
1179 /* Destination register */
1180 SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register,
1181 EG_VTX_WORD1_GPR__DST_GPR_shift,
1182 EG_VTX_WORD1_GPR__DST_GPR_mask);
1183 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE,
1184 EG_VTX_WORD1_GPR__DST_REL_shift,
1185 EG_VTX_WORD1_GPR__DST_REL_bit);
/* word 2: offset, endian swap (chosen at build time via MESA_BIG_ENDIAN), stride and mega-fetch flags */
1188 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1189 EG_VTX_WORD2__OFFSET_shift,
1190 EG_VTX_WORD2__OFFSET_mask);
1191 SETfield(vfetch_instruction_ptr->m_Word2.val,
1192 #ifdef MESA_BIG_ENDIAN
1197 EG_VTX_WORD2__ENDIAN_SWAP_shift,
1198 EG_VTX_WORD2__ENDIAN_SWAP_mask);
1199 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1200 EG_VTX_WORD2__CBNS_shift,
1201 EG_VTX_WORD2__CBNS_bit);
1202 SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag,
1203 EG_VTX_WORD2__MEGA_FETCH_shift,
1204 EG_VTX_WORD2__MEGA_FETCH_mask);
/* a newly allocated instruction still has to be added and recorded */
1206 if (assembled_vfetch_instruction_ptr == NULL)
1208 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1213 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1219 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
/*
 * Builds, or refreshes in place, the vertex fetch instruction for vertex
 * attribute `element`, writing its fields through the m_Word*.f bit-field
 * views of R700VertexGenericFetch.
 *
 * An instruction already stored in pAsm->vfetch_instruction_ptr_array[element]
 * is reused; otherwise a new one is obtained with CALLOC_STRUCT, initialised
 * with Init_R700VertexGenericFetch() and, at the end, handed to
 * add_vfetch_instruction() and stored in the per-element array.
 * The data format is derived from (type, size) by GetSurfaceFormat(), which
 * also reports the attribute size in bytes through client_size_inbyte.
 *
 * NOTE(review): part of the parameter list and the else / return lines are
 * not visible in this listing; the failure paths presumably return
 * GL_FALSE -- confirm.
 */
1226 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
1227 GLuint destination_register,
1232 GLboolean normalize,
1234 VTX_FETCH_METHOD * pFetchMethod)
1236 GLuint client_size_inbyte;
1238 GLuint mega_fetch_count;
1239 GLuint is_mega_fetch_flag;
1241 R700VertexGenericFetch* vfetch_instruction_ptr;
1242 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1243 = pAsm->vfetch_instruction_ptr_array[element];
/* Allocate only when no instruction exists yet for this element. */
1245 if (assembled_vfetch_instruction_ptr == NULL)
1247 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1248 if (vfetch_instruction_ptr == NULL)
1252 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1256 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1259 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
/* Mini-fetch enabled: mega fetch is switched off (count 0, flag 0). */
1261 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1264 mega_fetch_count = 0;
1265 is_mega_fetch_flag = 0;
/* Presumably the else branch: request a full mega fetch and record how many
 * of its MEGA_FETCH_BYTES this attribute does not consume. */
1269 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1270 is_mega_fetch_flag = 0x1;
1271 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1274 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
1275 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
1276 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1278 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
1279 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
1280 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1281 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
1282 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
/* Destination swizzle: GL_BGRA swaps the X and Z selects. In both variants
 * components at or beyond `size` read constant 0 (W reads constant 1). */
1284 if(format == GL_BGRA)
1286 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1287 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1288 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1289 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1293 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1294 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1295 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1296 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1300 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1301 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
/* Byte swapping is requested only on big-endian builds. */
1302 #ifdef MESA_BIG_ENDIAN
1303 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_8IN32;
1305 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
/* Signed vs. unsigned component format; the controlling condition is not
 * visible in this listing. */
1310 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
1314 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
/* Normalized attributes use NUM_FORMAT_NORM, the others NUM_FORMAT_INT. */
1317 if(GL_TRUE == normalize)
1319 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
1323 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
1326 // Destination register
1327 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1328 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1330 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1331 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1333 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
/* A newly allocated instruction is added to the shader and then cached for
 * this element (exact nesting of the checks below is not visible). */
1335 if (assembled_vfetch_instruction_ptr == NULL)
1337 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1342 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1348 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
/*
 * Resets the assembler's vertex-fetch bookkeeping: marks the current clause
 * as CF_EMPTY_CLAUSE, drops the current vertex clause pointer, clears every
 * slot of vfetch_instruction_ptr_array (VERT_ATTRIB_MAX entries) and then
 * calls cleanup_vfetch_shaderinst() on the shader.
 *
 * The slots are only set to NULL here; nothing is freed in the visible code
 * (presumably cleanup_vfetch_shaderinst() releases the instructions --
 * confirm). The return statement is not visible in this listing.
 */
1355 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1358 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1359 pAsm->cf_current_vtx_clause_ptr = NULL;
1361 for (i=0; i<VERT_ATTRIB_MAX; i++)
1363 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1366 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
/*
 * Hands out a helper (scratch) register number taken from pAsm->uHelpReg and
 * raises pAsm->number_used_registers when uHelpReg has grown past it.
 *
 * NOTE(review): the line between the read of uHelpReg and the comparison,
 * and the return statement, are not visible in this listing; presumably
 * uHelpReg is advanced there and `r` is returned -- confirm.
 */
1371 GLuint gethelpr(r700_AssemblerBase* pAsm)
1373 GLuint r = pAsm->uHelpReg;
1375 if (pAsm->uHelpReg > pAsm->number_used_registers)
1377 pAsm->number_used_registers = pAsm->uHelpReg;
/*
 * Rewinds the helper-register cursor (uHelpReg) to uFirstHelpReg so that
 * helper registers handed out by gethelpr() can be reused.
 */
1381 void resethelpr(r700_AssemblerBase* pAsm)
1383 pAsm->uHelpReg = pAsm->uFirstHelpReg;
/*
 * Clears the operand substitution table: all four aArgSubst entries become
 * -1. assemble_src() and tex_src() treat a value >= 0 in aArgSubst[1 + src]
 * as "read this temporary register instead of the IL operand", so -1 means
 * no substitution.
 *
 * NOTE(review): one line preceding the assignment is not visible in this
 * listing.
 */
1386 void checkop_init(r700_AssemblerBase* pAsm)
1389 pAsm->aArgSubst[0] =
1390 pAsm->aArgSubst[1] =
1391 pAsm->aArgSubst[2] =
1392 pAsm->aArgSubst[3] = -1;
/*
 * Emits the instruction currently described by pAsm->D / pAsm->S[] and then
 * resets the per-instruction assembler state.
 *
 * When pAsm->is_tex is GL_TRUE a TEX instruction is assembled: the
 * `normalized` argument is GL_FALSE for TEXTURE_RECT_INDEX targets and
 * GL_TRUE for the other call. Otherwise (presumably an else branch) an ALU
 * instruction is assembled. Failures are reported through radeon_error();
 * the return statements are not visible in this listing.
 */
1395 static GLboolean next_ins(r700_AssemblerBase *pAsm)
1397 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1399 if (GL_TRUE == pAsm->is_tex)
1401 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX)
1403 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE))
1405 radeon_error("Error assembling TEX instruction\n");
1411 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE))
1413 radeon_error("Error assembling TEX instruction\n");
1420 if (GL_FALSE == assemble_alu_instruction(pAsm))
1422 radeon_error("Error assembling ALU instruction\n");
/* Sanity check: an output destination must not lie below the first export
 * register. */
1427 if (pAsm->D.dst.rtype == DST_REG_OUT)
1429 assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
1432 //reset for next inst.
1435 pAsm->S[0].bits = 0;
1436 pAsm->S[1].bits = 0;
1437 pAsm->S[2].bits = 0;
1438 pAsm->is_tex = GL_FALSE;
1439 pAsm->need_tex_barrier = GL_FALSE;
1441 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
/*
 * Copies IL source operand `src` of the current instruction into a helper
 * register with a MOV, then records that register in aArgSubst[1 + src] so
 * that a later assemble_src() for the same operand reads the temporary.
 *
 * The MOV uses an absolute-addressed temporary destination prepared with
 * nomask_PVSDST(), and the source has its swizzle and negate reset by
 * noswizzle_PVSSRC() / noneg_PVSSRC(). The instruction is emitted with
 * next_ins(). Return statements are not visible in this listing; failures
 * of assemble_src() / next_ins() presumably yield GL_FALSE -- confirm.
 */
1445 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1447 GLuint tmp = gethelpr(pAsm);
1449 //mov src to temp helper gpr.
1450 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1452 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1454 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1455 pAsm->D.dst.reg = tmp;
1457 nomask_PVSDST(&(pAsm->D.dst));
1459 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1464 noswizzle_PVSSRC(&(pAsm->S[0].src));
1465 noneg_PVSSRC(&(pAsm->S[0].src));
1467 if( GL_FALSE == next_ins(pAsm) )
1472 pAsm->aArgSubst[1 + src] = tmp;
/*
 * Presumably the single-operand counterpart of checkop2() / checkop3().
 * NOTE(review): the body is not visible in this listing -- confirm what it
 * does before relying on this description.
 */
1477 GLboolean checkop1(r700_AssemblerBase* pAsm)
/*
 * Operand check for a two-source instruction.
 *
 * Each source is classified as "constant" when its register file is one of
 * PROGRAM_UNIFORM, PROGRAM_CONSTANT, PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM
 * or PROGRAM_STATE_VAR. If both sources are constants with different Index
 * values, source 1 is first copied into a helper register via mov_temp().
 * The reason is not stated in the code; presumably an instruction cannot
 * read two different constants directly -- confirm against the ISA docs.
 *
 * Return statements are not visible in this listing.
 */
1483 GLboolean checkop2(r700_AssemblerBase* pAsm)
1485 GLboolean bSrcConst[2];
1486 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1490 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1491 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1492 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1493 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1494 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1496 bSrcConst[0] = GL_TRUE;
1500 bSrcConst[0] = GL_FALSE;
1502 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1503 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1504 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1505 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1506 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1508 bSrcConst[1] = GL_TRUE;
1512 bSrcConst[1] = GL_FALSE;
/* Two distinct constants: move operand 1 into a temporary first. */
1515 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1517 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1519 if( GL_FALSE == mov_temp(pAsm, 1) )
/*
 * Operand check for a three-source instruction, using the same "constant"
 * classification as checkop2() (UNIFORM, CONSTANT, LOCAL_PARAM, ENV_PARAM,
 * STATE_VAR register files).
 *
 * Visible decisions:
 *   - all three sources constant: operands 1 and 2 are both copied to helper
 *     registers with mov_temp() (no Index comparison is made in this case);
 *   - sources 0 and 1 constant with different Index: operand 1 is copied;
 *   - sources 0 and 2 constant with different Index: operand 2 is copied;
 *   - sources 1 and 2 constant with different Index: operand 2 is copied.
 *
 * Return statements are not visible in this listing.
 */
1529 GLboolean checkop3(r700_AssemblerBase* pAsm)
1531 GLboolean bSrcConst[3];
1532 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1536 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1537 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1538 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1539 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1540 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1542 bSrcConst[0] = GL_TRUE;
1546 bSrcConst[0] = GL_FALSE;
1548 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1549 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1550 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1551 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1552 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1554 bSrcConst[1] = GL_TRUE;
1558 bSrcConst[1] = GL_FALSE;
1560 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1561 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1562 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1563 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1564 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1566 bSrcConst[2] = GL_TRUE;
1570 bSrcConst[2] = GL_FALSE;
/* All three constant: operands 1 and 2 both go through temporaries. */
1573 if( (GL_TRUE == bSrcConst[0]) &&
1574 (GL_TRUE == bSrcConst[1]) &&
1575 (GL_TRUE == bSrcConst[2]) )
1577 if( GL_FALSE == mov_temp(pAsm, 1) )
1581 if( GL_FALSE == mov_temp(pAsm, 2) )
1588 else if( (GL_TRUE == bSrcConst[0]) &&
1589 (GL_TRUE == bSrcConst[1]) )
1591 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1593 if( GL_FALSE == mov_temp(pAsm, 1) )
1601 else if ( (GL_TRUE == bSrcConst[0]) &&
1602 (GL_TRUE == bSrcConst[2]) )
1604 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1606 if( GL_FALSE == mov_temp(pAsm, 2) )
1614 else if( (GL_TRUE == bSrcConst[1]) &&
1615 (GL_TRUE == bSrcConst[2]) )
1617 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1619 if( GL_FALSE == mov_temp(pAsm, 2) )
/*
 * Translates IL source operand `src` of the current instruction into the
 * assembler source slot pAsm->S[fld].src.
 *
 * If aArgSubst[1 + src] is >= 0 the operand is read from that temporary
 * register with absolute addressing. Otherwise the addressing mode follows
 * SrcReg[src].RelAddr and the register is resolved per register file:
 *   - PROGRAM_TEMPORARY: Index + starting_temp_register_number;
 *   - constant-like files: SRC_REG_CONSTANT with the IL Index (a negative
 *     Index is replaced by 0 after a one-time warning);
 *   - attribute / PROGRAM_OUTPUT: a GPR looked up in the FP or VP attribute /
 *     output map, chosen by currentShaderType;
 *   - anything else: radeon_error().
 * Swizzle is unpacked as 3 bits per component and Negate as 1 bit per
 * component.
 *
 * NOTE(review): the `src` / `fld` parameter lines, some case labels, and the
 * else / return lines are not visible in this listing.
 */
1631 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1635 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
/* Operand previously redirected to a helper register (see mov_temp()). */
1642 if(pAsm->aArgSubst[1+src] >= 0)
1645 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1646 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1647 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1651 if (1 == pILInst->SrcReg[src].RelAddr)
1653 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1657 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1659 switch (pILInst->SrcReg[src].File)
1661 case PROGRAM_TEMPORARY:
1662 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1663 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1665 case PROGRAM_CONSTANT:
1666 case PROGRAM_LOCAL_PARAM:
1667 case PROGRAM_ENV_PARAM:
1668 case PROGRAM_STATE_VAR:
1669 case PROGRAM_UNIFORM:
1670 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1671 if(pILInst->SrcReg[src].Index < 0)
1673 WARN_ONCE("Negative register offsets not supported yet!\n");
1674 pAsm->S[fld].src.reg = 0;
1678 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
/* The case label for the next group is not visible; it reads the attribute
 * maps, so it is presumably the program-input case. */
1682 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1683 switch (pAsm->currentShaderType)
1686 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1689 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1693 case PROGRAM_OUTPUT:
1694 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1695 switch (pAsm->currentShaderType)
1698 pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index];
1701 pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index];
1706 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
/* Swizzle: 3 bits per component, x in the low bits. */
1711 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1712 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1713 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1714 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
/* Negate: 1 bit per component, x in bit 0. */
1716 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1717 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1718 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1719 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
/*
 * Translates the IL destination register of the current instruction into
 * pAsm->D.dst and the saturate flag into pAsm->D2.dst2.
 *
 *   - PROGRAM_TEMPORARY: temporary at Index + starting_temp_register_number,
 *     A0-relative when DstReg.RelAddr is 1, absolute otherwise;
 *   - PROGRAM_ADDRESS:   the A0 register (DST_REG_A0, reg 0), absolute;
 *   - PROGRAM_OUTPUT:    DST_REG_OUT, register taken from the FP or VP
 *     output map according to currentShaderType;
 *   - other files:       radeon_error().
 * The 4-bit WriteMask is unpacked into writex..writew, and SaturateMode is
 * set to 1 only for SATURATE_ZERO_ONE.
 *
 * The else / break / return lines are not visible in this listing.
 */
1724 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1726 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1727 switch (pILInst->DstReg.File)
1729 case PROGRAM_TEMPORARY:
1730 if (1 == pILInst->DstReg.RelAddr)
1732 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1736 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1738 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1739 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1741 case PROGRAM_ADDRESS:
1742 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1743 pAsm->D.dst.rtype = DST_REG_A0;
1744 pAsm->D.dst.reg = 0;
1746 case PROGRAM_OUTPUT:
1747 if (1 == pILInst->DstReg.RelAddr)
1749 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1753 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1755 pAsm->D.dst.rtype = DST_REG_OUT;
1756 switch (pAsm->currentShaderType)
1759 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1762 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1767 radeon_error("Invalid destination output argument type\n");
/* WriteMask: one bit per component, x in bit 0. */
1771 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1772 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1773 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1774 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1776 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1778 pAsm->D2.dst2.SaturateMode = 1;
1782 pAsm->D2.dst2.SaturateMode = 0;
/*
 * Sets up pAsm->D.dst for a texture instruction from the IL destination.
 *
 * Only PROGRAM_TEMPORARY (Index + starting_temp_register_number) and
 * PROGRAM_OUTPUT (register from the FP or VP output map, chosen by
 * currentShaderType) are handled, both with absolute addressing; any other
 * file is reported with radeon_error(). The 4-bit WriteMask is unpacked into
 * writex..writew. Unlike assemble_dst(), no saturate mode is written in the
 * visible code.
 *
 * The else / return lines are not visible in this listing.
 */
1788 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1790 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1792 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1794 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1795 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1797 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1799 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1801 pAsm->D.dst.rtype = DST_REG_OUT;
1802 switch (pAsm->currentShaderType)
1805 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1808 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1812 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1816 radeon_error("Invalid destination output argument type\n");
1820 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1821 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1822 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1823 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
/*
 * Sets up pAsm->S[0].src as the texture-coordinate source of a TEX
 * instruction from IL source operand 0.
 *
 * A coordinate is accepted (bValidTexCoord) when:
 *   - aArgSubst[1] >= 0: the operand was redirected to a helper register;
 *   - the file is PROGRAM_TEMPORARY;
 *   - the operand is an attribute: for vertex programs one of
 *     VERT_ATTRIB_TEX0..7, for the other shader type the fragment attributes
 *     listed below (see the review note on the final range test).
 * Constant-like files set nothing in the visible code. An invalid
 * coordinate is reported with radeon_error(). Swizzle (3 bits per component)
 * and Negate (1 bit per component) are then unpacked.
 *
 * Some case labels and the else / break / return lines are not visible in
 * this listing.
 */
1828 GLboolean tex_src(r700_AssemblerBase *pAsm)
1830 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1832 GLboolean bValidTexCoord = GL_FALSE;
1834 if(pAsm->aArgSubst[1] >= 0)
1836 bValidTexCoord = GL_TRUE;
1837 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1838 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1839 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1843 switch (pILInst->SrcReg[0].File) {
1844 case PROGRAM_UNIFORM:
1845 case PROGRAM_CONSTANT:
1846 case PROGRAM_LOCAL_PARAM:
1847 case PROGRAM_ENV_PARAM:
1848 case PROGRAM_STATE_VAR:
1850 case PROGRAM_TEMPORARY:
1851 bValidTexCoord = GL_TRUE;
1852 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1853 pAsm->starting_temp_register_number;
1854 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1857 if(SPT_VP == pAsm->currentShaderType)
1859 switch (pILInst->SrcReg[0].Index)
1861 case VERT_ATTRIB_TEX0:
1862 case VERT_ATTRIB_TEX1:
1863 case VERT_ATTRIB_TEX2:
1864 case VERT_ATTRIB_TEX3:
1865 case VERT_ATTRIB_TEX4:
1866 case VERT_ATTRIB_TEX5:
1867 case VERT_ATTRIB_TEX6:
1868 case VERT_ATTRIB_TEX7:
1869 bValidTexCoord = GL_TRUE;
1870 pAsm->S[0].src.reg =
1871 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1872 pAsm->S[0].src.rtype = SRC_REG_GPR;
1878 switch (pILInst->SrcReg[0].Index)
1880 case FRAG_ATTRIB_WPOS:
1881 case FRAG_ATTRIB_COL0:
1882 case FRAG_ATTRIB_COL1:
1883 case FRAG_ATTRIB_FOGC:
1884 case FRAG_ATTRIB_TEX0:
1885 case FRAG_ATTRIB_TEX1:
1886 case FRAG_ATTRIB_TEX2:
1887 case FRAG_ATTRIB_TEX3:
1888 case FRAG_ATTRIB_TEX4:
1889 case FRAG_ATTRIB_TEX5:
1890 case FRAG_ATTRIB_TEX6:
1891 case FRAG_ATTRIB_TEX7:
1892 bValidTexCoord = GL_TRUE;
1893 pAsm->S[0].src.reg =
1894 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1895 pAsm->S[0].src.rtype = SRC_REG_GPR;
1897 case FRAG_ATTRIB_FACE:
1898 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1900 case FRAG_ATTRIB_PNTC:
1901 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
/* NOTE(review): with `||` this test holds for every Index (any value is
 * either >= FRAG_ATTRIB_VAR0 or < FRAG_ATTRIB_MAX, assuming VAR0 < MAX), so
 * attributes reported as unsupported just above are presumably still
 * accepted here. A range check would need `&&` -- confirm intent. */
1905 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1906 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1908 bValidTexCoord = GL_TRUE;
1909 pAsm->S[0].src.reg =
1910 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1911 pAsm->S[0].src.rtype = SRC_REG_GPR;
1919 if(GL_TRUE == bValidTexCoord)
1921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1925 radeon_error("Invalid source texcoord for TEX instruction\n");
1929 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1930 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1931 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1932 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1934 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1935 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1936 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1937 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
/*
 * Encodes the texture instruction described by pAsm->D (destination and
 * opcode), pAsm->S[0] (texture coordinate) and pAsm->S[1] (texture unit)
 * into a newly allocated R700TextureInstruction and appends it with
 * add_tex_instruction().
 *
 * Word0 is written with SETfield / CLEARbit on the raw value when
 * pAsm->unAsic == 8 (evergreen) and through the m_Word0.f bit-fields in the
 * other visible path. For vertex programs the resource id is offset by
 * VERT_ATTRIB_MAX and the unit is recorded in pAsm->unVetTexBits.
 * Both SQ_TEX_NORMALIZED and SQ_TEX_UNNORMALIZED coordinate types are set
 * below; the selecting condition is not visible but is presumably the
 * `normalized` parameter -- confirm.
 * Only DST_REG_TEMPORARY and DST_REG_OUT destinations are encoded; others
 * are reported with radeon_error().
 *
 * NOTE(review): whether tex_instruction_ptr is released on the failure
 * paths cannot be determined from the visible lines -- check for a leak.
 */
1942 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1944 PVSSRC * texture_coordinate_source;
1945 PVSSRC * texture_unit_source;
1947 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1948 if (tex_instruction_ptr == NULL)
1952 Init_R700TextureInstruction(tex_instruction_ptr);
1954 texture_coordinate_source = &(pAsm->S[0].src);
1955 texture_unit_source = &(pAsm->S[1].src);
1957 if(8 == pAsm->unAsic) /* evergreen */
1960 SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode,
1961 EG_TEX_WORD0__TEX_INST_shift,
1962 EG_TEX_WORD0__TEX_INST_mask);
1964 if( (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode)
1965 ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) )
1967 /* Use fine texel derivative calculation rather than use quad derivative */
1968 SETfield(tex_instruction_ptr->m_Word0.val, 1,
1969 EG_TEX_WORD0__INST_MOD_shift,
1970 EG_TEX_WORD0__INST_MOD_mask);
1974 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1975 EG_TEX_WORD0__INST_MOD_shift,
1976 EG_TEX_WORD0__INST_MOD_mask);
1979 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit);
/* Vertex programs: resource ids are offset by VERT_ATTRIB_MAX and the unit
 * is flagged in unVetTexBits. */
1981 if(SPT_VP == pAsm->currentShaderType)
1983 SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX),
1984 EG_TEX_WORD0__RESOURCE_ID_shift,
1985 EG_TEX_WORD0__RESOURCE_ID_mask);
1986 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1990 SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg,
1991 EG_TEX_WORD0__RESOURCE_ID_shift,
1992 EG_TEX_WORD0__RESOURCE_ID_mask);
1995 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit);
1996 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1997 EG_TEX_WORD0__RIM_shift,
1998 EG_TEX_WORD0__RIM_mask);
1999 SETfield(tex_instruction_ptr->m_Word0.val, 0,
2000 EG_TEX_WORD0__SIM_shift,
2001 EG_TEX_WORD0__SIM_mask);
/* Bit-field encoding of the same Word0 information (presumably the
 * non-evergreen branch). */
2005 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
2006 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
2007 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
2008 tex_instruction_ptr->m_Word0.f.alt_const = 0;
2010 if(SPT_VP == pAsm->currentShaderType)
2012 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
2013 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
2017 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
2021 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
2023 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
2024 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
2025 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
2026 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
2028 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2029 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
2030 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
2031 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
2032 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
2035 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
2036 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
2037 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
2038 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
2041 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2042 (pAsm->D.dst.rtype == DST_REG_OUT) )
2044 if(8 == pAsm->unAsic) /* evergreen */
2046 SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg,
2047 EG_TEX_WORD0__SRC_GPR_shift,
2048 EG_TEX_WORD0__SRC_GPR_mask);
2049 SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
2050 EG_TEX_WORD0__SRC_REL_shift,
2051 EG_TEX_WORD0__SRC_REL_bit);
2055 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
2056 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
2059 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2060 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
/* Destination selects come from the texture-unit source's swizzle (S[1]);
 * components that are not written are masked out. */
2062 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
2063 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
2064 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
2065 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
2068 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
2069 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
2070 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
2071 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
2075 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2079 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
/*
 * Marks every read-port reservation as free: all hw_gpr[cycle][component]
 * entries and all hw_cfile_addr / hw_cfile_chan entries are set to -1.
 * reserve_gpr() and reserve_cfile() treat a negative entry as unused.
 */
2087 void initialize(r700_AssemblerBase *pAsm)
2089 GLuint cycle, component;
2091 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
2093 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2095 pAsm->hw_gpr[cycle][component] = (-1);
2098 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2100 pAsm->hw_cfile_addr[component] = (-1);
2101 pAsm->hw_cfile_chan[component] = (-1);
/*
 * Encodes one scalar source operand of an ALU instruction.
 *
 * For channel `scalar_channel_index` (0..3 = x..w) of `pSource`, computes:
 *   - src_sel:  SQ_ALU_SRC_0 / SQ_ALU_SRC_1 for the constant swizzles
 *               SQ_SEL_0 / SQ_SEL_1; otherwise the GPR number for
 *               TEMPORARY / GPR sources, reg + SQ_ALU_SRC_KCACHE0_BASE
 *               (bUseMemConstant) or reg + CFILE_REGISTER_OFFSET for
 *               constants, or SQ_ALU_SRC_LITERAL for literals;
 *   - src_rel:  SQ_ABSOLUTE or SQ_RELATIVE from the source's address mode;
 *   - src_chan: the channel named by the swizzle;
 *   - src_neg:  the negate bit of the channel.
 * The results are stored into the src0 / src1 fields of m_Word0 or the src2
 * fields of m_Word1_OP3, selected by source_index. Unsupported register
 * types, swizzles or source indices are reported with radeon_error().
 *
 * As a side effect, using a kcache constant sets pAsm->kcacheUsed.
 *
 * The `source_index` / `pSource` parameter lines, local declarations, case
 * labels and return lines are not visible in this listing.
 */
2105 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
2108 BITS scalar_channel_index,
2109 r700_AssemblerBase *pAsm)
2116 //--------------------------------------------------------------------------
2117 // Source for operands src0, src1.
2118 // Values [0,127] correspond to GPR[0..127].
2119 // Values [256,511] correspond to cfile constants c[0..255].
2121 //--------------------------------------------------------------------------
2122 // Other special values are shown in the list below.
2124 // 248 SQ_ALU_SRC_0: special constant 0.0.
2125 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
2127 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
2128 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2130 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
2131 // 253 SQ_ALU_SRC_LITERAL: literal constant.
2133 // 254 SQ_ALU_SRC_PV: previous vector result.
2134 // 255 SQ_ALU_SRC_PS: previous scalar result.
2135 //--------------------------------------------------------------------------
/* Pick the swizzle selector that feeds the requested scalar channel. */
2137 BITS channel_swizzle;
2138 switch (scalar_channel_index)
2140 case 0: channel_swizzle = pSource->swizzlex; break;
2141 case 1: channel_swizzle = pSource->swizzley; break;
2142 case 2: channel_swizzle = pSource->swizzlez; break;
2143 case 3: channel_swizzle = pSource->swizzlew; break;
2144 default: channel_swizzle = SQ_SEL_MASK; break;
/* Constant swizzles map to the inline 0.0 / 1.0 source selects. */
2147 if(channel_swizzle == SQ_SEL_0)
2149 src_sel = SQ_ALU_SRC_0;
2151 else if (channel_swizzle == SQ_SEL_1)
2153 src_sel = SQ_ALU_SRC_1;
2157 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
2158 (pSource->rtype == SRC_REG_GPR)
2161 src_sel = pSource->reg;
2163 else if (pSource->rtype == SRC_REG_CONSTANT)
2165 /* TODO : 4 const buffers */
2166 if(GL_TRUE == pAsm->bUseMemConstant)
2168 src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE;
2169 pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE;
2173 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
2176 else if (pSource->rtype == SRC_REC_LITERAL)
2178 src_sel = SQ_ALU_SRC_LITERAL;
2182 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2183 source_index, pSource->rtype);
2188 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
2190 src_rel = SQ_ABSOLUTE;
2194 src_rel = SQ_RELATIVE;
2197 switch (channel_swizzle)
2200 src_chan = SQ_CHAN_X;
2203 src_chan = SQ_CHAN_Y;
2206 src_chan = SQ_CHAN_Z;
2209 src_chan = SQ_CHAN_W;
2213 // Does not matter since src_sel controls
2214 src_chan = SQ_CHAN_X;
2217 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
2222 switch (scalar_channel_index)
2224 case 0: src_neg = pSource->negx; break;
2225 case 1: src_neg = pSource->negy; break;
2226 case 2: src_neg = pSource->negz; break;
2227 case 3: src_neg = pSource->negw; break;
2228 default: src_neg = 0; break;
/* Store into the instruction word that holds this source operand. */
2231 switch (source_index)
2234 assert(alu_instruction_ptr);
2235 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
2236 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
2237 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
2238 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
2241 assert(alu_instruction_ptr);
2242 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
2243 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
2244 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
2245 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
2248 assert(alu_instruction_ptr);
2249 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
2250 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
2251 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
2252 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
2255 radeon_error("Only three sources allowed in ALU opcodes.\n");
/*
 * Appends `alu_instruction_ptr` to the shader, opening a new ALU control-flow
 * clause first when needed.
 *
 * A new R700ControlFlowALUClause is started when a pending alu_x_opcode is
 * set, when there is no current ALU clause, or when the current clause's
 * count has reached GetCFMaxInstructions(...) - contiguous_slots_needed - 1.
 * The new clause gets NOP kcache modes, count 0, barrier 1, and cf_inst set
 * to the pending alu_x_opcode (which is then cleared) or SQ_CF_INST_ALU.
 * The clause count grows by GetInstructionSize(...) / 2; when kcache
 * constants are in use (kcacheUsed > 0 and bUseMemConstant) kcache_mode0 is
 * set to SQ_CF_KCACHE_LOCK_2. whole_quad_mode is set when the current IL
 * instruction has a destination dependency (nDstDep > -1), and the `last`
 * bit is forced once the clause is full.
 *
 * The else / return lines are not visible in this listing, so which of the
 * statements below sit in an else branch cannot be confirmed here.
 */
2263 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
2264 R700ALUInstruction* alu_instruction_ptr,
2265 GLuint contiguous_slots_needed)
2267 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
2272 if ( pAsm->alu_x_opcode != 0 ||
2273 pAsm->cf_current_alu_clause_ptr == NULL ||
2274 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
2275 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
2279 //new cf inst for this clause
2280 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
2282 // link the new cf to cf segment
2283 if(NULL != pAsm->cf_current_alu_clause_ptr)
2285 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
2286 AddCFInstruction( pAsm->pR700Shader,
2287 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
2291 radeon_error("Could not allocate a new ALU CF instruction.\n");
2295 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2296 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2297 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
2299 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2300 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2301 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2303 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
/* A pending special ALU clause opcode is consumed by the new clause. */
2305 if(pAsm->alu_x_opcode != 0)
2307 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
2308 pAsm->alu_x_opcode = 0;
2312 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
2315 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
2317 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
2321 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
2324 /* TODO : handle 4 bufs */
2325 if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) )
2327 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2328 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2329 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2;
2330 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2331 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2332 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2335 // If this clause constains any instruction that is forward dependent on a TEX instruction,
2336 // set the whole_quad_mode for this clause
2337 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
2339 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
/* Clause is full: close the instruction group on this instruction. */
2342 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
2344 alu_instruction_ptr->m_Word0.f.last = 1;
/* First instruction of the clause: link clause and instruction both ways. */
2347 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
2349 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
2350 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
2353 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
/*
 * Emits the evergreen pixel-shader interpolation ALU instructions.
 *
 * For every index ui from pAsm->uIIns - 1 down to 0, eight ALU instructions
 * (uj = 0..7) are created from the template words unWord0Temp / unWord1Temp
 * and patched:
 *   - opcode EG_OP2_INST_INTERP_ZW or EG_OP2_INST_INTERP_XY (the selecting
 *     condition is not visible in this listing);
 *   - write mask enabled and destination GPR = ui only for uj in 2..5, the
 *     other slots target GPR 111 with the write mask cleared;
 *   - destination channel uj % 4, src0 channel 1 - (uj % 2), and src1 select
 *     EG_ALU_SRC_PARAM_BASE + ui;
 *   - the LAST bit is set under a condition that is not visible here.
 * Each instruction is appended with add_alu_instruction(..., 4).
 *
 * NOTE(review): the loop test `ui>=0` only terminates if `ui` is a signed
 * type; its declaration is not visible in this listing -- confirm.
 */
2358 GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm)
2360 R700ALUInstruction * alu_instruction_ptr = NULL;
2364 unsigned int unWord0Temp = 0x380C00;
2365 unsigned int unWord1Temp = 0x146B10; //SQ_SEL_X
2369 for(ui=(pAsm->uIIns-1); ui>=0; ui--)
2371 for(uj=0; uj<8; uj++)
/* NOTE(review): the allocation result is passed straight to
 * Init_R700ALUInstruction() with no NULL check in between. */
2373 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2374 Init_R700ALUInstruction(alu_instruction_ptr);
2375 alu_instruction_ptr->m_Word0.val = unWord0Temp;
2376 alu_instruction_ptr->m_Word1.val = unWord1Temp;
2380 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW,
2381 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2385 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY,
2386 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2388 if( (uj > 1) && (uj < 6) )
2390 SETfield(alu_instruction_ptr->m_Word1.val, 1,
2391 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2395 SETfield(alu_instruction_ptr->m_Word1.val, 0,
2396 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2398 if( (uj > 1) && (uj < 6) )
2400 SETfield(alu_instruction_ptr->m_Word1.val, ui,
2401 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2405 SETfield(alu_instruction_ptr->m_Word1.val, 111,
2406 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2409 SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4),
2410 EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask);
2411 SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)),
2412 EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask);
2413 SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui),
2414 EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask);
2417 SETfield(alu_instruction_ptr->m_Word0.val, 1,
2418 EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit);
2421 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) )
/*
 * Reads back the encoded select / relative / channel / negate fields of one
 * source operand of an ALU instruction into *psrc_sel, *psrc_rel, *psrc_chan
 * and *psrc_neg.
 *
 * The src0 and src1 fields live in m_Word0 and the src2 fields in
 * m_Word1_OP3; `source_index` selects which group is read (the case labels
 * and most of the parameter list are not visible in this listing).
 */
2432 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
2439 switch (source_index)
2442 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
2443 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
2444 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
2445 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
2449 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
2450 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
2451 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
2452 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
2456 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
2457 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
2458 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
2459 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
/*
 * Tests whether source select `sel` lies in 256..511, the range the comment
 * in assemble_alu_src() assigns to cfile constants c[0..255].
 * The return statements are not visible in this listing.
 */
2464 int is_cfile(BITS sel)
2466 if (sel > 255 && sel < 512)
/*
 * Tests whether source select `sel` denotes a constant. The visible branch
 * accepts the special selects SQ_ALU_SRC_0 .. SQ_ALU_SRC_LITERAL; it is an
 * `else if`, so an earlier test exists that is not visible in this listing
 * (presumably the cfile range -- confirm). Return statements are likewise
 * not visible.
 */
2473 int is_const(BITS sel)
2479 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
/*
 * Tests whether source select `sel` lies in 0..127, the range the comment in
 * assemble_alu_src() assigns to GPR[0..127].
 * NOTE(review): if BITS is an unsigned type the `sel >= 0` half is always
 * true -- confirm the typedef. Return statements are not visible here.
 */
2486 int is_gpr(BITS sel)
2488 if (sel >= 0 && sel < 128)
/*
 * Vector bank-swizzle lookup table with eight entries, indexed by a 3-bit
 * key written in binary in the trailing comments. Presumably the key is
 * built like the scalar one in check_scalar() (bit 2 = src0 is constant,
 * bit 1 = src1, bit 0 = src2) -- the vector user is not visible here.
 */
2495 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
2496 SQ_ALU_VEC_120, //001
2497 SQ_ALU_VEC_102, //010
2499 SQ_ALU_VEC_201, //011
2500 SQ_ALU_VEC_012, //100
2501 SQ_ALU_VEC_021, //101
2503 SQ_ALU_VEC_012, //110
2504 SQ_ALU_VEC_012}; //111
/*
 * Scalar bank-swizzle lookup table with eight entries. check_scalar()
 * indexes it with swizzle_key, where 4 is added if src0 is a constant, 2 if
 * src1 is and 1 if src2 is; the trailing comments show that key in binary.
 */
2506 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
2507 SQ_ALU_SCL_122, //001
2508 SQ_ALU_SCL_122, //010
2510 SQ_ALU_SCL_221, //011
2511 SQ_ALU_SCL_212, //100
2512 SQ_ALU_SCL_122, //101
2514 SQ_ALU_SCL_122, //110
2515 SQ_ALU_SCL_122}; //111
/*
 * Reserves a constant-file read port for the (sel, chan) pair.
 *
 * The four port slots are scanned from index 3 down to 0. A slot whose
 * hw_cfile_addr is negative is free; a slot already holding the same sel and
 * chan is a match. On a match nothing is changed; otherwise a free slot is
 * filled with sel / chan; if neither exists an error is reported.
 *
 * The remaining parameters, the loop-body assignments and the return
 * statements are not visible in this listing.
 */
2517 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
2521 int res_match = (-1);
2522 int res_empty = (-1);
2526 for (res=3; res>=0; res--)
2528 if(pAsm->hw_cfile_addr[ res] < 0)
2532 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
2534 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
2542 // Read for this scalar component already reserved, nothing to do here.
2545 else if(res_empty >= 0)
2547 pAsm->hw_cfile_addr[ res_empty ] = sel;
2548 pAsm->hw_cfile_chan[ res_empty ] = chan;
/* NOTE(review): "$sel" and "$chan" are not C format specifiers, so the
 * message is printed literally without the actual values. */
2552 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
/*
 * Reserves the GPR read port for channel `chan` in read cycle `cycle`.
 *
 * A negative hw_gpr[cycle][chan] entry is free and is claimed for `sel`.
 * An entry holding a different register is a conflict and is reported with
 * radeon_error(); an entry already holding `sel` needs no change.
 * No bounds check on `cycle` / `chan` is visible; the return statements are
 * not visible in this listing.
 */
2558 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
2560 if(pAsm->hw_gpr[cycle][chan] < 0)
2562 pAsm->hw_gpr[cycle][chan] = sel;
2564 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
2566 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
/*
 * Maps a scalar bank swizzle `swiz` and a source operand index `sel` to the
 * read cycle used for that operand, stored in *pCycle. Each supported
 * swizzle has a three-entry table whose values match the digits in its name
 * (e.g. SQ_ALU_SCL_122 -> {1, 2, 2}). An unknown swizzle is reported with
 * radeon_error().
 *
 * NOTE(review): `sel` indexes a three-element table and no range check is
 * visible, so callers must pass 0..2. The switch header, break and return
 * lines are not visible in this listing.
 */
2573 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2577 case SQ_ALU_SCL_210:
2579 int table[3] = {2, 1, 0};
2580 *pCycle = table[sel];
2584 case SQ_ALU_SCL_122:
2586 int table[3] = {1, 2, 2};
2587 *pCycle = table[sel];
2591 case SQ_ALU_SCL_212:
2593 int table[3] = {2, 1, 2};
2594 *pCycle = table[sel];
2598 case SQ_ALU_SCL_221:
2600 int table[3] = {2, 2, 1};
2601 *pCycle = table[sel];
2606 radeon_error("Bad Scalar bank swizzle value\n");
// Translates a vector bank-swizzle mode into the read cycle used by one
// source operand. Each visible case holds a three-entry table indexed by sel
// (the operand index passed by check_vector) and stores the selected entry
// through pCycle. The digits of each SQ_ALU_VEC_xyz name match the table
// contents for operands 0, 1 and 2.
// NOTE(review): sel indexes table[3] without a range check, so callers must
// pass 0..2. The switch header, the default label and the return statements
// are not visible in this excerpt; presumably the error below is the default
// case - confirm.
2613 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2617 case SQ_ALU_VEC_012:
2619 int table[3] = {0, 1, 2};
2620 *pCycle = table[sel];
2623 case SQ_ALU_VEC_021:
2625 int table[3] = {0, 2, 1};
2626 *pCycle = table[sel];
2629 case SQ_ALU_VEC_120:
2631 int table[3] = {1, 2, 0};
2632 *pCycle = table[sel];
2635 case SQ_ALU_VEC_102:
2637 int table[3] = {1, 0, 2};
2638 *pCycle = table[sel];
2641 case SQ_ALU_VEC_201:
2643 int table[3] = {2, 0, 1};
2644 *pCycle = table[sel];
2647 case SQ_ALU_VEC_210:
2649 int table[3] = {2, 1, 0};
2650 *pCycle = table[sel];
2654 radeon_error("Bad Vec bank swizzle value\n");
// Read-port check for a scalar ALU instruction.
// Steps visible here: fetch the properties of every source operand, pick a
// bank swizzle from BANK_SWIZZLE_SCL according to which operands are
// constants, reserve constant-file ports for constant-file operands, then
// reserve a GPR read port for operands in the cycle given by
// cycle_for_scalar_bank_swizzle.
// NOTE(review): several declarations, the updates of const_count, the tests
// that guard the GPR reservation and all return statements are not visible
// in this excerpt.
2662 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2663 R700ALUInstruction* alu_instruction_ptr)
2666 GLuint bank_swizzle;
2667 GLuint const_count = 0;
2676 BITS src_sel [3] = {0,0,0};
2677 BITS src_chan[3] = {0,0,0};
2678 BITS src_rel [3] = {0,0,0};
2679 BITS src_neg [3] = {0,0,0};
2682 GLuint number_of_operands;
// The operand count comes from the Evergreen table when unAsic is 8 and from
// the r700 table in the other assignment.
2684 if(8 == pAsm->unAsic)
2686 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2690 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
// Pass 1: collect the properties of each source into the src_* arrays
// (arguments of the call are not visible here).
2693 for (src=0; src<number_of_operands; src++)
2695 get_src_properties(alu_instruction_ptr,
// Build the 3-bit table index: bit value 4 for source 0, 2 for source 1,
// 1 for source 2, set when that source is a constant. Unused sources keep
// selector 0 from the initialisers above.
2704 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2705 (is_const( src_sel[1] ) ? 2 : 0) +
2706 (is_const( src_sel[2] ) ? 1 : 0) );
2708 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
// Pass 2: handle constant operands.
2710 for (src=0; src<number_of_operands; src++)
2712 sel = src_sel [src];
2713 chan = src_chan[src];
2714 rel = src_rel [src];
2715 neg = src_neg [src];
2717 if (is_const( sel ))
2719 // Any constant, including literal and inline constants
2722 if (is_cfile( sel ))
// NOTE(review): the result of reserve_cfile is not tested here, unlike the
// call in check_vector.
2724 reserve_cfile(pAsm, sel, chan);
// Pass 3: reserve GPR read ports in the cycle dictated by the bank swizzle.
2730 for (src=0; src<number_of_operands; src++)
2732 sel = src_sel [src];
2733 chan = src_chan[src];
2734 rel = src_rel [src];
2735 neg = src_neg [src];
2739 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2741 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2746 if(cycle < const_count)
2748 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
// Read-port check for a vector ALU instruction.
// Steps visible here: fetch the properties of every source operand, pick a
// bank swizzle from BANK_SWIZZLE_VEC according to which operands are
// constants, then for each operand obtain its read cycle and reserve either
// a GPR read port or a constant-file port.
// NOTE(review): several declarations, the first test of the per-operand
// branch and all return statements are not visible in this excerpt.
2759 GLboolean check_vector(r700_AssemblerBase* pAsm,
2760 R700ALUInstruction* alu_instruction_ptr)
2763 GLuint bank_swizzle;
2764 GLuint const_count = 0;
2773 BITS src_sel [3] = {0,0,0};
2774 BITS src_chan[3] = {0,0,0};
2775 BITS src_rel [3] = {0,0,0};
2776 BITS src_neg [3] = {0,0,0};
2779 GLuint number_of_operands;
// The operand count comes from the Evergreen table when unAsic is 8 and from
// the r700 table in the other assignment.
2781 if(8 == pAsm->unAsic)
2783 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2787 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
// Collect the properties of each source into the src_* arrays (arguments of
// the call are not visible here).
2790 for (src=0; src<number_of_operands; src++)
2792 get_src_properties(alu_instruction_ptr,
// Build the 3-bit table index: bit value 4 for source 0, 2 for source 1,
// 1 for source 2, set when that source is a constant.
2801 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2802 (is_const( src_sel[1] ) ? 2 : 0) +
2803 (is_const( src_sel[2] ) ? 1 : 0)
2806 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
// Per-operand port reservation.
2808 for (src=0; src<number_of_operands; src++)
2810 sel = src_sel [src];
2811 chan = src_chan[src];
2812 rel = src_rel [src];
2813 neg = src_neg [src];
2816 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2820 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
// The visible part of this condition compares the operand with source 0
// (same selector and same channel).
// NOTE(review): presumably a repeated read of source 0 needs no second port;
// the start of the condition is not visible - confirm.
2826 (sel == src_sel[0]) &&
2827 (chan == src_chan[0]) )
2832 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2838 else if( is_const(sel) )
2844 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
// Emits the hardware ALU instruction(s) for the operation currently described
// by pAsm->D (destination), pAsm->S[] (sources) and pAsm->C[] (literals).
// One instruction is built when pAsm->D.dst.math is 1, otherwise four (one
// per channel). Each instruction is filled in, handed to add_alu_instruction
// and then validated with check_scalar or check_vector.
// NOTE(review): braces, else keywords, switch labels, some declarations and
// all return statements are not visible in this excerpt, so the comments
// below describe only the statements that can be seen.
2855 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2857 R700ALUInstruction * alu_instruction_ptr = NULL;
2858 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2859 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2861 GLuint number_of_scalar_operations;
2862 GLboolean is_single_scalar_operation;
2863 GLuint scalar_channel_index;
2865 PVSSRC * pcurrent_source;
2866 int current_source_index;
2867 GLuint contiguous_slots_needed;
2869 GLboolean bSplitInst;
// Operand count: Evergreen table when unAsic is 8, r700 table in the other
// assignment.
2871 if(8 == pAsm->unAsic)
2873 uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2877 uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2880 //GLuint channel_swizzle, j;
2881 //GLuint chan_counter[4] = {0, 0, 0, 0};
2882 //PVSSRC * pSource[3];
2883 bSplitInst = GL_FALSE;
2884 pAsm->kcacheUsed = 0;
// A math destination is emitted as one scalar instruction; the other
// assignments select four per-channel instructions.
2886 if (1 == pAsm->D.dst.math)
2888 is_single_scalar_operation = GL_TRUE;
2889 number_of_scalar_operations = 1;
2893 is_single_scalar_operation = GL_FALSE;
2894 number_of_scalar_operations = 4;
2896 /* current assembler doesn't do more than 1 register per source */
// NOTE(review): the read-port estimate below uses j, pSource, chan_counter
// and channel_swizzle, which are declared only in the commented-out lines
// above; presumably this region is compiled out in the full file - confirm.
2898 /* check read port, only very preliminary algorithm, not count in
2899 src0/1 same comp case and prev slot repeat case; also not count relative
2900 addressing. TODO: improve performance. */
2901 for(j=0; j<uNumSrc; j++)
2903 pSource[j] = &(pAsm->S[j].src);
2905 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2907 for(j=0; j<uNumSrc; j++)
2909 switch (scalar_channel_index)
2911 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2912 case 1: channel_swizzle = pSource[j]->swizzley; break;
2913 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2914 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2915 default: channel_swizzle = SQ_SEL_MASK; break;
// Count register reads per selected channel.
2917 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2918 (pSource[j]->rtype == SRC_REG_GPR))
2919 && (channel_swizzle <= SQ_SEL_W) )
2921 chan_counter[channel_swizzle]++;
2925 if( (chan_counter[SQ_SEL_X] > 3)
2926 || (chan_counter[SQ_SEL_Y] > 3)
2927 || (chan_counter[SQ_SEL_Z] > 3)
2928 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2930 bSplitInst = GL_TRUE;
// Slot budget passed to add_alu_instruction: 4 for a per-channel group, plus
// the literal slots requested by the destination descriptor.
2935 contiguous_slots_needed = 0;
2937 if(!is_single_scalar_operation)
2939 contiguous_slots_needed = 4;
2942 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
// One iteration per emitted instruction.
2946 for (scalar_channel_index=0;
2947 scalar_channel_index < number_of_scalar_operations;
2948 scalar_channel_index++)
// Only the last instruction of the group may carry literals: its structure
// type is chosen from literal_slots (plain, half-literal with C[0..1], or
// full-literal with C[0..3]). The case labels are not visible here.
// NOTE(review): the CALLOC_STRUCT results are used without a visible NULL
// check.
2950 if(scalar_channel_index == (number_of_scalar_operations-1))
2952 switch(pAsm->D2.dst2.literal_slots)
2955 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2956 Init_R700ALUInstruction(alu_instruction_ptr);
2959 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2960 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2961 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2964 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2965 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2966 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
// Remaining instructions of the group use the plain structure.
2972 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2973 Init_R700ALUInstruction(alu_instruction_ptr);
// Encode source 0.
2977 current_source_index = 0;
2978 pcurrent_source = &(pAsm->S[0].src);
2980 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2981 current_source_index,
2983 scalar_channel_index,
// Encode source 1.
2992 current_source_index = 1;
2993 pcurrent_source = &(pAsm->S[current_source_index].src);
2995 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2996 current_source_index,
2998 scalar_channel_index,
3006 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
// The last bit is set on every instruction for a single scalar operation or
// a split instruction; in the other assignment only channel 3 sets it.
3008 if( (is_single_scalar_operation == GL_TRUE)
3009 || (GL_TRUE == bSplitInst) )
3011 alu_instruction_ptr->m_Word0.f.last = 1;
3015 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
// Predicate fields: pred_sel follows dst.pred_inv; update_pred and
// update_execute_mask are both set when dst.predicated is 1 and cleared in
// the other pair of assignments.
3018 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
3019 if(1 == pAsm->D.dst.predicated)
3021 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
3022 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
3026 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3027 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
// Destination register: only DST_REG_TEMPORARY and DST_REG_OUT are encoded;
// the error message is issued for the remaining types.
3031 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
3032 (pAsm->D.dst.rtype == DST_REG_OUT) )
3034 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
3038 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
// Destination addressing: relative when the address mode is ADDR_RELATIVE_A0.
3042 if ( ADDR_RELATIVE_A0 == addrmode_PVSDST(&(pAsm->D.dst)) )
3044 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_RELATIVE;
3048 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
// For a single scalar operation the destination channel is the first enabled
// write flag in x, y, z, w order. This rewrites the loop counter; the loop
// still ends because the single iteration is followed by an increment to at
// least 1, which is not below number_of_scalar_operations (1).
3051 if ( is_single_scalar_operation == GL_TRUE )
3053 // Override scalar_channel_index since only one scalar value will be written
3054 if(pAsm->D.dst.writex)
3056 scalar_channel_index = 0;
3058 else if(pAsm->D.dst.writey)
3060 scalar_channel_index = 1;
3062 else if(pAsm->D.dst.writez)
3064 scalar_channel_index = 2;
3066 else if(pAsm->D.dst.writew)
3068 scalar_channel_index = 3;
3072 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
3074 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
// Three-operand form: opcode goes into the OP3 word and source 2 is encoded.
3076 if (pAsm->D.dst.op3)
3080 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
3082 //There's 3rd src for op3
3083 current_source_index = 2;
3084 pcurrent_source = &(pAsm->S[current_source_index].src);
3086 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
3087 current_source_index,
3089 scalar_channel_index,
// Two-operand form, f6 layout: opcode, source abs flags, per-channel write
// mask and output modifier.
// NOTE(review): the condition that selects the f6 layout over the f layout
// further down is not visible in this excerpt.
3100 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
3102 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
3103 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
3105 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3106 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
3107 switch (scalar_channel_index)
3110 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
3113 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
3116 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
3119 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
3122 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
3125 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
// Two-operand form, f layout: same fields as above.
3129 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
3131 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
3132 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
3134 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3135 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3136 switch (scalar_channel_index)
3139 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
3142 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
3145 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
3148 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
3151 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
3154 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
// Hand the finished instruction to the shader.
3158 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
3164 * Judge the type of current instruction, is it vector or scalar
// Validate read ports with the checker that matches the instruction kind.
3167 if (is_single_scalar_operation)
3169 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
3176 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
// One slot of the budget has been consumed by this instruction.
3182 contiguous_slots_needed -= 1;
// Emits a scalar math opcode in two steps: the opcode is executed into the x
// channel of a helper register obtained from gethelpr, then a MOV copies
// helper.x (swizzle SQ_SEL_X on every channel) to the real destination set up
// by assemble_dst.
// NOTE(review): return statements are not visible in this excerpt.
3188 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3194 tmp = gethelpr(pAsm);
3196 // opcode tmp.x, a.x
3199 pAsm->D.dst.opcode = opcode;
3200 pAsm->D.dst.math = 1;
3202 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3203 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3204 pAsm->D.dst.reg = tmp;
3205 pAsm->D.dst.writex = 1;
3207 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
// For OPCODE_RSQ the absolute-value flag of source 0 is set.
3212 if( pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_RSQ )
3213 pAsm->S[0].src.abs = 1;
3215 if ( GL_FALSE == next_ins(pAsm) )
3220 // Now replicate result to all necessary channels in destination
3221 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3223 if( GL_FALSE == assemble_dst(pAsm) )
3228 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
// NOTE(review): a DST_* enumerator is stored in a source rtype field; other
// functions in this file use SRC_REG_TEMPORARY here. Presumably both have
// the same numeric value - confirm.
3229 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3230 pAsm->S[0].src.reg = tmp;
3232 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3233 noneg_PVSSRC(&(pAsm->S[0].src));
3235 if( GL_FALSE == next_ins(pAsm) )
// Emits ABS as MAX(src, -src): source 1 is a bit copy of source 0 with its
// negate flag flipped by flipneg_PVSSRC.
// NOTE(review): return statements are not visible in this excerpt.
3243 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3247 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3249 if( GL_FALSE == assemble_dst(pAsm) )
3253 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
// Duplicate the encoded operand, then invert its sign.
3258 pAsm->S[1].bits = pAsm->S[0].bits;
3259 flipneg_PVSSRC(&(pAsm->S[1].src));
3261 if ( GL_FALSE == next_ins(pAsm) )
// Emits a two-operand ADD. The same routine serves OPCODE_SUB: in that case
// the negate flag of source 1 is flipped before the instruction is emitted.
// NOTE(review): return statements are not visible in this excerpt.
3269 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3271 if( GL_FALSE == checkop2(pAsm) )
3276 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3278 if( GL_FALSE == assemble_dst(pAsm) )
3283 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3288 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
// SUB is ADD with the second operand negated.
3293 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3295 flipneg_PVSSRC(&(pAsm->S[1].src));
3298 if( GL_FALSE == next_ins(pAsm) )
// Emits the address-register load. When unAsic is 8 two instructions are
// issued (EG_OP2_INST_FLT_TO_INT_FLOOR, then EG_OP2_INST_MOVA_INT); the other
// visible sequence issues a single SQ_OP2_INST_MOVA_FLOOR. In every case the
// destination is temporary register 0 with all four write flags cleared and
// source 0 is taken from the current instruction via assemble_src.
// NOTE(review): braces, the else keyword and return statements are not
// visible in this excerpt.
3306 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3307 { /* TODO: ar values do not persist between clauses */
3308 if( GL_FALSE == checkop1(pAsm) )
3313 if(8 == pAsm->unAsic)
3317 /* Float to Signed Integer Using FLOOR */
3318 pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR;
3319 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3320 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3321 pAsm->D.dst.reg = 0;
3322 pAsm->D.dst.writex = 0;
3323 pAsm->D.dst.writey = 0;
3324 pAsm->D.dst.writez = 0;
3325 pAsm->D.dst.writew = 0;
3327 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3332 if( GL_FALSE == next_ins(pAsm) )
3337 /* Copy Signed Integer To Integer in AR and GPR */
3338 pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT;
3339 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3340 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3341 pAsm->D.dst.reg = 0;
3342 pAsm->D.dst.writex = 0;
3343 pAsm->D.dst.writey = 0;
3344 pAsm->D.dst.writez = 0;
3345 pAsm->D.dst.writew = 0;
3347 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3352 if( GL_FALSE == next_ins(pAsm) )
3361 /* Truncate floating-point to the nearest integer
3362 in the range [-256, +255], and copy to AR and
3365 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3366 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3367 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3368 pAsm->D.dst.reg = 0;
3369 pAsm->D.dst.writex = 0;
3370 pAsm->D.dst.writey = 0;
3371 pAsm->D.dst.writez = 0;
3372 pAsm->D.dst.writew = 0;
3374 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3379 if( GL_FALSE == next_ins(pAsm) )
// Reports an unimplemented opcode through radeon_error; opcode_str is the
// opcode name inserted into the message.
// NOTE(review): the return statement is not visible in this excerpt.
3388 GLboolean assemble_BAD(char *opcode_str)
3390 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
// Emits CMP with the three-operand CNDGE opcode (Evergreen variant when
// unAsic is 8). Because OP3 instructions have no write mask, a partial
// destination write mask (anything other than 0xF) is handled by computing
// into a helper register with all channels enabled and then copying the
// helper to the real destination with a MOV.
// NOTE(review): braces, else keywords and return statements are not visible
// in this excerpt.
3394 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3398 if( GL_FALSE == checkop3(pAsm) )
3403 if(8 == pAsm->unAsic)
3405 pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
3409 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3411 pAsm->D.dst.op3 = 1;
3415 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3417 //OP3 has no support for write mask
3418 tmp = gethelpr(pAsm);
3420 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3421 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3422 pAsm->D.dst.reg = tmp;
3424 nomask_PVSDST(&(pAsm->D.dst));
3428 if( GL_FALSE == assemble_dst(pAsm) )
// Operand order differs from the program order: the second and third
// assemble_src calls pass (2, 1) and (1, 2).
// NOTE(review): presumably the arguments are (program source index, hardware
// operand slot) with -1 meaning the same slot - confirm against assemble_src.
3434 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3439 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3444 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3449 if ( GL_FALSE == next_ins(pAsm) )
// Copy the helper register to the masked destination.
3454 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3456 if( GL_FALSE == assemble_dst(pAsm) )
3461 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3464 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3465 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3466 pAsm->S[0].src.reg = tmp;
3468 noneg_PVSSRC(&(pAsm->S[0].src));
3469 noswizzle_PVSSRC(&(pAsm->S[0].src));
3471 if( GL_FALSE == next_ins(pAsm) )
// Emits a trigonometric opcode after range reduction in a helper register.
// Visible sequence:
//   1. MULADD helper = src0, literal C[0] = 1/(2*pi), literal C[1] = 0.5
//   2. FRACT  helper.x = helper.x
//   3. MULADD helper = helper.x with a second literal pair, either
//      (2*pi, -pi) or (1.0, -0.5)
//   4. the requested opcode, flagged as a math operation, reading helper.x
// NOTE(review): the condition that selects between the two literal pairs in
// step 3, the destination setup of step 4 and the return statements are not
// visible in this excerpt. Presumably sources 1 and 2 pick C[0] and C[1]
// through their X and Y swizzles - confirm.
3480 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
3483 * r600 - trunc to -PI..PI range
3484 * r700 - normalize by dividing by 2PI
3491 tmp = gethelpr(pAsm);
3492 if(8 == pAsm->unAsic)
3494 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3498 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3500 pAsm->D.dst.op3 = 1;
3502 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3503 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3504 pAsm->D.dst.reg = tmp;
// NOTE(review): the result of assemble_src is ignored here, whereas every
// other call site in this file tests it against GL_FALSE.
3506 assemble_src(pAsm, 0, -1);
3508 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3509 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3511 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3512 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
// One literal slot carries the two constants used by this instruction.
3514 pAsm->D2.dst2.literal_slots = 1;
3515 pAsm->C[0].f = 1/(3.1415926535 * 2);
3516 pAsm->C[1].f = 0.5f;
3518 if ( GL_FALSE == next_ins(pAsm) )
// Step 2: keep only the fractional part, in place in helper.x.
3523 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3525 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3526 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3527 pAsm->D.dst.reg = tmp;
3528 pAsm->D.dst.writex = 1;
3530 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3531 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3532 pAsm->S[0].src.reg = tmp;
3533 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3535 if(( GL_FALSE == next_ins(pAsm) ))
// Step 3: second multiply-add on the helper register.
3539 if(8 == pAsm->unAsic)
3541 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3545 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3547 pAsm->D.dst.op3 = 1;
3549 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3550 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3551 pAsm->D.dst.reg = tmp;
3553 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3554 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3555 pAsm->S[0].src.reg = tmp;
3556 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3558 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3559 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3561 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3562 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3564 pAsm->D2.dst2.literal_slots = 1;
// First literal pair: scale by 2*pi and shift by -pi.
3568 pAsm->C[0].f = 3.1415926535897f * 2.0f;
3569 pAsm->C[1].f = -3.1415926535897f;
// Second literal pair: scale by 1 and shift by -0.5.
3573 pAsm->C[0].f = 1.0f;
3574 pAsm->C[1].f = -0.5f;
3577 if(( GL_FALSE == next_ins(pAsm) ))
// Step 4: the requested opcode, emitted as a scalar math operation.
3582 pAsm->D.dst.opcode = opcode;
3583 pAsm->D.dst.math = 1;
3587 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3588 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3589 pAsm->S[0].src.reg = tmp;
3590 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3591 noneg_PVSSRC(&(pAsm->S[0].src));
3595 //TODO - replicate if more channels set in WriteMask
// Emits the dot-product family with the DOT4 opcode (Evergreen variant when
// unAsic is 8). Shorter products are obtained by adjusting source
// components before emission:
//   OPCODE_DP2 - components 2 and 3 of both sources go through zerocomp
//   OPCODE_DP3 - component 3 of both sources goes through zerocomp
//   OPCODE_DPH - component 3 of source 0 goes through onecomp
// NOTE(review): presumably zerocomp_PVSSRC / onecomp_PVSSRC force the given
// component to 0 / 1 - confirm. Return statements are not visible here.
3600 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3602 if( GL_FALSE == checkop2(pAsm) )
3607 if(8 == pAsm->unAsic)
3609 pAsm->D.dst.opcode = EG_OP2_INST_DOT4;
3613 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3616 if( GL_FALSE == assemble_dst(pAsm) )
3621 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3626 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3631 if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3633 zerocomp_PVSSRC(&(pAsm->S[0].src),2);
3634 zerocomp_PVSSRC(&(pAsm->S[0].src),3);
3635 zerocomp_PVSSRC(&(pAsm->S[1].src),2);
3636 zerocomp_PVSSRC(&(pAsm->S[1].src),3);
3638 else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3640 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3641 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3643 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3645 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3648 if ( GL_FALSE == next_ins(pAsm) )
// Emits DST as a MUL of the two sources after onecomp_PVSSRC is applied to
// components 0 and 3 of source 0 and to components 0 and 2 of source 1.
// NOTE(review): presumably onecomp_PVSSRC forces the given component to 1 -
// confirm. Return statements are not visible in this excerpt.
3656 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3658 if( GL_FALSE == checkop2(pAsm) )
3663 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3665 if( GL_FALSE == assemble_dst(pAsm) )
3670 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3675 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3680 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3681 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3683 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3684 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3686 if ( GL_FALSE == next_ins(pAsm) )
// Emits EX2 through assemble_math_function, using the Evergreen EXP_IEEE
// opcode when unAsic is 8 and the SQ EXP_IEEE opcode in the other return.
3694 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3696 if(8 == pAsm->unAsic)
3698 return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE);
3701 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
// Emits EXP one destination channel at a time, each guarded by the
// corresponding bit of the destination write mask:
//   bit 0 (x): FLOOR into helper.x, then EXP_IEEE of helper.x written to x
//   bit 1 (y): FRACT of source 0 written to y
//   bit 2 (z): EXP_IEEE of source 0 written to z
//   bit 3 (w): MOV with swizzle SQ_SEL_1 written to w
// After assemble_dst the write flags of the other channels are cleared so
// that each step touches only its own channel.
// NOTE(review): the write mask is read from pAsm->pILInst->DstReg, i.e. the
// first element of pILInst, whereas other functions in this file index
// pILInst with pAsm->uiCurInst. Confirm whether the current instruction was
// intended. Return statements are not visible in this excerpt.
3704 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3710 tmp = gethelpr(pAsm);
// x channel.
3715 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3716 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3718 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3719 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3720 pAsm->D.dst.reg = tmp;
3721 pAsm->D.dst.writex = 1;
3723 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3728 if( GL_FALSE == next_ins(pAsm) )
3733 if(8 == pAsm->unAsic)
3735 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3739 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3741 pAsm->D.dst.math = 1;
3743 if( GL_FALSE == assemble_dst(pAsm) )
3748 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3750 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
// NOTE(review): a DST_* enumerator is stored in a source rtype field;
// presumably it equals SRC_REG_TEMPORARY - confirm.
3751 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3752 pAsm->S[0].src.reg = tmp;
3754 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3755 noneg_PVSSRC(&(pAsm->S[0].src));
3757 if( GL_FALSE == next_ins(pAsm) )
// y channel.
3765 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3766 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3768 if( GL_FALSE == assemble_dst(pAsm) )
3773 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3778 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3780 if( GL_FALSE == next_ins(pAsm) )
// z channel.
3788 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3789 if(8 == pAsm->unAsic)
3791 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3795 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3797 pAsm->D.dst.math = 1;
3799 if( GL_FALSE == assemble_dst(pAsm) )
3804 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3809 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3811 if( GL_FALSE == next_ins(pAsm) )
// w channel.
3819 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3820 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3822 if( GL_FALSE == assemble_dst(pAsm) )
3827 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3829 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3830 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3831 pAsm->S[0].src.reg = tmp;
3833 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3834 noneg_PVSSRC(&(pAsm->S[0].src));
3836 if( GL_FALSE == next_ins(pAsm) )
// Emits FLR as a single FLOOR instruction from source 0 to the destination
// of the current instruction.
// NOTE(review): return statements are not visible in this excerpt.
3845 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3849 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3851 if ( GL_FALSE == assemble_dst(pAsm) )
3856 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3861 if ( GL_FALSE == next_ins(pAsm) )
// Emits a float-to-integer conversion through assemble_math_function, using
// the Evergreen FLT_TO_INT opcode when unAsic is 8 and the SQ FLT_TO_INT
// opcode in the other return.
3869 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3871 if(8 == pAsm->unAsic)
3873 return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT);
3876 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
// Emits FRC as a single FRACT instruction from source 0 to the destination
// of the current instruction.
// NOTE(review): return statements are not visible in this excerpt.
3879 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3883 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3885 if ( GL_FALSE == assemble_dst(pAsm) )
3890 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3895 if ( GL_FALSE == next_ins(pAsm) )
// Emits a kill instruction with the hardware opcode supplied by the caller.
// The destination is temporary register 0 with every write flag cleared.
// Source 0 is set to temporary register 0 with swizzle SQ_SEL_0. For
// OPCODE_KIL_NV source 1 is set to register 0 with swizzle SQ_SEL_1 and the
// negate flag; the other visible path loads an operand with
// assemble_src(pAsm, 0, 1).
// After emission the shader is flagged as using kill and alu_x_opcode is set
// to SQ_CF_INST_ALU.
// NOTE(review): the body of the OPCODE_KIL test, the else keyword and the
// return statements are not visible in this excerpt.
3903 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3905 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3907 if(pILInst->Opcode == OPCODE_KIL)
3910 pAsm->D.dst.opcode = opcode;
3911 //pAsm->D.dst.math = 1;
3913 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3914 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3915 pAsm->D.dst.reg = 0;
3916 pAsm->D.dst.writex = 0;
3917 pAsm->D.dst.writey = 0;
3918 pAsm->D.dst.writez = 0;
3919 pAsm->D.dst.writew = 0;
3921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3922 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3923 pAsm->S[0].src.reg = 0;
3924 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3925 noneg_PVSSRC(&(pAsm->S[0].src));
3927 if(pILInst->Opcode == OPCODE_KIL_NV)
3929 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3930 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3931 pAsm->S[1].src.reg = 0;
3932 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3933 neg_PVSSRC(&(pAsm->S[1].src));
3937 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3944 if ( GL_FALSE == next_ins(pAsm) )
3949 /* Doc says KILL has to be last(end) ALU clause */
3950 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3951 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
// Emits LG2 through assemble_math_function, using the Evergreen LOG_IEEE
// opcode when unAsic is 8 and the SQ LOG_IEEE opcode in the other return.
3956 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3958 if(8 == pAsm->unAsic)
3960 return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE);
3963 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
// Emits LRP in three instructions using one helper register:
//   1. ADD    helper = (program source 1) + negated (program source 2)
//   2. MULADD helper = helper, program source 0, program source 2
//   3. MOV    destination = helper
// NOTE(review): presumably assemble_src takes (program source index, hardware
// operand slot) with -1 meaning the same slot, which makes the sequence
// a*(b-c)+c - confirm against assemble_src. Return statements are not
// visible in this excerpt.
3966 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3970 if( GL_FALSE == checkop3(pAsm) )
3975 tmp = gethelpr(pAsm);
// Step 1: difference of the two blended values into the helper register.
3977 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3979 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3980 pAsm->D.dst.reg = tmp;
3981 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3982 nomask_PVSDST(&(pAsm->D.dst));
3985 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3990 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3995 neg_PVSSRC(&(pAsm->S[1].src));
3997 if( GL_FALSE == next_ins(pAsm) )
// Step 2: multiply-add with the helper register as operand 0.
4002 if(8 == pAsm->unAsic)
4004 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4008 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4010 pAsm->D.dst.op3 = 1;
4012 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4013 pAsm->D.dst.reg = tmp;
4014 nomask_PVSDST(&(pAsm->D.dst));
4015 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4017 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4018 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4019 pAsm->S[0].src.reg = tmp;
4020 noswizzle_PVSSRC(&(pAsm->S[0].src));
4023 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4028 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
4033 if( GL_FALSE == next_ins(pAsm) )
// Step 3: copy the helper register to the real destination.
4038 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4040 if( GL_FALSE == assemble_dst(pAsm) )
4045 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4046 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4047 pAsm->S[0].src.reg = tmp;
4048 noswizzle_PVSSRC(&(pAsm->S[0].src));
4050 if( GL_FALSE == next_ins(pAsm) )
// Emits LOG with three helper registers, following the instruction list in
// the comments below: tmp1.x holds the absolute value of a.x (MAX of the
// source and its negation), tmp2.x the base-2 log of tmp1.x, tmp3.x the floor
// of tmp2.x. The destination then receives tmp3.x in x, EX2 of
// (tmp2.x - tmp3.x) in y, tmp2.x in z, and a MOV with swizzle SQ_SEL_1 in w.
// Before each destination write the flags of the other channels are cleared.
// NOTE(review): several source rtype fields are assigned DST_REG_TEMPORARY
// where SRC_REG_TEMPORARY is used elsewhere; presumably the two enumerators
// have the same value - confirm. The per-channel write-mask guards (if any)
// and the return statements are not visible in this excerpt.
4058 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
4060 BITS tmp1, tmp2, tmp3;
4064 tmp1 = gethelpr(pAsm);
4065 tmp2 = gethelpr(pAsm);
4066 tmp3 = gethelpr(pAsm);
4068 // FIXME: The hardware can do fabs() directly on input
4069 // elements, but the compiler doesn't have the
4070 // capability to use that.
4072 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
4074 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4076 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4077 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4078 pAsm->D.dst.reg = tmp1;
4079 pAsm->D.dst.writex = 1;
4081 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
// Source 1 is source 0 with the sign flipped.
4086 pAsm->S[1].bits = pAsm->S[0].bits;
4087 flipneg_PVSSRC(&(pAsm->S[1].src));
4089 if ( GL_FALSE == next_ins(pAsm) )
4096 // LG2 tmp2.x, tmp1.x
4097 // FLOOR tmp3.x, tmp2.x
4098 // MOV dst.x, tmp3.x
4099 // ADD tmp3.x, tmp2.x, -tmp3.x
4100 // EX2 dst.y, tmp3.x
4101 // MOV dst.z, tmp2.x
4104 // LG2 tmp2.x, tmp1.x
4105 // FLOOR tmp3.x, tmp2.x
4107 if(8 == pAsm->unAsic)
4109 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4113 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4115 pAsm->D.dst.math = 1;
4117 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4118 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4119 pAsm->D.dst.reg = tmp2;
4120 pAsm->D.dst.writex = 1;
4122 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4123 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4124 pAsm->S[0].src.reg = tmp1;
4126 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4127 noneg_PVSSRC(&(pAsm->S[0].src));
4129 if( GL_FALSE == next_ins(pAsm) )
4134 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
4136 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4137 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4138 pAsm->D.dst.reg = tmp3;
4139 pAsm->D.dst.writex = 1;
4141 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4142 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4143 pAsm->S[0].src.reg = tmp2;
4145 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4146 noneg_PVSSRC(&(pAsm->S[0].src));
4148 if( GL_FALSE == next_ins(pAsm) )
4153 // MOV dst.x, tmp3.x
4155 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4157 if( GL_FALSE == assemble_dst(pAsm) )
4162 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4164 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4165 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4166 pAsm->S[0].src.reg = tmp3;
4168 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4169 noneg_PVSSRC(&(pAsm->S[0].src));
4171 if( GL_FALSE == next_ins(pAsm) )
4176 // ADD tmp3.x, tmp2.x, -tmp3.x
4177 // EX2 dst.y, tmp3.x
4179 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4181 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4182 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4183 pAsm->D.dst.reg = tmp3;
4184 pAsm->D.dst.writex = 1;
4186 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4187 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4188 pAsm->S[0].src.reg = tmp2;
4190 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4191 noneg_PVSSRC(&(pAsm->S[0].src));
4193 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4194 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
4195 pAsm->S[1].src.reg = tmp3;
4197 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4198 neg_PVSSRC(&(pAsm->S[1].src));
4200 if( GL_FALSE == next_ins(pAsm) )
4205 if(8 == pAsm->unAsic)
4207 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4211 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4213 pAsm->D.dst.math = 1;
4215 if( GL_FALSE == assemble_dst(pAsm) )
4220 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4222 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4223 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4224 pAsm->S[0].src.reg = tmp3;
4226 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4227 noneg_PVSSRC(&(pAsm->S[0].src));
4229 if( GL_FALSE == next_ins(pAsm) )
4234 // MOV dst.z, tmp2.x
4236 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4238 if( GL_FALSE == assemble_dst(pAsm) )
4243 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
4245 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4246 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4247 pAsm->S[0].src.reg = tmp2;
4249 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4250 noneg_PVSSRC(&(pAsm->S[0].src));
4252 if( GL_FALSE == next_ins(pAsm) )
// Final MOV: only the w write flag is left enabled and every source channel
// uses swizzle SQ_SEL_1.
4259 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4261 if( GL_FALSE == assemble_dst(pAsm) )
4266 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
4268 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4269 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4270 pAsm->S[0].src.reg = tmp1;
4272 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
4273 noneg_PVSSRC(&(pAsm->S[0].src));
4275 if( GL_FALSE == next_ins(pAsm) )
// Emits MAD with the three-operand MULADD opcode (Evergreen variant when
// unAsic is 8). The result is first computed into a helper register, and
// then copied to the real destination with a MOV, in two situations:
//   - the destination is a temporary that is also read as one of the three
//     sources (same register index), or
//   - the destination write mask is not 0xF, since OP3 has no write mask.
// NOTE(review): braces, else keywords and return statements are not visible
// in this excerpt.
4283 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
4286 GLboolean bReplaceDst = GL_FALSE;
4287 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4289 if( GL_FALSE == checkop3(pAsm) )
4294 if(8 == pAsm->unAsic)
4296 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4300 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4302 pAsm->D.dst.op3 = 1;
// Detect a destination temporary that aliases one of the sources.
4306 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
4307 { /* TODO : more investigation on MAD src and dst using same register */
4308 for(ii=0; ii<3; ii++)
4310 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
4311 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
4313 bReplaceDst = GL_TRUE;
4318 if(0xF != pILInst->DstReg.WriteMask)
4319 { /* OP3 has no support for write mask */
4320 bReplaceDst = GL_TRUE;
// Redirect the result to a helper register with all channels enabled.
4323 if(GL_TRUE == bReplaceDst)
4325 tmp = gethelpr(pAsm);
4327 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4328 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4329 pAsm->D.dst.reg = tmp;
4331 nomask_PVSDST(&(pAsm->D.dst));
4335 if( GL_FALSE == assemble_dst(pAsm) )
4341 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4346 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4351 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
4356 if ( GL_FALSE == next_ins(pAsm) )
// Copy the helper register to the real (possibly masked) destination.
4361 if (GL_TRUE == bReplaceDst)
4363 if( GL_FALSE == assemble_dst(pAsm) )
4368 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4371 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4372 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4373 pAsm->S[0].src.reg = tmp;
4375 noneg_PVSSRC(&(pAsm->S[0].src));
4376 noswizzle_PVSSRC(&(pAsm->S[0].src));
4378 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a LIT instruction as a sequence of ALU instructions, each stage
 * introduced by a test of the destination write-enable bits that
 * assemble_dst() leaves in pAsm->D.dst.
 *
 *   writex or writew : MOV whose source channels all select SQ_SEL_1
 *   writey           : MAX of src.x and a source whose channels all select
 *                      SQ_SEL_0
 *   writez           : LOG_CLAMPED of src.y into z, MUL_LIT into helper.x,
 *                      EXP_IEEE of helper.x into z
 *
 * pAsm  assembler state.
 *
 * A helper register is taken from gethelpr() up front.  The results of
 * assemble_dst(), assemble_src() and next_ins() are each compared against
 * GL_FALSE.
 */
4388 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
4390 unsigned int dstReg;
4391 unsigned int dstType;
4393 int tmp = gethelpr(pAsm);
4395 if( GL_FALSE == assemble_dst(pAsm) )
/* Remember the resolved destination; dstReg is read again by the MUL_LIT
 * stage.  NOTE(review): dstType is stored but none of the following lines
 * reads it, and the MUL_LIT stage labels dstReg as SRC_REG_TEMPORARY
 * unconditionally - confirm that is correct for non-temporary destinations. */
4399 dstReg = pAsm->D.dst.reg;
4400 dstType = pAsm->D.dst.rtype;
4402 /* dst.xw, <- 1.0 */
4403 if( pAsm->D.dst.writex || pAsm->D.dst.writew )
4405 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
/* y and z are masked off for this MOV; the source assembled above is then
 * overwritten with the helper register and constant-one channel selects. */
4410 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4411 pAsm->D.dst.writey = 0;
4412 pAsm->D.dst.writez = 0;
4413 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4414 pAsm->S[0].src.reg = tmp;
4415 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4416 noneg_PVSSRC(&(pAsm->S[0].src));
4417 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4418 pAsm->S[0].src.swizzley = SQ_SEL_1;
4419 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4420 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4421 if( GL_FALSE == next_ins(pAsm) )
/* The destination is assembled again before the y stage. */
4427 if( GL_FALSE == assemble_dst(pAsm) )
4432 if( pAsm->D.dst.writey ) {
4434 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4439 /* dst.y = max(src.x, 0.0) */
4440 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4441 pAsm->D.dst.writex = 0;
4442 pAsm->D.dst.writey = 1;
4443 pAsm->D.dst.writez = 0;
4444 pAsm->D.dst.writew = 0;
4445 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
/* Second MAX operand: helper register with every channel selecting
 * SQ_SEL_0. */
4446 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4447 pAsm->S[1].src.reg = tmp;
4448 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4449 noneg_PVSSRC(&(pAsm->S[1].src));
4450 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4451 pAsm->S[1].src.swizzley = SQ_SEL_0;
4452 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4453 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4454 if( GL_FALSE == next_ins(pAsm) )
/* The destination is assembled again before the z stage. */
4460 if( GL_FALSE == assemble_dst(pAsm) )
4464 if ( pAsm->D.dst.writez) {
4466 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4471 /* dst.z = log(src.y) */
4472 if(8 == pAsm->unAsic)
4474 pAsm->D.dst.opcode = EG_OP2_INST_LOG_CLAMPED;
4478 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4480 pAsm->D.dst.math = 1;
4481 pAsm->D.dst.writex = 0;
4482 pAsm->D.dst.writey = 0;
4483 pAsm->D.dst.writez = 1;
4484 pAsm->D.dst.writew = 0;
4485 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4486 if( GL_FALSE == next_ins(pAsm) )
/* IL source 0 is assembled twice, with third argument -1 and then 2; the
 * swizzles that follow are applied to S[0] and S[2] respectively.
 * NOTE(review): presumably the third argument names the S[] slot to fill -
 * confirm against assemble_src(). */
4491 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4496 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4501 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4503 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4505 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4506 if(8 == pAsm->unAsic)
4508 pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT;
4512 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4514 pAsm->D.dst.math = 1;
4515 pAsm->D.dst.op3 = 1;
4516 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4517 pAsm->D.dst.reg = tmp;
4518 pAsm->D.dst.writex = 1;
4519 pAsm->D.dst.writey = 0;
4520 pAsm->D.dst.writez = 0;
4521 pAsm->D.dst.writew = 0;
/* Middle operand: the z channel of the register saved in dstReg. */
4524 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4525 pAsm->S[1].src.reg = dstReg;
4526 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4527 noneg_PVSSRC(&(pAsm->S[1].src));
4528 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4529 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4530 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4531 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4533 if( GL_FALSE == next_ins(pAsm) )
4538 /* dst.z = exp(tmp.x) */
4539 if( GL_FALSE == assemble_dst(pAsm) )
4543 if(8 == pAsm->unAsic)
4545 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4549 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4551 pAsm->D.dst.math = 1;
4552 pAsm->D.dst.writex = 0;
4553 pAsm->D.dst.writey = 0;
4554 pAsm->D.dst.writez = 1;
4555 pAsm->D.dst.writew = 0;
4557 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4558 pAsm->S[0].src.reg = tmp;
4559 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4560 noneg_PVSSRC(&(pAsm->S[0].src));
4561 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4562 pAsm->S[0].src.swizzley = SQ_SEL_X;
4563 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4564 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4566 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a two-operand MAX: opcode SQ_OP2_INST_MAX, destination from
 * assemble_dst(), IL sources 0 and 1 from assemble_src().
 *
 * pAsm  assembler state.
 *
 * The results of checkop2(), assemble_dst(), assemble_src() and next_ins()
 * are each compared against GL_FALSE.
 */
4574 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4576 if( GL_FALSE == checkop2(pAsm) )
4581 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4583 if( GL_FALSE == assemble_dst(pAsm) )
4588 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4593 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4598 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a two-operand MIN: opcode SQ_OP2_INST_MIN, destination from
 * assemble_dst(), IL sources 0 and 1 from assemble_src().
 *
 * pAsm  assembler state.
 *
 * The results of checkop2(), assemble_dst(), assemble_src() and next_ins()
 * are each compared against GL_FALSE.
 */
4606 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4608 if( GL_FALSE == checkop2(pAsm) )
4613 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4615 if( GL_FALSE == assemble_dst(pAsm) )
4620 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4625 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4630 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a MOV: opcode SQ_OP2_INST_MOV, destination from assemble_dst(),
 * IL source 0 from assemble_src().
 *
 * pAsm  assembler state.
 *
 * The results of assemble_dst(), assemble_src() and next_ins() are each
 * compared against GL_FALSE.
 */
4638 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4642 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4644 if (GL_FALSE == assemble_dst(pAsm))
4649 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4654 if ( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a two-operand MUL: opcode SQ_OP2_INST_MUL, destination from
 * assemble_dst(), IL sources 0 and 1 from assemble_src().
 *
 * pAsm  assembler state.
 *
 * The results of checkop2(), assemble_dst(), assemble_src() and next_ins()
 * are each compared against GL_FALSE.
 */
4662 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4664 if( GL_FALSE == checkop2(pAsm) )
4669 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4671 if( GL_FALSE == assemble_dst(pAsm) )
4676 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4681 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4686 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble POW as four ALU instructions that share one helper register:
 *
 *   1. LOG_IEEE  helper   <- IL source 0
 *   2. MUL       helper   <- helper.x * IL source 1
 *   3. EXP_IEEE  helper   <- helper.x
 *   4. MOV       dst      <- helper.x   (destination from assemble_dst())
 *
 * pAsm  assembler state.
 *
 * LOG_IEEE and EXP_IEEE use the EG_ or SQ_ opcode; the 8 == pAsm->unAsic test
 * sits directly above each pair of assignments.  The results of
 * assemble_src(), assemble_dst() and next_ins() are each compared against
 * GL_FALSE.
 */
4694 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4700 tmp = gethelpr(pAsm);
4702 // LG2 tmp.x, a.swizzle
4703 if(8 == pAsm->unAsic)
4705 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4709 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4711 pAsm->D.dst.math = 1;
4713 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4714 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4715 pAsm->D.dst.reg = tmp;
4716 nomask_PVSDST(&(pAsm->D.dst));
4718 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4723 if( GL_FALSE == next_ins(pAsm) )
4728 // MUL tmp.x, tmp.x, b.swizzle
4729 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4731 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4732 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4733 pAsm->D.dst.reg = tmp;
4734 nomask_PVSDST(&(pAsm->D.dst));
4736 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4737 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4738 pAsm->S[0].src.reg = tmp;
4739 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4740 noneg_PVSSRC(&(pAsm->S[0].src));
4742 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4747 if( GL_FALSE == next_ins(pAsm) )
4752 // EX2 dst.mask, tmp.x
4754 if(8 == pAsm->unAsic)
4756 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4760 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4762 pAsm->D.dst.math = 1;
4764 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4765 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4766 pAsm->D.dst.reg = tmp;
4767 nomask_PVSDST(&(pAsm->D.dst));
4769 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4770 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4771 pAsm->S[0].src.reg = tmp;
4772 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4773 noneg_PVSSRC(&(pAsm->S[0].src));
4775 if( GL_FALSE == next_ins(pAsm) )
4780 // Now replicate result to all necessary channels in destination
4781 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4783 if( GL_FALSE == assemble_dst(pAsm) )
/* A source operand takes a SRC_REG_* register type, matching the other
 * source setups in this function (previously DST_REG_TEMPORARY was stored
 * here). */
4788 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4789 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4790 pAsm->S[0].src.reg = tmp;
4792 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4793 noneg_PVSSRC(&(pAsm->S[0].src));
4795 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble RCP by delegating to assemble_math_function() with
 * EG_OP2_INST_RECIP_IEEE when pAsm->unAsic is 8 and SQ_OP2_INST_RECIP_IEEE
 * otherwise; that call's result is returned unchanged.
 *
 * pAsm  assembler state.
 */
4803 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4805 if(8 == pAsm->unAsic)
4807 return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE);
4810 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
/*
 * Assemble RSQ by delegating to assemble_math_function() with
 * EG_OP2_INST_RECIPSQRT_IEEE when pAsm->unAsic is 8 and
 * SQ_OP2_INST_RECIPSQRT_IEEE otherwise; that call's result is returned
 * unchanged.
 *
 * pAsm  assembler state.
 */
4813 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4815 if(8 == pAsm->unAsic)
4817 return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE);
4820 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
/*
 * Assemble SCS (sine and cosine of one scalar) as five ALU instructions
 * that share one helper register:
 *
 *   1. MULADD  helper   <- IL source 0 * literal.x + literal.y
 *                          (literals 1/(2 pi) and 0.5)
 *   2. FRACT   helper.x <- helper.x
 *   3. MULADD  helper   <- helper.x * literal.x + literal.y
 *   4. COS     reads helper.x, with writey cleared
 *   5. SIN     reads helper.x, with writex cleared
 *
 * pAsm  assembler state.
 *
 * MULADD, COS and SIN use the EG_ or SQ_ opcode; the 8 == pAsm->unAsic test
 * sits directly above each pair of assignments.  The result of every
 * next_ins() call is compared against GL_FALSE.
 */
4823 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4829 tmp = gethelpr(pAsm);
4831 if(8 == pAsm->unAsic)
4833 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4837 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4839 pAsm->D.dst.op3 = 1;
4841 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4842 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4843 pAsm->D.dst.reg = tmp;
/* NOTE(review): unlike the other assemblers in this file, the result of
 * this assemble_src() call is not tested - confirm that is intended. */
4845 assemble_src(pAsm, 0, -1);
/* Operands 1 and 2 are the x and y channels of an inline literal whose
 * values are stored in pAsm->C[0] and pAsm->C[1] below. */
4847 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4848 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4850 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4851 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4853 pAsm->D2.dst2.literal_slots = 1;
4854 pAsm->C[0].f = 1/(3.1415926535 * 2);
4855 pAsm->C[1].f = 0.5F;
4857 if ( GL_FALSE == next_ins(pAsm) )
4862 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
4864 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4865 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4866 pAsm->D.dst.reg = tmp;
4867 pAsm->D.dst.writex = 1;
4869 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4870 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4871 pAsm->S[0].src.reg = tmp;
4872 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4874 if(( GL_FALSE == next_ins(pAsm) ))
4878 if(8 == pAsm->unAsic)
4880 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4884 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4886 pAsm->D.dst.op3 = 1;
4888 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4889 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4890 pAsm->D.dst.reg = tmp;
4892 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4893 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4894 pAsm->S[0].src.reg = tmp;
4895 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4897 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4898 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4900 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4901 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4903 pAsm->D2.dst2.literal_slots = 1;
/* Two literal pairs follow: (2 pi, -pi) and (1.0, -0.5).
 * NOTE(review): presumably these are alternatives chosen by a hardware
 * generation test - confirm which condition selects each pair. */
4906 pAsm->C[0].f = 3.1415926535897f * 2.0f;
4907 pAsm->C[1].f = -3.1415926535897f;
4909 pAsm->C[0].f = 1.0f;
4910 pAsm->C[1].f = -0.5f;
4913 if(( GL_FALSE == next_ins(pAsm) ))
/* Cosine stage: the y write enable is cleared before the instruction is
 * emitted. */
4919 if(8 == pAsm->unAsic)
4921 pAsm->D.dst.opcode = EG_OP2_INST_COS;
4925 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4927 pAsm->D.dst.math = 1;
4931 pAsm->D.dst.writey = 0;
4933 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4934 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4935 pAsm->S[0].src.reg = tmp;
4936 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4937 noneg_PVSSRC(&(pAsm->S[0].src));
4939 if ( GL_FALSE == next_ins(pAsm) )
/* Sine stage: the x write enable is cleared before the instruction is
 * emitted. */
4945 if(8 == pAsm->unAsic)
4947 pAsm->D.dst.opcode = EG_OP2_INST_SIN;
4951 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4953 pAsm->D.dst.math = 1;
4957 pAsm->D.dst.writex = 0;
4959 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4960 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4961 pAsm->S[0].src.reg = tmp;
4962 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4963 noneg_PVSSRC(&(pAsm->S[0].src));
4965 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a two-operand instruction whose opcode is supplied by the caller.
 *
 * pAsm    assembler state.
 * opcode  stored verbatim in pAsm->D.dst.opcode.
 *
 * The destination comes from assemble_dst() and IL sources 0 and 1 from
 * assemble_src().  The math flag is left untouched (its assignment is
 * commented out).  The results of checkop2(), assemble_dst(),
 * assemble_src() and next_ins() are each compared against GL_FALSE.
 */
4973 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4975 if( GL_FALSE == checkop2(pAsm) )
4980 pAsm->D.dst.opcode = opcode;
4981 //pAsm->D.dst.math = 1;
4983 if( GL_FALSE == assemble_dst(pAsm) )
4988 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4993 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4998 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble a predicated comparison of the last condition register against
 * an operand whose channels all select SQ_SEL_0.
 *
 * pAsm    assembler state.
 * opcode  stored verbatim in pAsm->D.dst.opcode; the math and predicated
 *         flags are both set.
 *
 * Destination: the x channel of pAsm->uHelpReg (y, z and w are disabled).
 * Operand 0:   register last_cond_register + starting_temp_register_number,
 *              x channel selected by the low three bits of the IL
 *              instruction's DstReg.CondSwizzle.
 * Operand 1:   pAsm->uHelpReg with SQ_SEL_0 on every channel.
 *
 * The result of next_ins() is compared against GL_FALSE.
 */
5006 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
5008 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
5010 pAsm->D.dst.opcode = opcode;
5011 pAsm->D.dst.math = 1;
5012 pAsm->D.dst.predicated = 1;
5014 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5015 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5016 pAsm->D.dst.reg = pAsm->uHelpReg;
5017 pAsm->D.dst.writex = 1;
5018 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
5020 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5021 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5022 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
5023 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
5024 noneg_PVSSRC(&(pAsm->S[0].src));
5026 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5027 pAsm->S[1].src.reg = pAsm->uHelpReg;
5028 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5029 noneg_PVSSRC(&(pAsm->S[1].src));
5030 pAsm->S[1].src.swizzlex = SQ_SEL_0;
5031 pAsm->S[1].src.swizzley = SQ_SEL_0;
5032 pAsm->S[1].src.swizzlez = SQ_SEL_0;
5033 pAsm->S[1].src.swizzlew = SQ_SEL_0;
5035 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble SGE: opcode SQ_OP2_INST_SETGE, destination from assemble_dst(),
 * IL sources 0 and 1 from assemble_src() in their natural order.
 *
 * pAsm  assembler state.
 *
 * The results of checkop2(), assemble_dst(), assemble_src() and next_ins()
 * are each compared against GL_FALSE.
 */
5043 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
5045 if( GL_FALSE == checkop2(pAsm) )
5050 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
5052 if( GL_FALSE == assemble_dst(pAsm) )
5057 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5062 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5067 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble SLT using opcode SQ_OP2_INST_SETGT.
 *
 * pAsm  assembler state.
 *
 * IL source 0 is passed to assemble_src() with third argument 1 and IL
 * source 1 with third argument 0.
 * NOTE(review): presumably the third argument names the S[] slot to fill,
 * so "a < b" is emitted as "b > a" - confirm against assemble_src().
 *
 * The results of checkop2(), assemble_dst(), assemble_src() and next_ins()
 * are each compared against GL_FALSE.
 */
5075 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
5077 if( GL_FALSE == checkop2(pAsm) )
5082 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
5084 if( GL_FALSE == assemble_dst(pAsm) )
5089 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5094 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
5099 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble SSG (sign of the source) as two OP3 CNDGT instructions that
 * communicate through one helper register:
 *
 *   1. helper <- CNDGT(IL source 0, operand with SQ_SEL_1 swizzle,
 *                      IL source 0)
 *   2. dst    <- CNDGT(-helper, negated operand with SQ_SEL_1 swizzle,
 *                      helper)
 *
 * pAsm  assembler state.
 *
 * CNDGT uses the EG_ or SQ_ opcode; the 8 == pAsm->unAsic test sits directly
 * above each pair of assignments.  The results of assemble_src(),
 * assemble_dst() and next_ins() are each compared against GL_FALSE.
 */
5107 GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
5111 GLuint tmp = gethelpr(pAsm);
5112 /* tmp = (src > 0 ? 1 : src) */
5113 if(8 == pAsm->unAsic)
5115 pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5119 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5121 pAsm->D.dst.op3 = 1;
5122 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5123 pAsm->D.dst.reg = tmp;
/* IL source 0 is assembled twice (third argument -1, then 2); only the
 * swizzle of operand 1 is set in between. */
5125 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5130 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5132 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
5137 if( GL_FALSE == next_ins(pAsm) )
5142 /* dst = (-tmp > 0 ? -1 : tmp) */
5143 if(8 == pAsm->unAsic)
5145 pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5149 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5151 pAsm->D.dst.op3 = 1;
5153 if( GL_FALSE == assemble_dst(pAsm) )
/* Operand 0: the helper register, unswizzled and negated. */
5158 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5159 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5160 pAsm->S[0].src.reg = tmp;
5161 noswizzle_PVSSRC(&(pAsm->S[0].src));
5162 neg_PVSSRC(&(pAsm->S[0].src));
/* Operand 1: SQ_SEL_1 swizzle with the negate flag applied. */
5164 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5165 neg_PVSSRC(&(pAsm->S[1].src));
/* Operand 2: the helper register, unswizzled. */
5167 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5168 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5169 pAsm->S[2].src.reg = tmp;
5170 noswizzle_PVSSRC(&(pAsm->S[2].src));
5172 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assembler entry point named for the STP opcode; takes the assembler state
 * and yields a GLboolean.
 * NOTE(review): confirm what, if anything, this routine emits.
 */
5180 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
/*
 * Assemble a texture instruction for the current IL instruction.
 *
 * pAsm  assembler state; the instruction examined is
 *       pAsm->pILInst[pAsm->uiCurInst].
 *
 * Stages, in order of appearance:
 *   - classify the register file of IL source 0; constant-like files are
 *     passed to mov_temp() first;
 *   - OPCODE_TXP: RECIP_IEEE of src.w and a MUL of src by it, both into a
 *     helper register that is then recorded in pAsm->aArgSubst[1];
 *   - TEXTURE_CUBE_INDEX: CUBE, RECIP_IEEE, MULADD and MOV through two helper
 *     registers, the first of which is recorded in pAsm->aArgSubst[1];
 *   - select the texture opcode, mark the instruction as a texture
 *     instruction, fill operand 1 with the sampler unit and call tex_dst()
 *     and tex_src();
 *   - TexShadow == 1: a saturated ADD of the destination and
 *     pAsm->shadow_regs[TexSrcUnit].
 *
 * need_barrier is raised by each of the first three stages and copied into
 * pAsm->need_tex_barrier.  The results of mov_temp(), assemble_src(),
 * assemble_dst(), tex_dst(), tex_src() and the tested next_ins() calls are
 * each compared against GL_FALSE.
 */
5185 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
5187 GLboolean src_const;
5188 GLboolean need_barrier = GL_FALSE;
5192 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
5194 case PROGRAM_UNIFORM:
5195 case PROGRAM_CONSTANT:
5196 case PROGRAM_LOCAL_PARAM:
5197 case PROGRAM_ENV_PARAM:
5198 case PROGRAM_STATE_VAR:
5199 src_const = GL_TRUE;
5201 case PROGRAM_TEMPORARY:
5204 src_const = GL_FALSE;
/* A constant-file coordinate goes through mov_temp() before sampling. */
5208 if (GL_TRUE == src_const)
5210 if ( GL_FALSE == mov_temp(pAsm, 0) )
5212 need_barrier = GL_TRUE;
/* TXP: helper.w = RECIP_IEEE(src.w), then helper.xyz = src * helper.w. */
5215 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5217 GLuint tmp = gethelpr(pAsm);
5218 if(8 == pAsm->unAsic)
5220 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5224 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5226 pAsm->D.dst.math = 1;
5227 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5228 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5229 pAsm->D.dst.reg = tmp;
5230 pAsm->D.dst.writew = 1;
5232 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5236 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
5237 if( GL_FALSE == next_ins(pAsm) )
5242 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5243 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5244 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5245 pAsm->D.dst.reg = tmp;
5246 pAsm->D.dst.writex = 1;
5247 pAsm->D.dst.writey = 1;
5248 pAsm->D.dst.writez = 1;
5249 pAsm->D.dst.writew = 0;
5251 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5255 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5256 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5257 pAsm->S[1].src.reg = tmp;
5258 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
5260 if( GL_FALSE == next_ins(pAsm) )
/* The helper register is recorded in aArgSubst[1]. */
5265 pAsm->aArgSubst[1] = tmp;
5266 need_barrier = GL_TRUE;
/* Cube maps: coordinates are preprocessed through two helper registers. */
5269 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
5271 GLuint tmp1 = gethelpr(pAsm);
5272 GLuint tmp2 = gethelpr(pAsm);
5274 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5275 if(8 == pAsm->unAsic)
5277 pAsm->D.dst.opcode = EG_OP2_INST_CUBE;
5281 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
5283 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5284 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5285 pAsm->D.dst.reg = tmp1;
5286 nomask_PVSDST(&(pAsm->D.dst));
5288 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5293 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5298 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
5299 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
5301 if( GL_FALSE == next_ins(pAsm) )
5306 /* tmp1.z = RCP_e(|tmp1.z|) */
5307 if(8 == pAsm->unAsic)
5309 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5313 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5315 pAsm->D.dst.math = 1;
5316 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5317 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5318 pAsm->D.dst.reg = tmp1;
5319 pAsm->D.dst.writez = 1;
5321 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5322 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5323 pAsm->S[0].src.reg = tmp1;
5324 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
5325 pAsm->S[0].src.abs = 1;
5329 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5330 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5331 * muladd has no writemask, have to use another temp
5333 if(8 == pAsm->unAsic)
5335 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5339 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5341 pAsm->D.dst.op3 = 1;
5342 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5343 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5344 pAsm->D.dst.reg = tmp2;
5346 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5347 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5348 pAsm->S[0].src.reg = tmp1;
5349 noswizzle_PVSSRC(&(pAsm->S[0].src));
5350 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5351 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5352 pAsm->S[1].src.reg = tmp1;
5353 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
5354 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5355 /* immediate c 1.5 */
5356 pAsm->D2.dst2.literal_slots = 1;
5357 pAsm->C[0].f = 1.5F;
5358 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
5359 pAsm->S[2].src.reg = tmp1;
5360 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
5364 /* tmp1.xy = temp2.xy */
5365 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5366 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5367 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5368 pAsm->D.dst.reg = tmp1;
5369 pAsm->D.dst.writex = 1;
5370 pAsm->D.dst.writey = 1;
5371 pAsm->D.dst.writez = 0;
5372 pAsm->D.dst.writew = 0;
5374 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5375 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5376 pAsm->S[0].src.reg = tmp2;
5377 noswizzle_PVSSRC(&(pAsm->S[0].src));
/* The first cube helper register is recorded in aArgSubst[1]. */
5380 pAsm->aArgSubst[1] = tmp1;
5381 need_barrier = GL_TRUE;
/* Pick the texture opcode from the IL opcode. */
5385 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
5388 /* will these need WQM(1) on CF inst ? */
5389 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
5392 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
5395 /* this should actually be SAMPLE_LB but that needs bias to be
5396 * embedded in the instruction - cant do here */
5397 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5400 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5403 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5404 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
5406 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
/* NOTE(review): the is_tex assignment and the need_barrier test appear twice
 * in a row below; this looks like an accidental duplicate - confirm and
 * collapse if so. */
5409 pAsm->is_tex = GL_TRUE;
5410 if ( GL_TRUE == need_barrier )
5412 pAsm->is_tex = GL_TRUE;
5413 if ( GL_TRUE == need_barrier )
5415 pAsm->need_tex_barrier = GL_TRUE;
5417 // Set src1 to tex unit id
5418 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5419 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5421 //No sw info from mesa compiler, so hard code here.
5422 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5423 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5424 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5425 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5427 if( GL_FALSE == tex_dst(pAsm) )
5432 if( GL_FALSE == tex_src(pAsm) )
/* Coordinate swizzle fix-ups applied after tex_src(). */
5437 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5439 /* hopefully did swizzles before */
5440 noswizzle_PVSSRC(&(pAsm->S[0].src));
5443 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
5445 /* SAMPLE dst, tmp.yxwy, CUBE */
5446 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
5447 pAsm->S[0].src.swizzley = SQ_SEL_X;
5448 pAsm->S[0].src.swizzlez = SQ_SEL_W;
5449 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
5452 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5454 /* compare value goes to w chan ? */
5455 pAsm->S[0].src.swizzlew = SQ_SEL_Z;
5458 if ( GL_FALSE == next_ins(pAsm) )
5463 /* add ARB shadow ambient but clamp to 0..1 */
5464 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5466 /* ADD_SAT dst, dst, ambient[texunit] */
5467 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
5469 if( GL_FALSE == assemble_dst(pAsm) )
5473 pAsm->D2.dst2.SaturateMode = 1;
/* Operand 0 re-reads the destination register as a temporary; operand 1 is
 * the constant register listed in shadow_regs[] for this texture unit. */
5475 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5476 pAsm->S[0].src.reg = pAsm->D.dst.reg;
5477 noswizzle_PVSSRC(&(pAsm->S[0].src));
5478 noneg_PVSSRC(&(pAsm->S[0].src));
5480 pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
5481 pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5482 noswizzle_PVSSRC(&(pAsm->S[1].src));
5483 noneg_PVSSRC(&(pAsm->S[1].src));
5485 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assemble XPD (cross product) as a MUL followed by a MULADD:
 *
 *   1. MUL     tmp1 <- src0.zxy0 * src1.yzx0
 *   2. MULADD  out  <- src0.yzx0 * src1.zxy0 + (-tmp1)
 *   3. MOV     dst  <- tmp2   (only with a write mask other than 0xF)
 *
 * "out" is a second helper register (tmp2) when the IL destination's
 * WriteMask is not 0xF; the same test guards the trailing MOV.
 *
 * pAsm  assembler state.
 *
 * MULADD uses the EG_ or SQ_ opcode; the 8 == pAsm->unAsic test sits directly
 * above the two assignments.  The results of checkop2(), assemble_src(),
 * assemble_dst() and next_ins() are each compared against GL_FALSE.
 */
5495 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
5500 if( GL_FALSE == checkop2(pAsm) )
5505 tmp1 = gethelpr(pAsm);
5507 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5509 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5510 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5511 pAsm->D.dst.reg = tmp1;
5512 nomask_PVSDST(&(pAsm->D.dst));
5514 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5519 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
/* First product: rotate the operands to zxy and yzx, w selects SQ_SEL_0. */
5524 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5525 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5527 if( GL_FALSE == next_ins(pAsm) )
5532 if(8 == pAsm->unAsic)
5534 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5538 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5540 pAsm->D.dst.op3 = 1;
/* With a partial write mask the MULADD is aimed at a second helper
 * register. */
5542 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5544 tmp2 = gethelpr(pAsm);
5546 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5547 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5548 pAsm->D.dst.reg = tmp2;
5550 nomask_PVSDST(&(pAsm->D.dst));
5554 if( GL_FALSE == assemble_dst(pAsm) )
5560 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5565 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
/* Second product: the opposite rotation (yzx and zxy). */
5570 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5571 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5573 // result1 + (neg) result0
5574 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
5575 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5576 pAsm->S[2].src.reg = tmp1;
5578 neg_PVSSRC(&(pAsm->S[2].src));
5579 noswizzle_PVSSRC(&(pAsm->S[2].src));
5581 if( GL_FALSE == next_ins(pAsm) )
/* Copy the second helper register into the real destination. */
5587 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5589 if( GL_FALSE == assemble_dst(pAsm) )
5594 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5596 // Use tmp as source
5597 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5598 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5599 pAsm->S[0].src.reg = tmp2;
5601 noneg_PVSSRC(&(pAsm->S[0].src));
5602 noswizzle_PVSSRC(&(pAsm->S[0].src));
5604 if( GL_FALSE == next_ins(pAsm) )
/*
 * Assembler entry point named for export handling; takes the assembler state
 * and yields a GLboolean.
 * NOTE(review): confirm what, if anything, this routine emits.
 */
5613 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
/*
 * Lower pAsm->CALLSTACK[pAsm->CALLSP].current.
 *
 * pAsm     assembler state.
 * uReason  reason code for the adjustment.
 *
 * Four decrements are present, by 1, 4, 4 and 1.
 * NOTE(review): presumably each one belongs to a different uReason value -
 * confirm the mapping; it should mirror the increments in checkStackDepth().
 */
5618 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
5623 pAsm->CALLSTACK[pAsm->CALLSP].current--;
5626 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5629 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5632 /* TODO : for 16 vp asic, should -= 2; */
5633 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
/*
 * Maintain the depth bookkeeping of the call-stack entry at pAsm->CALLSP.
 *
 * pAsm           assembler state.
 * uReason        reason code for the adjustment.
 * bCheckMaxOnly  when GL_TRUE, the statements that follow the test compare
 *                current + 1 and current + 4 against .max and raise .max to
 *                the larger value.
 *
 * The remaining statements advance .current (by 1, 4, 4 and 1) and then raise
 * .max to .current when .current exceeds it.
 * NOTE(review): presumably uReason selects one of the listed amounts -
 * confirm the mapping.
 */
5638 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
5640 if(GL_TRUE == bCheckMaxOnly)
5645 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
5646 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5648 pAsm->CALLSTACK[pAsm->CALLSP].max =
5649 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
5653 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
5654 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5656 pAsm->CALLSTACK[pAsm->CALLSP].max =
5657 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
5667 pAsm->CALLSTACK[pAsm->CALLSP].current++;
5670 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5673 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5676 /* TODO : for 16 vp asic, should += 2; */
5677 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
/* Record a new high-water mark. */
5681 if(pAsm->CALLSTACK[pAsm->CALLSP].current
5682 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5684 pAsm->CALLSTACK[pAsm->CALLSP].max =
5685 pAsm->CALLSTACK[pAsm->CALLSP].current;
/*
 * Append a control-flow JUMP instruction.
 *
 * pAsm    assembler state; the instruction being filled is
 *         pAsm->cf_current_cf_clause_ptr after add_cf_instruction().
 * pops    stored in the pop_count bitfield.
 * offset  added to the instruction's own m_uIndex to form the address
 *         written to m_Word0.f.addr.
 *
 * Two encodings are present: SETfield() calls on m_Word1.val using the
 * EG_CF_WORD1 shift/mask pairs, introduced by the 8 == pAsm->unAsic test,
 * and direct stores to the m_Word1.f bitfields (cond SQ_CF_COND_ACTIVE,
 * cf_inst SQ_CF_INST_JUMP, barrier 1, the other flags 0).
 *
 * The result of add_cf_instruction() is compared against GL_FALSE.
 */
5689 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5691 if(GL_FALSE == add_cf_instruction(pAsm) )
5696 if(8 == pAsm->unAsic)
5698 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5700 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5701 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5703 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5704 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5706 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5707 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5709 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5710 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5712 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5713 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5715 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5716 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5718 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5719 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5721 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5722 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5724 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
/* Bitfield encoding of the same instruction. */
5728 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5729 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5730 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5732 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5733 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5734 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5735 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5737 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* Jump target, relative to this instruction's own index. */
5740 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
/*
 * Append a control-flow POP instruction.
 *
 * pAsm  assembler state; the instruction being filled is
 *       pAsm->cf_current_cf_clause_ptr after add_cf_instruction().
 * pops  stored in the pop_count bitfield.
 *
 * Two encodings are present: SETfield() calls on m_Word1.val using the
 * EG_CF_WORD1 shift/mask pairs, introduced by the 8 == pAsm->unAsic test,
 * and direct stores to the m_Word1.f bitfields (cond SQ_CF_COND_ACTIVE,
 * cf_inst SQ_CF_INST_POP, barrier 1, the other flags 0).  m_Word0.f.addr is
 * set to the instruction's own m_uIndex plus one.
 *
 * The result of add_cf_instruction() is compared against GL_FALSE.
 */
5745 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5747 if(GL_FALSE == add_cf_instruction(pAsm) )
5752 if(8 == pAsm->unAsic)
5754 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5756 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5757 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5759 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5760 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5762 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5763 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5765 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5766 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5768 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5769 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5771 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5772 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5774 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5775 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5777 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5778 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5780 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Bitfield encoding of the same instruction. */
5784 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5785 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5786 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5788 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5789 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5790 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5792 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5794 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* Address of the instruction that follows this one. */
5796 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
/*
 * Assemble the opening of an IF construct.
 *
 * pAsm      assembler state.
 * bHasElse  whether an ELSE follows; in the bitfield encoding the JUMP's
 *           pop_count is 1 when it is not GL_TRUE and 0 otherwise.
 *
 * Steps: switch the ALU clause opcode to SQ_CF_INST_ALU_PUSH_BEFORE, emit a
 * PRED_SETNE through assemble_LOGIC_PRED(), append a CF JUMP, record that
 * instruction as the "first" entry of pAsm->fc_stack[pAsm->FCSP] with type
 * FC_IF and an empty "mid" list, then call checkStackDepth() with
 * FC_PUSH_VPM.
 *
 * The result of add_cf_instruction() is compared against GL_FALSE.
 */
5801 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5803 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
/* NOTE(review): the result of assemble_LOGIC_PRED() is not tested here -
 * confirm whether a failure should abort the IF. */
5805 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5808 if(GL_FALSE == add_cf_instruction(pAsm) )
5813 if(8 == pAsm->unAsic)
5815 if(GL_TRUE != bHasElse)
5817 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5819 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5823 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5825 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5828 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5830 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5831 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5833 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5834 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5836 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5837 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5839 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5840 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5842 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5843 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5845 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5846 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5848 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5849 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5851 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Bitfield encoding of the same JUMP. */
5855 if(GL_TRUE != bHasElse)
5857 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5861 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5863 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5864 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5866 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5867 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5868 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5869 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5871 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* Record the JUMP so that ELSE/ENDIF can patch its address later. */
5875 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5876 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5877 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5878 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
/* USE_CF_FOR_POP_AFTER is defined at the top of this file, so the guarded
 * statements below are compiled out. */
5880 #ifndef USE_CF_FOR_POP_AFTER
5881 if(GL_TRUE != bHasElse)
5883 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5885 #endif /* USE_CF_FOR_POP_AFTER */
5887 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
/*
 * Assemble the ELSE of an IF construct.
 *
 * pAsm  assembler state.
 *
 * Steps: append a CF ELSE instruction (pop_count 1 in the bitfield
 * encoding), store it as mid[0] of pAsm->fc_stack[pAsm->FCSP], and patch the
 * address of the stack entry's "first" instruction to
 * plstCFInstructions_active->uNumOfNode - 1.
 *
 * The result of add_cf_instruction() is compared against GL_FALSE.
 */
5892 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5894 if(GL_FALSE == add_cf_instruction(pAsm) )
5899 if(8 == pAsm->unAsic)
5901 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5903 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5904 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5906 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5907 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5909 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5910 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5912 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5913 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5915 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5916 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5918 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5919 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5921 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5922 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5924 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5925 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5927 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Bitfield encoding of the same ELSE instruction. */
5931 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5932 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5933 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5935 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5936 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5937 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5938 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5940 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* Grow "mid" to one pointer and store the ELSE instruction in it.
 * NOTE(review): the _mesa_realloc() result is written through on the next
 * line without a NULL test - confirm allocation failure is handled. */
5943 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5945 sizeof(R700ControlFlowGenericClause *) );
5946 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5947 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
/* USE_CF_FOR_POP_AFTER is defined at the top of this file, so the guarded
 * statement below is compiled out. */
5949 #ifndef USE_CF_FOR_POP_AFTER
5950 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5951 #endif /* USE_CF_FOR_POP_AFTER */
/* Patch the address of the instruction recorded as "first". */
5953 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
/*
 * assemble_ENDIF - close the IF (or IF/ELSE) tracked at fc_stack[FCSP].
 *
 * Resets alu_x_opcode to SQ_CF_INST_ALU, patches the pending jump address
 * (the "first" clause when no ELSE was recorded, otherwise mid[0]) with
 * the current node count of the active CF list, releases the mid array,
 * verifies the entry type and calls decreaseCurrent(pAsm, FC_PUSH_VPM).
 *
 * pAsm: assembler state; fc_stack[FCSP] is read and its mid array freed.
 */
5958 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5960 #ifdef USE_CF_FOR_POP_AFTER
5962 #endif /* USE_CF_FOR_POP_AFTER */
5964 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
/* mid == NULL means assemble_ELSE() never stored a clause for this entry. */
5966 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5968 /* no else in between */
5969 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
/* An ELSE was recorded: its clause receives the address instead. */
5973 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5976 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5978 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
/*
 * NOTE(review): the pairing check runs only after the entry's first/mid
 * pointers were already dereferenced and mid was freed above; consider
 * validating the type before touching the entry.
 */
5981 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5983 radeon_error("if/endif in shader code are not paired. \n");
5989 decreaseCurrent(pAsm, FC_PUSH_VPM);
/*
 * assemble_BGNLOOP - emit the CF clause that opens a loop.
 *
 * Requests a new CF clause, programs its Word1 as LOOP_START_NO_AL,
 * initialises fc_stack[FCSP] as an FC_LOOP entry with an empty mid list
 * whose "first" pointer is the new clause, and calls
 * checkStackDepth(pAsm, FC_LOOP, GL_FALSE).
 *
 * pAsm: assembler state.
 */
5994 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5996 if(GL_FALSE == add_cf_instruction(pAsm) )
/* unAsic == 8: Word1 is written through SETfield() using the EG_CF_WORD1__* shifts and masks. */
6001 if(8 == pAsm->unAsic)
6003 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6005 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6006 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6007 EG_CF_INST_LOOP_START_NO_AL,
6008 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6009 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6011 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6012 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6014 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6015 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6017 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6018 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6020 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6021 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6023 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6024 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6026 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6027 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6029 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Word1 written through the m_Word1.f bit-fields: LOOP_START_NO_AL, pop_count 0, barrier set. */
6033 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6034 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6035 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6037 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6038 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6039 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
6040 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6042 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * Describe the loop on the flow-control stack. mid/unNumMid start empty;
 * assemble_BRK(), assemble_CONT() and breakLoopOnFlag() append clauses to
 * mid, and assemble_ENDLOOP() patches and frees them.
 */
6046 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
6047 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
6048 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
6049 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
6050 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
6052 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
/*
 * assemble_BRK - emit the instructions for a loop break.
 *
 * With USE_CF_FOR_CONTINUE_BREAK defined, the visible sequence is:
 *   1. select ALU_PUSH_BEFORE and emit a predicate-set instruction via
 *      assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
 *   2. search the flow-control stack downwards from FCSP for an FC_LOOP
 *      entry;
 *   3. add a LOOP_BREAK CF clause and append it to that loop entry's mid
 *      array (its address is patched by assemble_ENDLOOP());
 *   4. add a POP CF clause whose address is its own index + 1;
 *   5. call checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * pAsm: assembler state.
 */
6057 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
6059 #ifdef USE_CF_FOR_CONTINUE_BREAK
6061 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6063 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
/*
 * Locate the enclosing loop entry.
 * NOTE(review): the condition unFCSP>0 means stack slot 0 is never
 * examined -- confirm slot 0 cannot hold an FC_LOOP entry.
 */
6065 unsigned int unFCSP;
6066 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6068 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6075 radeon_error("Break is not inside loop/endloop pair.\n");
/* First CF clause: LOOP_BREAK. */
6079 if(GL_FALSE == add_cf_instruction(pAsm) )
6084 if(8 == pAsm->unAsic)
6086 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6088 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6089 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6090 EG_CF_INST_LOOP_BREAK,
6091 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6092 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6094 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6095 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6097 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6098 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6100 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6101 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6103 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6104 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6106 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6107 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6109 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6110 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6112 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6116 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6117 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6118 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6120 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6121 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6122 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6124 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6126 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * Grow the loop entry's mid array by one slot and append the LOOP_BREAK
 * clause to it.
 * NOTE(review): the _mesa_realloc() result is indexed on the next line
 * without a NULL check.
 */
6129 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6130 (void *)pAsm->fc_stack[unFCSP].mid,
6131 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6132 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6133 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6134 pAsm->fc_stack[unFCSP].unNumMid++;
/* Second CF clause: POP. */
6136 if(GL_FALSE == add_cf_instruction(pAsm) )
6141 if(8 == pAsm->unAsic)
6143 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6145 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6146 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6148 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6149 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6151 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6152 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6154 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6155 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6157 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6158 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6160 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6161 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6163 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6164 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6166 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6167 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6169 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6173 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6174 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6175 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6177 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6178 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6179 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6181 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6183 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* The POP clause's address is the index right after the POP itself. */
6186 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6188 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6190 #endif //USE_CF_FOR_CONTINUE_BREAK
/*
 * assemble_CONT - emit the instructions for a loop continue.
 *
 * Same visible sequence as assemble_BRK(), with LOOP_CONTINUE in place of
 * LOOP_BREAK. With USE_CF_FOR_CONTINUE_BREAK defined:
 *   1. select ALU_PUSH_BEFORE and emit a predicate-set instruction via
 *      assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
 *   2. search the flow-control stack downwards from FCSP for an FC_LOOP
 *      entry;
 *   3. add a LOOP_CONTINUE CF clause and append it to that loop entry's
 *      mid array (its address is patched by assemble_ENDLOOP());
 *   4. add a POP CF clause whose address is its own index + 1;
 *   5. call checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * pAsm: assembler state.
 */
6194 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
6196 #ifdef USE_CF_FOR_CONTINUE_BREAK
6197 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6199 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
/*
 * Locate the enclosing loop entry.
 * NOTE(review): the condition unFCSP>0 means stack slot 0 is never
 * examined -- confirm slot 0 cannot hold an FC_LOOP entry.
 */
6201 unsigned int unFCSP;
6202 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6204 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6211 radeon_error("Continue is not inside loop/endloop pair.\n");
/* First CF clause: LOOP_CONTINUE. */
6215 if(GL_FALSE == add_cf_instruction(pAsm) )
6220 if(8 == pAsm->unAsic)
6222 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6224 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6225 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6226 EG_CF_INST_LOOP_CONTINUE,
6227 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6228 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6230 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6231 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6233 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6234 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6236 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6237 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6239 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6240 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6242 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6243 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6245 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6246 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6248 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6252 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6253 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6254 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6256 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6257 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6258 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
6260 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * Grow the loop entry's mid array by one slot and append the
 * LOOP_CONTINUE clause to it.
 * NOTE(review): the _mesa_realloc() result is indexed on the next line
 * without a NULL check.
 */
6265 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6266 (void *)pAsm->fc_stack[unFCSP].mid,
6267 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6268 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6269 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6270 pAsm->fc_stack[unFCSP].unNumMid++;
/* Second CF clause: POP. */
6272 if(GL_FALSE == add_cf_instruction(pAsm) )
6277 if(8 == pAsm->unAsic)
6279 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6281 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6282 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6284 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6285 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6287 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6288 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6290 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6291 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6293 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6294 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6296 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6297 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6299 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6300 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6302 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6303 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6305 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6309 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6310 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6311 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6313 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6314 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6315 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6317 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6319 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* The POP clause's address is the index right after the POP itself. */
6322 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6324 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6326 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP - emit the CF clause that closes the loop tracked at
 * fc_stack[FCSP] and resolve every address that was waiting for it.
 *
 * Visible steps:
 *   - add a LOOP_END CF clause;
 *   - cross-link it with the loop's "first" clause: LOOP_END gets
 *     first's index + 1, first gets LOOP_END's index + 1;
 *   - with USE_CF_FOR_CONTINUE_BREAK, point every clause collected in mid
 *     at LOOP_END's index, then free mid;
 *   - verify that the stack entry is an FC_LOOP;
 *   - when HAS_CURRENT_LOOPRET is set in unCFflags, walk the outer
 *     entries down to the current call's FCSP_BeforeEntry, calling
 *     breakLoopOnFlag() for an outer loop, or returnOnFlag() and clearing
 *     the flag once the bottom is reached;
 *   - call decreaseCurrent(pAsm, FC_LOOP).
 *
 * pAsm: assembler state.
 */
6331 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
6335 if(GL_FALSE == add_cf_instruction(pAsm) )
/* unAsic == 8: Word1 is written through SETfield() using the EG_CF_WORD1__* shifts and masks. */
6340 if(8 == pAsm->unAsic)
6342 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6344 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6345 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6346 EG_CF_INST_LOOP_END,
6347 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6348 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6350 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6351 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6353 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6354 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6356 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6357 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6359 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6360 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6362 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6363 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6365 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6366 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6368 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Word1 written through the m_Word1.f bit-fields: LOOP_END, pop_count 0, barrier set. */
6372 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6373 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6374 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6376 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6377 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6378 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
6379 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6381 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/* LOOP_END points just past the loop's first clause; that first clause points just past LOOP_END. */
6384 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
6385 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
/* Clauses appended to mid by BRK/CONT/breakLoopOnFlag all target the LOOP_END clause itself. */
6387 #ifdef USE_CF_FOR_CONTINUE_BREAK
6388 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
6390 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
6392 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
6394 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
/*
 * NOTE(review): the pairing check runs only after the entry's first/mid
 * pointers were already dereferenced and mid was freed above; consider
 * validating the type before touching the entry.
 */
6398 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
6400 radeon_error("loop/endloop in shader code are not paired. \n");
/*
 * A RET inside this loop was turned into "set flag + break" (see
 * assemble_RET()); propagate it outwards through the entries that belong
 * to the current subroutine call.
 */
6406 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
6408 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6410 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6412 breakLoopOnFlag(pAsm, unFCSP);
6415 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
/* The walk reached the bottom of this call's entries: emit the flag-guarded return. */
6420 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
6422 #ifdef USE_CF_FOR_POP_AFTER
6423 returnOnFlag(pAsm, unIF);
6425 returnOnFlag(pAsm, 0);
6426 #endif /* USE_CF_FOR_POP_AFTER */
6427 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
6433 decreaseCurrent(pAsm, FC_LOOP);
/*
 * add_return_inst - append a RETURN control-flow clause.
 *
 * Requests a new CF clause through add_cf_instruction() and programs its
 * Word1 as a RETURN with pop_count 0, active condition and barrier set.
 *
 * pAsm: assembler state; cf_current_cf_clause_ptr is the clause written.
 */
6438 void add_return_inst(r700_AssemblerBase *pAsm)
6440 if(GL_FALSE == add_cf_instruction(pAsm) )
/* unAsic == 8: Word1 is written through SETfield() using the EG_CF_WORD1__* shifts and masks. */
6445 if(8 == pAsm->unAsic)
6447 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6449 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6450 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6452 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6453 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6455 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6456 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6458 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6459 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6461 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6462 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6464 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6465 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6467 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6468 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6470 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6471 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6473 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Word1 written through the m_Word1.f bit-fields; pop_count is 0 (the earlier value of 1 is kept commented out). */
6477 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6478 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6479 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6480 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6482 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6483 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6484 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
6485 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6487 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * assemble_BGNSUB - start assembling a subroutine body.
 *
 * Registers a new descriptor in pAsm->subs (growing the array by 10
 * entries when it is full), fills the CALLSTACK entry at CALLSP for it,
 * makes the subroutine's private CF list the active one, and opens an
 * FC_REP entry on the flow-control stack.
 *
 * pAsm:       assembler state.
 * nILindex:   index of the BGNSUB instruction in the IL stream.
 * uiIL_Shift: offset added to nILindex to form the stored subIL_Offset.
 */
6491 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
/*
 * Grow the descriptor array when the next slot would not fit.
 * NOTE(review): assigning the _mesa_realloc() result straight to
 * pAsm->subs loses the old block if the call fails.
 */
6494 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
6496 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
6497 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
6498 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
6499 if(NULL == pAsm->subs)
6503 pAsm->unSubArraySize += 10;
/* New descriptor: IL offset of the subroutine and an empty local CF instruction list. */
6506 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
6507 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
6508 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
6509 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
/*
 * Call-stack bookkeeping for this subroutine.
 * NOTE(review): plstCFInstructions_local stores the address of an element
 * of pAsm->subs, which is reallocated above when it fills up -- confirm
 * no CALLSTACK entry keeps this pointer across a later growth.
 */
6512 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
6513 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
6514 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
6515 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
6516 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
6517 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
/* CF clauses emitted from here on go to the subroutine's own list. */
6518 SetActiveCFlist(pAsm->pR700Shader,
6519 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6521 pAsm->unSubArrayPointer++;
6524 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
/* The subroutine body is tracked as an FC_REP entry; assemble_ENDSUB() checks for it. */
6527 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
6529 checkStackDepth(pAsm, FC_REP, GL_FALSE);
/*
 * assemble_ENDSUB - finish assembling a subroutine body.
 *
 * Verifies that the current flow-control entry is the FC_REP opened by
 * assemble_BGNSUB(), copies the call-stack entry's maximum stack depth
 * into the subroutine descriptor, calls decreaseCurrent(pAsm, FC_REP),
 * re-selects the active CF list from CALLSTACK[CALLSP] and resets
 * alu_x_opcode to SQ_CF_INST_ALU.
 *
 * pAsm: assembler state.
 */
6534 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
6536 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
6538 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6542 /* copy max to sub structure */
6543 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
6544 = pAsm->CALLSTACK[pAsm->CALLSP].max;
6546 decreaseCurrent(pAsm, FC_REP);
/* Make the CF list of the CALLSTACK entry at CALLSP the active one. */
6549 SetActiveCFlist(pAsm->pR700Shader,
6550 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6552 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
/*
 * assemble_RET - emit a subroutine return.
 *
 * When inside a call (CALLSP > 0) the flow-control entries opened since
 * the call's entry (FCSP down to FCSP_BeforeEntry) are inspected. If an
 * FC_LOOP entry is found, the return is expressed as "set the flag
 * register, break out of that loop" and LOOPRET_FLAGS is recorded in
 * unCFflags so that assemble_ENDLOOP() completes the return. The visible
 * code also calls add_return_inst().
 *
 * pAsm: assembler state.
 */
6559 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
6563 if(pAsm->CALLSP > 0)
6566 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6568 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
/* Return from inside a loop: raise the flag (swizzle SQ_SEL_1) and break out of the loop. */
6570 setRetInLoopFlag(pAsm, SQ_SEL_1);
6571 breakLoopOnFlag(pAsm, unFCSP);
6572 pAsm->unCFflags |= LOOPRET_FLAGS;
6576 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6583 #ifdef USE_CF_FOR_POP_AFTER
6588 #endif /* USE_CF_FOR_POP_AFTER */
6590 add_return_inst(pAsm);
/*
 * assemble_CAL - emit a subroutine call.
 *
 * Adds a CALL control-flow clause, records the call site in pAsm->callers
 * (growing that array by 10 entries when it is full) and looks the target
 * up in pAsm->subs by IL offset. For a subroutine that is already known,
 * the caller's maximum stack depth is updated from the subroutine's
 * unStackDepthMax. The visible code also assembles the subroutine through
 * AssembleInstr() and stores pPresubDesc in its descriptor.
 *
 * pAsm:          assembler state.
 * uiNumberInsts: instruction count forwarded to AssembleInstr().
 * pILInst:       IL instruction array forwarded to AssembleInstr().
 * pPresubDesc:   descriptor stored in subs[unSubID].pPresubDesc.
 */
6595 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
6598 GLuint uiNumberInsts,
6599 struct prog_instruction *pILInst,
6600 PRESUB_DESC * pPresubDesc)
6604 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6606 if(GL_FALSE == add_cf_instruction(pAsm) )
/* unAsic == 8: Word1 is written through SETfield() using the EG_CF_WORD1__* shifts and masks. */
6611 if(8 == pAsm->unAsic)
6613 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6615 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6616 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6618 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6619 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6621 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6622 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6624 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6625 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6627 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6628 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6630 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6631 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6633 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6634 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6636 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6637 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6639 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Word1 written through the m_Word1.f bit-fields: CALL, call_count 1, pop_count 0, barrier set. */
6643 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
6644 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6645 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6646 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6648 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6649 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6650 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
6651 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6653 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * Grow the caller table when the next slot would not fit.
 * NOTE(review): assigning the _mesa_realloc() result straight to
 * pAsm->callers loses the old block if the call fails.
 */
6657 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
6659 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
6660 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
6661 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
6662 if(NULL == pAsm->callers)
6666 pAsm->unCallerArraySize += 10;
/* Record the call site: target IL offset and the CALL clause itself. */
6669 uiIL_Offset = nILindex + uiIL_Shift;
6670 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
6671 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
6673 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
6674 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
6676 pAsm->unCallerArrayPointer++;
/* Search the known subroutines for one that starts at the same IL offset. */
6682 for(j=0; j<pAsm->unSubArrayPointer; j++)
6684 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
6685 { /* compiled before */
/* Depth needed by this call = callee's maximum + caller's current depth; keep the largest seen. */
6687 max = pAsm->subs[j].unStackDepthMax
6688 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6689 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6691 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6694 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
/*
 * The subroutine is assembled here and receives the next free descriptor
 * index; presumably this path is taken only when the search above found
 * no match -- confirm.
 */
6699 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
6700 unSubID = pAsm->unSubArrayPointer;
6702 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
/* Same depth update as above, now using the descriptor filled while assembling the subroutine. */
6706 max = pAsm->subs[unSubID].unStackDepthMax
6707 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6708 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6710 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6713 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
/*
 * setRetInLoopFlag - emit a MOV that writes the "return inside loop" flag.
 *
 * Builds a one-channel MOV whose destination is the x component of the
 * temporary register pAsm->flag_reg_index and submits it to the ALU
 * assembler.
 *
 * pAsm:      assembler state; the D, D2 and S[0] descriptors are overwritten.
 * flagValue: swizzle selector applied to all four source channels;
 *            callers in this file pass SQ_SEL_1 to set the flag and
 *            SQ_SEL_0 to clear it.
 */
6719 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
6721 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
/* Destination: flag register, x channel only, not predicated, no saturation. */
6723 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6724 pAsm->D.dst.op3 = 0;
6725 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6726 pAsm->D.dst.reg = pAsm->flag_reg_index;
6727 pAsm->D.dst.writex = 1;
6728 pAsm->D.dst.writey = 0;
6729 pAsm->D.dst.writez = 0;
6730 pAsm->D.dst.writew = 0;
6731 pAsm->D2.dst2.literal_slots = 1;
6732 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6733 pAsm->D.dst.predicated = 0;
6734 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6735 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6736 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
/*
 * Two alternative descriptions of source 0 follow. This one uses a
 * literal operand and next_ins_literal().
 * NOTE(review): fLiteral's only declaration is commented out above, so
 * this variant is presumably disabled by a preprocessor conditional --
 * confirm.
 */
6738 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
6739 //pAsm->S[0].src.reg = 0;
6740 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6741 noneg_PVSSRC(&(pAsm->S[0].src));
6742 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6743 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6744 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6745 pAsm->S[0].src.swizzlew = SQ_SEL_W;
6747 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
/* Variant using temporary register 0 with every swizzle replaced by flagValue, submitted with next_ins(). */
6752 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6753 pAsm->S[0].src.reg = 0;
6754 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6755 noneg_PVSSRC(&(pAsm->S[0].src));
6756 pAsm->S[0].src.swizzlex = flagValue;
6757 pAsm->S[0].src.swizzley = flagValue;
6758 pAsm->S[0].src.swizzlez = flagValue;
6759 pAsm->S[0].src.swizzlew = flagValue;
6761 if( GL_FALSE == next_ins(pAsm) )
/*
 * testFlag - emit a predicate-set instruction that tests the return flag.
 *
 * Selects ALU_PUSH_BEFORE for the clause and builds a predicated
 * PRED_SETE comparing the flag register (pAsm->flag_reg_index) against a
 * second operand, writing the x channel of a helper temporary obtained
 * from gethelpr(). Ends with checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * pAsm: assembler state; the D, D2, S[0] and S[1] descriptors are
 *       overwritten.
 */
6770 GLboolean testFlag(r700_AssemblerBase *pAsm)
6772 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6775 GLuint tmp = gethelpr(pAsm);
6776 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
/* Destination: helper temporary, x channel only, predicated. */
6778 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
6779 pAsm->D.dst.math = 1;
6780 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6781 pAsm->D.dst.reg = tmp;
6782 pAsm->D.dst.writex = 1;
6783 pAsm->D.dst.writey = 0;
6784 pAsm->D.dst.writez = 0;
6785 pAsm->D.dst.writew = 0;
6786 pAsm->D2.dst2.literal_slots = 1;
6787 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6788 pAsm->D.dst.predicated = 1;
6789 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
/* Source 0: the flag register with an identity swizzle. */
6791 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6792 pAsm->S[0].src.reg = pAsm->flag_reg_index;
6793 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6794 noneg_PVSSRC(&(pAsm->S[0].src));
6795 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6796 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6797 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6798 pAsm->S[0].src.swizzlew = SQ_SEL_W;
/*
 * Two alternative descriptions of source 1 follow. This one uses a
 * literal operand and next_ins_literal().
 * NOTE(review): fLiteral's only declaration is commented out above, so
 * this variant is presumably disabled by a preprocessor conditional --
 * confirm.
 */
6800 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
6801 //pAsm->S[1].src.reg = 0;
6802 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6803 noneg_PVSSRC(&(pAsm->S[1].src));
6804 pAsm->S[1].src.swizzlex = SQ_SEL_X;
6805 pAsm->S[1].src.swizzley = SQ_SEL_Y;
6806 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
6807 pAsm->S[1].src.swizzlew = SQ_SEL_W;
6809 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
/* Variant using temporary register 0 with every swizzle set to SQ_SEL_1, submitted with next_ins(). */
6814 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
6815 pAsm->S[1].src.reg = 0;
6816 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6817 noneg_PVSSRC(&(pAsm->S[1].src));
6818 pAsm->S[1].src.swizzlex = SQ_SEL_1;
6819 pAsm->S[1].src.swizzley = SQ_SEL_1;
6820 pAsm->S[1].src.swizzlez = SQ_SEL_1;
6821 pAsm->S[1].src.swizzlew = SQ_SEL_1;
6823 if( GL_FALSE == next_ins(pAsm) )
6829 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
/*
 * returnOnFlag - emit the flag-guarded return sequence used after a loop
 * that contained a RET.
 *
 * The visible sequence is: jumpToOffest(pAsm, 1, 4), clear the flag with
 * setRetInLoopFlag(pAsm, SQ_SEL_0), pop unIF + 1 stack levels, then add a
 * RETURN clause. Presumably the jump skips the following instructions
 * when the flag is not set -- confirm against jumpToOffest().
 *
 * pAsm: assembler state.
 * unIF: count passed by the caller; unIF + 1 is handed to pops().
 */
6834 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
6837 jumpToOffest(pAsm, 1, 4);
6838 setRetInLoopFlag(pAsm, SQ_SEL_0);
6839 pops(pAsm, unIF + 1);
6840 add_return_inst(pAsm);
/*
 * breakLoopOnFlag - emit a LOOP_BREAK clause for the loop at a given
 * flow-control stack slot.
 *
 * Adds a CF clause programmed as LOOP_BREAK (pop_count 1, barrier set)
 * and appends it to the mid array of fc_stack[unFCSP], so that
 * assemble_ENDLOOP() patches its address when that loop is closed.
 *
 * pAsm:   assembler state.
 * unFCSP: index of the FC_LOOP entry on pAsm->fc_stack to break out of.
 */
6845 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
6850 if(GL_FALSE == add_cf_instruction(pAsm) )
/* unAsic == 8: Word1 is written through SETfield() using the EG_CF_WORD1__* shifts and masks. */
6855 if(8 == pAsm->unAsic)
6857 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6859 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6860 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6861 EG_CF_INST_LOOP_BREAK,
6862 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6863 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6865 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6866 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6868 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6869 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6871 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6872 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6874 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6875 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6877 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6878 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6880 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6881 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6883 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
/* Word1 written through the m_Word1.f bit-fields: LOOP_BREAK, pop_count 1, barrier set. */
6887 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6888 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6889 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6891 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6892 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6893 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6894 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6896 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
/*
 * Grow the loop entry's mid array by one slot and append the new clause.
 * NOTE(review): the _mesa_realloc() result is indexed on the next line
 * without a NULL check.
 */
6899 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6900 (void *)pAsm->fc_stack[unFCSP].mid,
6901 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6902 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6903 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6904 pAsm->fc_stack[unFCSP].unNumMid++;
6911 GLboolean AssembleInstr(GLuint uiFirstInst,
6913 GLuint uiNumberInsts,
6914 struct prog_instruction *pILInst,
6915 r700_AssemblerBase *pR700AsmCode)
6919 pR700AsmCode->pILInst = pILInst;
6920 for(i=uiFirstInst; i<uiNumberInsts; i++)
6922 pR700AsmCode->uiCurInst = i;
6924 #ifndef USE_CF_FOR_CONTINUE_BREAK
6925 if(OPCODE_BRK == pILInst[i+1].Opcode)
6927 switch(pILInst[i].Opcode)
6930 pILInst[i].Opcode = OPCODE_SGT;
6933 pILInst[i].Opcode = OPCODE_SGE;
6936 pILInst[i].Opcode = OPCODE_SLT;
6939 pILInst[i].Opcode = OPCODE_SLE;
6942 pILInst[i].Opcode = OPCODE_SNE;
6945 pILInst[i].Opcode = OPCODE_SEQ;
6952 if(pILInst[i].CondUpdate == 1)
6954 /* remember dest register used for cond evaluation */
6955 /* XXX also handle PROGRAM_OUTPUT registers here? */
6956 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
6959 switch (pILInst[i].Opcode)
6962 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
6967 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
6972 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
6976 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6977 //if ( GL_FALSE == assemble_BAD("ARR") )
6982 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
6986 if(8 == pR700AsmCode->unAsic)
6988 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) )
6993 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
7002 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
7007 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
7012 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
7016 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
7021 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
7024 //case OP_FLR_INT: ;
7026 // if ( GL_FALSE == assemble_FLR_INT() )
7031 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
7037 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
7041 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
7045 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
7049 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
7053 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
7058 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
7062 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
7066 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
7071 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
7075 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
7081 callPreSub(pR700AsmCode,
7084 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
7086 radeon_error("noise1: not yet supported shader instruction\n");
7090 radeon_error("noise2: not yet supported shader instruction\n");
7093 radeon_error("noise3: not yet supported shader instruction\n");
7096 radeon_error("noise4: not yet supported shader instruction\n");
7100 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
7104 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
7108 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
7112 if(8 == pR700AsmCode->unAsic)
7114 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) )
7119 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
7124 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
7129 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
7136 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7143 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
7149 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
7152 struct prog_src_register SrcRegSave[2];
7153 SrcRegSave[0] = pILInst[i].SrcReg[0];
7154 SrcRegSave[1] = pILInst[i].SrcReg[1];
7155 pILInst[i].SrcReg[0] = SrcRegSave[1];
7156 pILInst[i].SrcReg[1] = SrcRegSave[0];
7157 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7159 pILInst[i].SrcReg[0] = SrcRegSave[0];
7160 pILInst[i].SrcReg[1] = SrcRegSave[1];
7163 pILInst[i].SrcReg[0] = SrcRegSave[0];
7164 pILInst[i].SrcReg[1] = SrcRegSave[1];
7170 struct prog_src_register SrcRegSave[2];
7171 SrcRegSave[0] = pILInst[i].SrcReg[0];
7172 SrcRegSave[1] = pILInst[i].SrcReg[1];
7173 pILInst[i].SrcReg[0] = SrcRegSave[1];
7174 pILInst[i].SrcReg[1] = SrcRegSave[0];
7175 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
7177 pILInst[i].SrcReg[0] = SrcRegSave[0];
7178 pILInst[i].SrcReg[1] = SrcRegSave[1];
7181 pILInst[i].SrcReg[0] = SrcRegSave[0];
7182 pILInst[i].SrcReg[1] = SrcRegSave[1];
7187 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
7194 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
7199 if ( GL_FALSE == assemble_SSG(pR700AsmCode) )
7206 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
7212 if( (i+1)<uiNumberInsts )
7214 if(OPCODE_END != pILInst[i+1].Opcode)
7216 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
7218 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
7230 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
7235 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
7240 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
7246 GLboolean bHasElse = GL_FALSE;
7248 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
7253 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
7261 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
7266 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
7270 case OPCODE_BGNLOOP:
7271 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
7278 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
7285 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
7291 case OPCODE_ENDLOOP:
7292 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
7299 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
7306 if( GL_FALSE == assemble_RET(pR700AsmCode) )
7313 if( GL_FALSE == assemble_CAL(pR700AsmCode,
7314 pILInst[i].BranchTarget,
7324 //case OPCODE_EXPORT:
7325 // if ( GL_FALSE == assemble_EXPORT() )
7330 return assemble_ENDSUB(pR700AsmCode);
7333 //pR700AsmCode->uiCurInst = i;
7334 //This is a reminder that if a later export contains a depth/stencil
7335 //export, we need a mov to re-arrange the DST channel; rather than using a
7336 //pseudo inst, we will use this end inst to do it.
7340 radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode);
/*
 * InitShaderProgram - prepare the assembler state before instructions are
 * assembled.
 *
 * pAsm: assembler context to initialise.
 *
 * Unless GENERATE_SHADER_FOR_2D is defined, setRetInLoopFlag() is called with
 * SQ_SEL_0.  For a fragment program (SPT_FP) when unAsic == 8,
 * EG_add_ps_interp() is called as well.  alu_x_opcode is then set to
 * SQ_CF_INST_ALU.
 */
7348 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
7350 #ifndef GENERATE_SHADER_FOR_2D
7351 setRetInLoopFlag(pAsm, SQ_SEL_0);
/* unAsic == 8 selects the EG_-prefixed helpers
 * (presumably Evergreen -- confirm against the unAsic definition). */
7354 if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic))
7356 EG_add_ps_interp(pAsm);
/* Start with a plain ALU clause opcode. */
7359 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
/*
 * RelocProgram - relocate the recorded subroutines and append them to the
 * main control-flow (CF) instruction list.
 *
 * pAsm:    assembler context holding the main CF list (CALLSTACK[0]), the
 *          subroutine table (subs[]) and the call-site table (callers[]).
 * pILProg: source program; only Parameters->NumParameters is read here, as
 *          the initial constant offset.
 *
 * Visible steps:
 *  1. If HAS_LOOPRET is not set in unCFflags, patch ALU clauses of the main
 *     list to drop the flag initialisation.
 *  2. Derive pR700Shader->uStackSize from CALLSTACK[0].max.
 *  3. For every subroutine: rebase CF addresses and node indices by the
 *     running CF offset; for presubs also rebase GPR and constant-file
 *     selectors of each ALU instruction; then link the subroutine list
 *     after the main list.
 *  4. For every caller: store the final CF address of its subroutine and,
 *     for presubs, rebase the registers used by the prelude/finale clauses.
 */
7363 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
7367 TypedShaderList * plstCFmain;
7368 TypedShaderList * plstCFsub;
7370 R700ShaderInstruction * pInst;
7371 R700ControlFlowGenericClause * pCFInst;
7373 R700ControlFlowALUClause * pCF_ALU;
7374 R700ALUInstruction * pALU;
7375 GLuint unConstOffset = 0;
7377 GLuint unMinRegIndex;
/* Main program CF list lives in call-stack slot 0. */
7379 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
7381 #ifndef GENERATE_SHADER_FOR_2D
7382 /* remove flags init if they are not used */
7383 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
7385 R700ControlFlowALUClause * pCF_ALU;
7386 pInst = plstCFmain->pHead;
7389 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7391 pCF_ALU = (R700ControlFlowALUClause *)pInst;
/* A clause whose count field is 0 is turned into a NOP as a whole. */
7392 if(0 == pCF_ALU->m_Word1.f.count)
7394 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
/* Presumably the alternative branch: unlink the clause's first ALU
 * instruction, make its successor the clause head and shrink the count. */
7398 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
7400 pALU->m_pLinkedALUClause = NULL;
7401 pALU = (R700ALUInstruction *)(pALU->pNextInst);
7402 pALU->m_pLinkedALUClause = pCF_ALU;
7403 pCF_ALU->m_pLinkedALUInstruction = pALU;
7405 pCF_ALU->m_Word1.f.count--;
7409 pInst = pInst->pNextInst;
7412 #endif /* GENERATE_SHADER_FOR_2D */
/* Stack size: call-stack maximum rounded up to a multiple of 4, divided
 * by 4, plus 2. */
7414 if(pAsm->CALLSTACK[0].max > 0)
7416 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
/* No subroutine was recorded. */
7419 if(0 == pAsm->unSubArrayPointer)
/* Subroutines are placed right after the last main CF node. */
7424 unCFoffset = plstCFmain->uNumOfNode;
/* Constant offset starts at the program's own parameter count. */
7426 if(NULL != pILProg->Parameters)
7428 unConstOffset = pILProg->Parameters->NumParameters;
7432 for(i=0; i<pAsm->unSubArrayPointer; i++)
/* Remember where this subroutine starts; callers are patched with it below. */
7434 pAsm->subs[i].unCFoffset = unCFoffset;
7435 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
7437 pInst = plstCFsub->pHead;
7439 /* reloc instructions */
7442 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
7444 pCFInst = (R700ControlFlowGenericClause *)pInst;
/* Only these CF instructions get their addr field rebased. */
7446 switch (pCFInst->m_Word1.f.cf_inst)
7448 case SQ_CF_INST_POP:
7449 case SQ_CF_INST_JUMP:
7450 case SQ_CF_INST_ELSE:
7451 case SQ_CF_INST_LOOP_END:
7452 case SQ_CF_INST_LOOP_START:
7453 case SQ_CF_INST_LOOP_START_NO_AL:
7454 case SQ_CF_INST_LOOP_CONTINUE:
7455 case SQ_CF_INST_LOOP_BREAK:
7456 pCFInst->m_Word0.f.addr += unCFoffset;
/* The node index is shifted by the same CF offset. */
7463 pInst->m_uIndex += unCFoffset;
7465 pInst = pInst->pNextInst;
/* Presubs additionally need their registers and constants rebased. */
7468 if(NULL != pAsm->subs[i].pPresubDesc)
7472 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
7473 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
/* NOTE(review): unConstOffset is accumulated with += across subroutines,
 * while unConstantsStart is itself built cumulatively in callPreSub --
 * confirm this does not double-count from the second presub onwards. */
7474 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
7476 pInst = plstCFsub->pHead;
7479 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7481 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7483 pALU = pCF_ALU->m_pLinkedALUInstruction;
/* Visits count + 1 linked ALU instructions of the clause. */
7484 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
/* GPR numbers move from the presub's own numbering (MinRegIndex based)
 * to unRegOffset. */
7486 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
/* Selectors below SQ_ALU_SRC_GPR_SIZE get the register rebase; selectors at
 * or above SQ_ALU_SRC_CFILE_BASE get the constant offset. */
7488 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
7490 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7492 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
7494 pALU->m_Word0.f.src0_sel += unConstOffset;
7497 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
7498 >= SQ_OP3_INST_MUL_LIT )
7499 { /* op3 : 3 srcs */
7500 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
7502 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
7504 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
7506 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
7508 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7510 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7512 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7514 pALU->m_Word0.f.src1_sel += unConstOffset;
/* Not an op3 encoding: query the operand count of the op2 opcode, using
 * the EG_ helper when unAsic == 8 and the r700 helper otherwise. */
7519 if(8 == pAsm->unAsic)
7521 uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7527 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
7531 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7536 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7538 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7540 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7542 pALU->m_Word0.f.src1_sel += unConstOffset;
7546 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7549 pInst = pInst->pNextInst;
7553 /* Put sub into main */
7554 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
7555 plstCFmain->pTail = plstCFsub->pTail;
7556 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
/* The next subroutine starts after the one just appended. */
7558 unCFoffset += plstCFsub->uNumOfNode;
/* Patch every recorded call site with the final address of its subroutine. */
7562 for(i=0; i<pAsm->unCallerArrayPointer; i++)
7564 pAsm->callers[i].cf_ptr->m_Word0.f.addr
7565 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
/* Calls into presubs also carry prelude/finale ALU clauses (recorded by
 * callPreSub) whose presub-side registers need the same rebase. */
7567 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
7569 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
7570 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
/* Prelude clause: only the destination GPR is rebased. */
7572 if(NULL != pAsm->callers[i].prelude_cf_ptr)
7574 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
7575 pALU = pCF_ALU->m_pLinkedALUInstruction;
7576 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7578 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7579 pALU = (R700ALUInstruction*)(pALU->pNextInst);
/* Finale clause: only source 0 is rebased. */
7582 if(NULL != pAsm->callers[i].finale_cf_ptr)
7584 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
7585 pALU = pCF_ALU->m_pLinkedALUInstruction;
7586 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7588 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7589 pALU = (R700ALUInstruction*)(pALU->pNextInst);
/*
 * callPreSub - emit a call to a precompiled built-in subroutine ("presub").
 *
 * pAsm:            assembler context.
 * scriptSigniture: identifies the presub; used to find an already
 *                  registered descriptor in pAsm->presubs.
 * pCompiledSub:    compiled subroutine (instructions, register indices,
 *                  output swizzles).
 * uNumValidSrc:    number of source operands copied into the presub's input
 *                  registers before the call.
 *
 * Visible steps: MOV each source into pCompiledSub->srcRegIndex[i]; find or
 * register the presub descriptor; emit the call via assemble_CAL(); MOV the
 * presub result (dstRegIndex, with the output swizzles) into the real
 * destination; record the prelude/finale ALU clauses on the newest caller
 * entry so they can be relocated later; update register usage bookkeeping.
 */
7598 GLboolean callPreSub(r700_AssemblerBase* pAsm,
7599 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
7600 COMPILED_SUB * pCompiledSub,
7602 GLshort uNumValidSrc)
7604 /* save assemble context */
7605 GLuint starting_temp_register_number_save;
7606 GLuint number_used_registers_save;
7607 GLuint uFirstHelpReg_save;
7608 GLuint uHelpReg_save;
7609 GLuint uiCurInst_save;
7610 struct prog_instruction *pILInst_save;
7611 PRESUB_DESC * pPresubDesc;
7615 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
7617 /* copy srcs to presub inputs */
7618 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7619 for(i=0; i<uNumValidSrc; i++)
/* One MOV per source: destination is the presub's i-th input register,
 * all four components written. */
7621 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7622 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
7623 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
7624 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
7625 pAsm->D.dst.writex = 1;
7626 pAsm->D.dst.writey = 1;
7627 pAsm->D.dst.writez = 1;
7628 pAsm->D.dst.writew = 1;
7630 if( GL_FALSE == assemble_src(pAsm, i, 0) )
/* Remember the ALU clause holding the input copies (the "prelude"). */
7637 if(uNumValidSrc > 0)
7639 prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7640 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7643 /* browse through existing presubs. */
7644 for(i=0; i<pAsm->unNumPresub; i++)
7646 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
/* i == unNumPresub means the search found no matching signature. */
7652 if(i == pAsm->unNumPresub)
7653 { /* not loaded yet */
7654 /* save assemble context */
7655 number_used_registers_save = pAsm->number_used_registers;
7656 uFirstHelpReg_save = pAsm->uFirstHelpReg;
7657 uHelpReg_save = pAsm->uHelpReg;
7658 starting_temp_register_number_save = pAsm->starting_temp_register_number;
7659 pILInst_save = pAsm->pILInst;
7660 uiCurInst_save = pAsm->uiCurInst;
7662 /* alloc in presub */
/* Descriptor array grows in steps of 4 entries. */
7663 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
7665 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
7666 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
7667 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
7668 if(NULL == pAsm->presubs)
7670 radeon_error("No memeory to allocate built in shader function description structures. \n");
7673 pAsm->unPresubArraySize += 4;
/* New descriptor goes into slot i (== current unNumPresub). */
7676 pPresubDesc = &(pAsm->presubs[i]);
7677 pPresubDesc->sptSigniture = scriptSigniture;
7679 /* constants offsets need to be final resolved at reloc. */
/* unConstantsStart is cumulative: previous presub's start plus its
 * parameter count. */
7680 if(0 == pAsm->unNumPresub)
7682 pPresubDesc->unConstantsStart = 0;
7686 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
7687 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
7690 pPresubDesc->pCompiledSub = pCompiledSub;
/* The presub's instructions are numbered after all instructions counted
 * so far. */
7692 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
7693 pPresubDesc->maxStartReg = uFirstHelpReg_save;
7694 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
7696 pAsm->unNumPresub++;
7698 /* setup new assemble context */
/* The presub is assembled with registers starting at 0; RelocProgram
 * rebases them later. */
7699 pAsm->starting_temp_register_number = 0;
7700 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
7701 pAsm->uFirstHelpReg = pAsm->number_used_registers;
7702 pAsm->uHelpReg = pAsm->uFirstHelpReg;
7704 bRet = assemble_CAL(pAsm,
7706 pPresubDesc->subIL_Shift,
7707 pCompiledSub->NumInstructions,
7708 pCompiledSub->Instructions,
/* Keep the register count reached while assembling the presub. */
7712 pPresubDesc->number_used_registers = pAsm->number_used_registers;
7714 /* restore assemble context */
7715 pAsm->number_used_registers = number_used_registers_save;
7716 pAsm->uFirstHelpReg = uFirstHelpReg_save;
7717 pAsm->uHelpReg = uHelpReg_save;
7718 pAsm->starting_temp_register_number = starting_temp_register_number_save;
7719 pAsm->pILInst = pILInst_save;
7720 pAsm->uiCurInst = uiCurInst_save;
/* Presumably the already-registered branch: reuse descriptor i found by
 * the search and only emit the call. */
7724 pPresubDesc = &(pAsm->presubs[i]);
7726 bRet = assemble_CAL(pAsm,
7728 pPresubDesc->subIL_Shift,
7729 pCompiledSub->NumInstructions,
7730 pCompiledSub->Instructions,
7734 if(GL_FALSE == bRet)
7736 radeon_error("Shader presub assemble failed. \n");
7740 /* copy presub output to real dst */
7741 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7742 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7744 if( GL_FALSE == assemble_dst(pAsm) )
/* Source of the copy is the presub's result register, read through the
 * presub's output swizzles. */
7749 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
7750 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
7751 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
7752 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
7753 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
7754 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
7755 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
/* Attach the finale (output copy) and prelude (input copies) clauses to the
 * most recently added caller entry; RelocProgram reads both pointers. */
7759 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7760 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
7761 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
/* Make sure the register budget covers the presub's registers placed on top
 * of the caller's first help register. */
7764 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
7766 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
/* maxStartReg tracks the highest first-help-register seen over all calls. */
7768 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
7770 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
/*
 * Process_Export - fill in one export CF clause.
 *
 * pAsm:                     assembler context; the clause written is
 *                           cf_current_export_clause_ptr.
 * type:                     export type stored in m_Word0.f.type; it also
 *                           selects how array_base is computed.
 * export_starting_index:    added to the base value chosen for array_base.
 * export_count:             number of exports; the non-EG path stores
 *                           export_count - 1 as burst_count.
 * starting_register_number: stored in m_Word0.f.rw_gpr.
 * is_depth_export:          GL_TRUE routes a pixel export to SQ_CF_PIXEL_Z
 *                           and selects the depth swizzle.
 *
 * Callers in this file compare the result against GL_FALSE to detect
 * failure.  On completion cf_last_export_ptr points at the clause written.
 */
7776 GLboolean Process_Export(r700_AssemblerBase* pAsm,
7778 GLuint export_starting_index,
7779 GLuint export_count,
7780 GLuint starting_register_number,
7781 GLboolean is_depth_export)
7783 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
7784 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
7786 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
/* array_base depends on the export type. */
7790 case SQ_EXPORT_PIXEL:
7791 if(GL_TRUE == is_depth_export)
7793 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
7797 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
/* Position exports are based at SQ_CF_POS_0. */
7802 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
7805 case SQ_EXPORT_PARAM:
7806 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
7810 radeon_error("Unknown export type: %d\n", type);
7815 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
7817 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
7818 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
7819 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
/* unAsic == 8: Word1 is written through SETfield on the raw value using the
 * EG_ shift/mask definitions (burst count, EOP, VPM, CF_INST, MARK, BARRIER). */
7821 if(8 == pAsm->unAsic)
7823 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7825 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift,
7826 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask);
7827 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7829 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7830 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7831 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7833 EG_CF_ALLOC_EXPORT_WORD1__VPM_shift,
7834 EG_CF_ALLOC_EXPORT_WORD1__VPM_bit);
7835 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7837 EG_CF_WORD1__CF_INST_shift,
7838 EG_CF_WORD1__CF_INST_mask);
7839 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7841 EG_CF_ALLOC_EXPORT_WORD1__MARK_shift,
7842 EG_CF_ALLOC_EXPORT_WORD1__MARK_bit);
7843 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7845 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift,
7846 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit);
/* Presumably the non-EG branch: the same information is written through the
 * Word1 bitfields.  The clause is a plain EXPORT here; the callers upgrade
 * the last one to EXPORT_DONE afterwards. */
7850 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
7851 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
7852 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
7853 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
7854 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
7855 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
/* Single-register export: the swizzle may be specialised for depth. */
7858 if (export_count == 1)
7860 assert(starting_register_number >= pAsm->starting_export_register_number);
7862 /* exports Z as a float into Red channel */
7863 if (GL_TRUE == is_depth_export)
7865 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_Z;
7866 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
7867 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
7868 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
/* Non-depth single export: identity swizzle. */
7872 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7873 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7874 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7875 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7880 // This should only be used if all components for all registers have been written
7881 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7882 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7883 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7884 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
/* Remember this clause so callers can mark the final export of a group. */
7887 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
/*
 * Process_Fragment_Exports - emit the pixel exports of a fragment program.
 *
 * pR700AsmCode:   assembler context.
 * OutputsWritten: bitfield tested against unBit for each index below
 *                 FRAG_RESULT_MAX.
 *
 * Each selected output is exported from uiFP_OutputMap[i] with a single
 * register per export; FRAG_RESULT_DEPTH is flagged as a depth export.  The
 * last export clause is finally turned into EXPORT_DONE with the
 * end-of-program bit set.
 */
7892 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
7893 GLbitfield OutputsWritten)
7896 GLuint export_count = 0;
7899 for (i = 0; i < FRAG_RESULT_MAX; ++i)
7903 if (OutputsWritten & unBit)
7905 GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE;
/* NOTE(review): export_starting_index is always 0 here, so every non-depth
 * output is based at SQ_CF_PIXEL_MRT0 -- confirm that is intended. */
7906 if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
7912 /* Need to export something, otherwise we'll hang
7913 * results are undefined anyway */
7914 if(export_count == 0)
/* NOTE(review): the result of this fallback export is not checked. */
7916 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
/* Mark the final export: EXPORT_DONE plus end of program, written through
 * SETfield when unAsic == 8 and through the bitfields otherwise. */
7919 if(pR700AsmCode->cf_last_export_ptr != NULL)
7921 if(8 == pR700AsmCode->unAsic)
7923 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7925 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7926 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7927 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7928 EG_CF_INST_EXPORT_DONE,
7929 EG_CF_WORD1__CF_INST_shift,
7930 EG_CF_WORD1__CF_INST_mask);
7934 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7935 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
/*
 * Process_Vertex_Exports - emit the exports of a vertex program.
 *
 * pR700AsmCode:   assembler context; ucVP_OutputMap[] supplies the register
 *                 exported for each vertex result.
 * OutputsWritten: bitfield with one bit per VERT_RESULT_* index.
 *
 * Order of the visible work: HPOS and PSIZ are exported first and the last of
 * them is marked EXPORT_DONE; the export index is then reset and COL0, COL1,
 * FOGC, the TEX0+i results and VAR0..VERT_RESULT_MAX-1 are exported with an
 * increasing index; the last export is marked EXPORT_DONE, or a fallback
 * export with swizzle (0, 0, 0, 1) is emitted; finally end_of_program is set
 * on the last export.
 */
7942 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
7943 GLbitfield OutputsWritten)
7948 GLuint export_starting_index = 0;
7949 GLuint export_count = pR700AsmCode->number_of_exports;
/* --- first group: HPOS, then PSIZ --- */
7951 unBit = 1 << VERT_RESULT_HPOS;
7952 if(OutputsWritten & unBit)
7954 if( GL_FALSE == Process_Export(pR700AsmCode,
7956 export_starting_index,
7958 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
7963 export_starting_index++;
7967 unBit = 1 << VERT_RESULT_PSIZ;
7968 if(OutputsWritten & unBit)
7970 if( GL_FALSE == Process_Export(pR700AsmCode,
7972 export_starting_index,
7974 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
/* Close the first group by turning its last export into EXPORT_DONE.
 * NOTE(review): unlike Process_Fragment_Exports, cf_last_export_ptr is not
 * NULL-checked before use here -- confirm an export always precedes this. */
7982 if(8 == pR700AsmCode->unAsic)
7984 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7985 EG_CF_INST_EXPORT_DONE,
7986 EG_CF_WORD1__CF_INST_shift,
7987 EG_CF_WORD1__CF_INST_mask);
7991 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
/* --- second group: the export index restarts at 0 --- */
7995 pR700AsmCode->number_of_exports = export_count;
7996 export_starting_index = 0;
7998 unBit = 1 << VERT_RESULT_COL0;
7999 if(OutputsWritten & unBit)
8001 if( GL_FALSE == Process_Export(pR700AsmCode,
8003 export_starting_index,
8005 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
8011 export_starting_index++;
8014 unBit = 1 << VERT_RESULT_COL1;
8015 if(OutputsWritten & unBit)
8017 if( GL_FALSE == Process_Export(pR700AsmCode,
8019 export_starting_index,
8021 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
8027 export_starting_index++;
8030 unBit = 1 << VERT_RESULT_FOGC;
8031 if(OutputsWritten & unBit)
8033 if( GL_FALSE == Process_Export(pR700AsmCode,
8035 export_starting_index,
8037 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
8043 export_starting_index++;
/* Results starting at VERT_RESULT_TEX0, one bit each. */
8048 unBit = 1 << (VERT_RESULT_TEX0 + i);
8049 if(OutputsWritten & unBit)
8051 if( GL_FALSE == Process_Export(pR700AsmCode,
8053 export_starting_index,
8055 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
8061 export_starting_index++;
/* Results from VERT_RESULT_VAR0 up to VERT_RESULT_MAX - 1. */
8065 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
8068 if(OutputsWritten & unBit)
8070 if( GL_FALSE == Process_Export(pR700AsmCode,
8072 export_starting_index,
8074 pR700AsmCode->ucVP_OutputMap[i],
8080 export_starting_index++;
8084 // At least one param should be exported
/* Mark the last export of the second group as EXPORT_DONE. */
8087 if(8 == pR700AsmCode->unAsic)
8089 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
8090 EG_CF_INST_EXPORT_DONE,
8091 EG_CF_WORD1__CF_INST_shift,
8092 EG_CF_WORD1__CF_INST_mask);
8096 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
/* Fallback export from starting_export_register_number, presumably for the
 * case where the second group exported nothing.  Its swizzle is forced to
 * the constants (0, 0, 0, 1), so the register contents are not read. */
8101 if( GL_FALSE == Process_Export(pR700AsmCode,
8105 pR700AsmCode->starting_export_register_number,
8111 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
8112 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
8113 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
8114 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
8115 if(8 == pR700AsmCode->unAsic)
8117 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
8118 EG_CF_INST_EXPORT_DONE,
8119 EG_CF_WORD1__CF_INST_shift,
8120 EG_CF_WORD1__CF_INST_mask);
8124 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
/* The very last export also terminates the program. */
8128 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
8133 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
8135 if(NULL != pR700AsmCode->pInstDeps)
8137 FREE(pR700AsmCode->pInstDeps);
8138 pR700AsmCode->pInstDeps = NULL;
8141 if(NULL != pR700AsmCode->subs)
8143 FREE(pR700AsmCode->subs);
8144 pR700AsmCode->subs = NULL;
8146 if(NULL != pR700AsmCode->callers)
8148 FREE(pR700AsmCode->callers);
8149 pR700AsmCode->callers = NULL;
8152 if(NULL != pR700AsmCode->presubs)
8154 FREE(pR700AsmCode->presubs);
8155 pR700AsmCode->presubs = NULL;