2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "program/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
/*
 * Signature values identifying loadable scripts. The four visible entries
 * are named after the GLSL noise1..noise4 built-ins and use consecutive
 * values starting at 0x10000001.
 */
37 typedef enum LOADABLE_SCRIPT_SIGNITURE
39 GLSL_NOISE1 = 0x10000001,
40 GLSL_NOISE2 = 0x10000002,
41 GLSL_NOISE3 = 0x10000003,
42 GLSL_NOISE4 = 0x10000004
43 }LOADABLE_SCRIPT_SIGNITURE;
/*
 * A compiled sub-program: an instruction array with its instruction and
 * temporary counts, a table of parameter values, one output swizzle per
 * component (X/Y/Z/W) and three source register indices.
 */
45 typedef struct COMPILED_SUB
47 struct prog_instruction *Instructions;
48 GLuint NumInstructions;
49 GLuint NumTemporaries;
/* Pointer to rows of four GLfloat values each. */
52 GLfloat (*ParameterValues)[4];
53 GLbyte outputSwizzleX;
54 GLbyte outputSwizzleY;
55 GLbyte outputSwizzleZ;
56 GLbyte outputSwizzleW;
57 GLshort srcRegIndex[3];
/*
 * Descriptor for one pre-built subroutine: the script signature it
 * corresponds to, three input registers and one output register, register
 * and constant bookkeeping, and a pointer to the COMPILED_SUB holding the
 * code.
 */
61 typedef struct PRESUB_DESCtag
63 LOADABLE_SCRIPT_SIGNITURE sptSigniture;
65 struct prog_src_register InReg[3];
66 struct prog_dst_register OutReg;
69 GLushort number_used_registers;
/* NOTE(review): presumably the first constant slot used by this sub - confirm. */
71 GLuint unConstantsStart;
73 COMPILED_SUB * pCompiledSub;
76 typedef enum SHADER_PIPE_TYPE
/*
 * Sizing constants for the hardware reservation tables.
 * NUMBER_OF_COMPONENTS (4) is used as an array dimension for hw_gpr,
 * hw_cfile_addr and hw_cfile_chan in r700_AssemblerBase.
 */
82 typedef enum ConstantCycles
85 NUMBER_OF_COMPONENTS = 4
/*
 * Hardware limits used by the assembler. Register-file bases and sizes are
 * taken from the SQ_ALU_SRC_* constants; the input-color, output-color and
 * texture-unit counts size arrays in r700_AssemblerBase.
 */
88 typedef enum HARDWARE_LIMIT_VALUES
90 TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
91 MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE,
92 MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE,
93 CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE,
94 NUMBER_OF_INPUT_COLORS = 2,
95 NUMBER_OF_OUTPUT_COLORS = 8,
96 NUMBER_OF_TEXTURE_UNITS = 16,
98 } HARDWARE_LIMIT_VALUES;
/*
 * Register addressing modes. NUMBER_OF_ADDR_MOD (3) is the count of modes,
 * not a mode itself.
 */
100 typedef enum AddressMode
103 ADDR_RELATIVE_A0 = 1,
104 ADDR_RELATIVE_FLI_0 = 2,
105 NUMBER_OF_ADDR_MOD = 3
/*
 * Kinds of register a source operand can name. NUMBER_OF_SRC_REG_TYPE (5)
 * is the count of kinds.
 */
108 typedef enum SrcRegisterType
110 SRC_REG_TEMPORARY = 0,
112 SRC_REG_CONSTANT = 2,
113 SRC_REG_ALT_TEMPORARY = 3,
115 NUMBER_OF_SRC_REG_TYPE = 5
/*
 * Kinds of register a destination operand can name.
 * NUMBER_OF_DST_REG_TYPE (6) is the count of kinds.
 */
118 typedef enum DstRegisterType
120 DST_REG_TEMPORARY = 0,
123 DST_REG_OUT_X_REPL = 3,
124 DST_REG_ALT_TEMPORARY = 4,
126 NUMBER_OF_DST_REG_TYPE = 6
/* Base type for every bit-field member and small field value in this header. */
129 typedef unsigned int BITS;
/*
 * Destination-operand descriptor packed into bit-fields (opcode, predicate
 * bits, register number, op3/dualop flags and a two-bit address mode split
 * across addrmode0/addrmode1).
 *
 * The members are listed twice in mirrored order; the first listing is
 * guarded by MESA_BIG_ENDIAN. NOTE(review): presumably this keeps the bit
 * positions identical on both byte orders - confirm against the compiler's
 * bit-field allocation order.
 */
131 typedef struct PVSDSTtag
133 #ifdef MESA_BIG_ENDIAN
134 BITS addrmode1:1; //32
135 BITS addrmode0:1; //31 //29
137 BITS dualop:1; // 30 //26
139 BITS op3:1; // 29 Represents *_OP3_* ALU opcode
146 BITS reg:10; //24 //20
149 BITS pred_inv :1; //11 //8
150 BITS predicated:1; //10 //8
152 BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2
154 BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2
156 BITS predicated:1; //10 //8
157 BITS pred_inv :1; //11 //8
160 BITS reg:10; //24 //20
167 BITS op3:1; // 29 Represents *_OP3_* ALU opcode
169 BITS dualop:1; // 30 //26
171 BITS addrmode0:1; //31 //29
172 BITS addrmode1:1; //32
/*
 * Per-instruction bit-fields: a two-bit saturate mode and a two-bit literal
 * slot count. Declared in mirrored order depending on MESA_BIG_ENDIAN.
 */
176 typedef struct PVSINSTtag
178 #ifdef MESA_BIG_ENDIAN
180 BITS SaturateMode :2;
181 BITS literal_slots :2;
183 BITS literal_slots :2;
184 BITS SaturateMode :2;
/*
 * Source-operand descriptor packed into bit-fields (10-bit register number,
 * 3-bit per-component swizzle selectors, address-mode bits). Declared in
 * mirrored order depending on MESA_BIG_ENDIAN.
 */
189 typedef struct PVSSRCtag
191 #ifdef MESA_BIG_ENDIAN
192 BITS addrmode1:1; //32
200 BITS swizzlew:3; //26
204 BITS reg:10; //14 (8)
210 BITS reg:10; //14 (8)
214 BITS swizzlew:3; //26
222 BITS addrmode1:1; //32
/*
 * Math-instruction bit-fields, including the destination component select
 * and a two-bit destination offset. Declared in mirrored order depending on
 * MESA_BIG_ENDIAN.
 */
226 typedef struct PVSMATHtag
228 #ifdef MESA_BIG_ENDIAN
230 BITS dstcomp:2; // select dest component
234 BITS dstoff:2; // 2 bits of dest offset into alt ram
246 BITS dstoff:2; // 2 bits of dest offset into alt ram
250 BITS dstcomp:2; // select dest component
255 typedef union PVSDWORDtag
/*
 * Vertex output format word 0 as one-bit presence flags (position, point
 * size, render-target-array index, viewport index, ...) plus 12 reserved
 * bits. Declared in mirrored order depending on MESA_BIG_ENDIAN.
 */
265 typedef struct VAP_OUT_VTX_FMT_0tag
267 #ifdef MESA_BIG_ENDIAN
268 BITS resvd1:12; // 20
270 BITS viewport_index:1; // 19
272 BITS rta_index:1; // shares same channel as kill_flag
274 BITS point_size:1; // 15
289 BITS pos_param:1; // 4
299 BITS pos_param:1; // 4
314 BITS point_size:1; // 15
316 BITS rta_index:1; // shares same channel as kill_flag
318 BITS viewport_index:1; // 19
320 BITS resvd1:12; // 20
324 typedef struct VAP_OUT_VTX_FMT_1tag
326 #ifdef MESA_BIG_ENDIAN
351 typedef struct VAP_OUT_VTX_FMT_2tag
353 #ifdef MESA_BIG_ENDIAN
/*
 * Fragment output format flags; stored in r700_AssemblerBase as
 * fp_stOutFmt0. Declared in mirrored order depending on MESA_BIG_ENDIAN.
 */
378 typedef struct OUT_FRAGMENT_FMT_0tag
380 #ifdef MESA_BIG_ENDIAN
384 BITS coverage_to_mask:1;
408 BITS coverage_to_mask:1;
413 } OUT_FRAGMENT_FMT_0;
/*
 * Kinds of control-flow clause the assembler can have open; the current
 * kind is kept in r700_AssemblerBase.cf_current_clause_type.
 * NUMBER_CF_CLAUSE_TYPES is the trailing count entry.
 */
415 typedef enum CF_CLAUSE_TYPE
423 NUMBER_CF_CLAUSE_TYPES
/* Upper bounds on the number of bool, int and float constants. */
428 MAX_BOOL_CONSTANTS = 32,
429 MAX_INT_CONSTANTS = 32,
430 MAX_FLOAT_CONSTANTS = 256,
444 SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
445 SAFEDIST_ALU = 6 ///< the same for alu->fc
/*
 * One entry of the flow-control stack (r700_AssemblerBase.fc_stack).
 * Holds the first control-flow clause of the construct, an array of
 * unNumMid intermediate clause pointers, and the id of the associated
 * bool/int variable.
 */
448 typedef struct FC_LEVEL
450 R700ControlFlowGenericClause * first;
451 R700ControlFlowGenericClause ** mid;
452 unsigned int unNumMid;
457 int id; ///< id of bool or int variable
/*
 * Vertex fetch options passed to the assemble_vfetch_instruction family:
 * a mini-fetch enable flag and the mega-fetch remainder.
 */
460 typedef struct VTX_FETCH_METHOD
462 GLboolean bEnableMini;
463 GLuint mega_fetch_remainder;
/*
 * Per-subroutine record: maximum stack depth, an optional presub
 * descriptor, and the subroutine's own list of CF instructions.
 */
466 typedef struct SUB_OFFSET
470 GLuint unStackDepthMax;
471 PRESUB_DESC * pPresubDesc;
472 TypedShaderList lstCFInstructions_local;
/*
 * Record of one call site: the CF clause of the call itself plus the
 * prelude and finale CF clauses that surround it.
 */
475 typedef struct CALLER_POINTER
479 R700ControlFlowGenericClause* cf_ptr;
481 R700ControlFlowGenericClause* prelude_cf_ptr;
482 R700ControlFlowGenericClause* finale_cf_ptr;
/* Number of CALL_LEVEL entries in r700_AssemblerBase.CALLSTACK (0x20 = 32). */
485 #define SQ_MAX_CALL_DEPTH 0x00000020
/*
 * One entry of the call stack (r700_AssemblerBase.CALLSTACK): the FCSP
 * value saved before entering the call and a pointer to the CF instruction
 * list local to that level.
 */
487 typedef struct CALL_LEVEL
489 unsigned int FCSP_BeforeEntry;
493 TypedShaderList * plstCFInstructions_local;
/* Loop-return flag bits; each is a distinct single bit so they can be OR-ed. */
#define HAS_CURRENT_LOOPRET 0x1L
#define HAS_LOOPRET 0x2L
/*
 * Mask covering both flag bits. The expansion is parenthesized so it stays a
 * single operand next to tighter-binding operators: without the parentheses
 * `x & LOOPRET_FLAGS` parses as `(x & HAS_LOOPRET) | HAS_CURRENT_LOOPRET`
 * and `~LOOPRET_FLAGS` only complements HAS_LOOPRET.
 */
#define LOOPRET_FLAGS (HAS_LOOPRET | HAS_CURRENT_LOOPRET)
/*
 * Working state of the shader assembler; every assemble_*, check* and
 * Process_* routine declared in this header takes a pointer to it.
 *
 * Going by the members, it groups: pointers to the control-flow clauses
 * currently being built, export counters, register/constant usage counters,
 * fragment- and vertex-program attribute/output maps, the flow-control
 * stack, hardware GPR / constant-file reservation tables, export register
 * numbers, input-usage flags, vertex-fetch instruction pointers, and
 * subroutine / presub call bookkeeping.
 */
500 typedef struct r700_AssemblerBase
/* Clauses currently open for each clause kind. */
502 R700ControlFlowSXClause* cf_last_export_ptr;
503 R700ControlFlowSXClause* cf_current_export_clause_ptr;
504 R700ControlFlowALUClause* cf_current_alu_clause_ptr;
505 R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
506 R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
507 R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
/* Shader object being assembled into. */
510 R700_Shader * pR700Shader;
512 // No clause has been created yet
513 CF_CLAUSE_TYPE cf_current_clause_type;
517 GLuint number_of_exports;
518 GLuint number_of_colorandz_exports;
519 GLuint number_of_export_opcodes;
526 unsigned int uLastPosUpdate;
527 unsigned int last_cond_register;
529 OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
533 unsigned int number_used_registers;
534 unsigned int uUsedConsts;
536 unsigned int flag_reg_index;
/* Fragment-program maps, indexed by FRAG_ATTRIB_* / FRAG_RESULT_*. */
539 unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
540 unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
541 unsigned int uBoolConsts;
542 unsigned int uIntConsts;
544 unsigned int uConsts;
/* Vertex-program maps, indexed by VERT_ATTRIB_* / VERT_RESULT_*. */
547 unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
548 unsigned char ucVP_OutputMap[VERT_RESULT_MAX];
550 //-----------------------------------------------------------------------------------
551 // flow control members
552 //-----------------------------------------------------------------------------------
554 FC_LEVEL fc_stack[32];
556 //-----------------------------------------------------------------------------------
557 // ArgSubst used in Assemble_Source() function
558 //-----------------------------------------------------------------------------------
/* Reservation tables, one slot per component (and per cycle for GPRs). */
561 GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
562 GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
563 GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
/* Register numbers assigned to each kind of export. */
567 GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
568 GLint depth_export_register_number;
570 GLint stencil_export_register_number;
571 GLint coverage_to_mask_export_register_number;
572 GLint mask_export_register_number;
574 GLuint starting_export_register_number;
575 GLuint starting_vfetch_register_number;
576 GLuint starting_temp_register_number;
578 GLuint uFirstHelpReg;
580 GLboolean input_position_is_used;
581 GLboolean input_normal_is_used;
583 GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
585 GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
587 R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
589 GLuint number_of_inputs;
593 SHADER_PIPE_TYPE currentShaderType;
594 struct prog_instruction * pILInst;
596 GLubyte SamplerUnits[MAX_SAMPLERS];
599 /* TODO : merge bR6xx */
602 /* helper to decide which type of instruction to assemble */
604 /* we inserted helper instructions and need a barrier on the next TEX instruction */
605 GLboolean need_tex_barrier;
/* Subroutine, caller and presub arrays with their sizes and fill pointers. */
608 GLuint unSubArraySize;
609 GLuint unSubArrayPointer;
610 CALLER_POINTER * callers;
611 GLuint unCallerArraySize;
612 GLuint unCallerArrayPointer;
614 CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH];
618 PRESUB_DESC * presubs;
619 GLuint unPresubArraySize;
621 GLuint unCurNumILInsts;
625 GLuint shadow_regs[R700_MAX_TEXTURE_UNITS];
627 GLboolean bUseMemConstant;
630 } r700_AssemblerBase;
/*
 * Accessors and mutators for the PVSDST / PVSSRC bit-field operands:
 * address-mode get/set, write-mask reset, swizzle set/reset/compose,
 * negate set/clear/toggle, and forcing a component `c` to zero or one.
 * NOTE(review): semantics are inferred from the names; the definitions are
 * not in this header.
 */
633 BITS addrmode_PVSDST(PVSDST * pPVSDST);
634 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode);
635 void nomask_PVSDST(PVSDST * pPVSDST);
636 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
637 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode);
638 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
639 void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
640 void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
641 void neg_PVSSRC(PVSSRC* pPVSSRC);
642 void noneg_PVSSRC(PVSSRC* pPVSSRC);
643 void flipneg_PVSSRC(PVSSRC* pPVSSRC);
644 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
645 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
/*
 * Query helpers: export-format predicates, opcode classification, surface
 * format lookup, and operand counts. r700GetNumOperands and
 * EG_GetNumOperands share the same signature (opcode plus an is-OP3 flag).
 */
646 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
647 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
648 GLboolean is_reduction_opcode(PVSDWORD * dest);
/* pClient_size is an out-parameter (non-const pointer). */
649 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
651 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
653 unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3);
655 GLboolean IsTex(gl_inst_opcode Opcode);
656 GLboolean IsAlu(gl_inst_opcode Opcode);
/*
 * Clause management and vertex/texture fetch emission. Each routine takes
 * the assembler state; the three vfetch assemblers all end with a
 * VTX_FETCH_METHOD pointer.
 */
657 int check_current_clause(r700_AssemblerBase* pAsm,
658 CF_CLAUSE_TYPE new_clause_type);
659 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
660 R700VertexInstruction* vertex_instruction_ptr);
661 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
662 R700TextureInstruction* tex_instruction_ptr);
663 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
665 GLuint destination_register,
666 GLuint number_of_elements,
667 GLenum dataElementType,
668 VTX_FETCH_METHOD* pFetchMethod);
669 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
670 GLuint destination_register,
677 VTX_FETCH_METHOD * pFetchMethod);
679 GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
680 GLuint destination_register,
687 VTX_FETCH_METHOD * pFetchMethod);
688 //-----------------------
690 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
/*
 * Helper-register management, operand checks (checkop1/2/3 for one, two
 * and three operands - presumably; confirm), source/destination assembly,
 * texture instruction assembly, and ALU / CF instruction insertion.
 */
691 GLuint gethelpr(r700_AssemblerBase* pAsm);
692 void resethelpr(r700_AssemblerBase* pAsm);
693 void checkop_init(r700_AssemblerBase* pAsm);
694 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
695 GLboolean checkop1(r700_AssemblerBase* pAsm);
696 GLboolean checkop2(r700_AssemblerBase* pAsm);
697 GLboolean checkop3(r700_AssemblerBase* pAsm);
698 GLboolean assemble_src(r700_AssemblerBase *pAsm,
701 GLboolean assemble_dst(r700_AssemblerBase *pAsm);
702 GLboolean tex_dst(r700_AssemblerBase *pAsm);
703 GLboolean tex_src(r700_AssemblerBase *pAsm);
704 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);
705 void initialize(r700_AssemblerBase *pAsm);
706 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
709 BITS scalar_channel_index,
710 r700_AssemblerBase *pAsm);
712 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
713 R700ALUInstruction* alu_instruction_ptr,
714 GLuint contiguous_slots_needed);
716 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm);
717 void add_return_inst(r700_AssemblerBase *pAsm);
/*
 * ALU source inspection and register-port reservation. is_cfile / is_const
 * / is_gpr classify a source select value; reserve_cfile / reserve_gpr and
 * the cycle_for_*_bank_swizzle helpers are used when validating scalar and
 * vector ALU instructions (NOTE(review): inferred from names - confirm).
 */
719 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
725 int is_cfile(BITS sel);
726 int is_const(BITS sel);
727 int is_gpr(BITS sel);
728 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
731 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
/* pCycle is an out-parameter receiving the selected cycle. */
732 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
733 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
734 GLboolean check_scalar(r700_AssemblerBase* pAsm,
735 R700ALUInstruction* alu_instruction_ptr);
736 GLboolean check_vector(r700_AssemblerBase* pAsm,
737 R700ALUInstruction* alu_instruction_ptr);
738 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
/*
 * Low-level flow-control helpers (stack pops, jumps, loop-return flag
 * handling). Note that `jumpToOffest` is the actual exported spelling;
 * renaming it would break callers.
 */
740 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
741 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
742 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
743 GLboolean testFlag(r700_AssemblerBase *pAsm);
744 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP);
745 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF);
/*
 * Per-opcode assemblers. Each takes the assembler state and returns
 * GLboolean. The generic variants (math_function, LOGIC, LOGIC_PRED, TRIG,
 * KIL) additionally take the hardware opcode to emit. assemble_BAD is the
 * exception: it takes only the name string of an opcode.
 */
747 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
748 GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
749 GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
750 GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
751 GLboolean assemble_BAD(char *opcode_str);
752 GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
753 GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
754 GLboolean assemble_DST(r700_AssemblerBase *pAsm);
755 GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
756 GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
757 GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
758 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
759 GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
760 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode);
761 GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
762 GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
763 GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
764 GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
765 GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
766 GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
767 GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
768 GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
769 GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
770 GLboolean assemble_POW(r700_AssemblerBase *pAsm);
771 GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
772 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
773 GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
774 GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
775 GLboolean assemble_CONT(r700_AssemblerBase *pAsm);
777 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
778 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
779 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode);
781 GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
782 GLboolean assemble_SSG(r700_AssemblerBase *pAsm);
783 GLboolean assemble_STP(r700_AssemblerBase *pAsm);
784 GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
785 GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
/*
 * Export and structured control-flow assemblers: IF/ELSE/ENDIF,
 * BGNLOOP/BRK/COND/ENDLOOP, and subroutine BGNSUB/ENDSUB/RET/CAL.
 * assemble_IF is told up front whether an ELSE branch follows.
 */
786 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
787 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse);
788 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm);
789 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
791 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm);
792 GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
793 GLboolean assemble_COND(r700_AssemblerBase *pAsm);
794 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
796 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift);
797 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
798 GLboolean assemble_RET(r700_AssemblerBase *pAsm);
799 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
802 GLuint uiNumberInsts,
803 struct prog_instruction *pILInst,
804 PRESUB_DESC * pPresubDesc);
/*
 * Top-level driver entry points: export processing, presub invocation,
 * instruction-range assembly (AssembleInstr), fragment/vertex export
 * processing keyed by the OutputsWritten bitfield, program relocation, and
 * assembler set-up / tear-down (Init_r700_AssemblerBase returns int, the
 * rest return GLboolean).
 */
806 GLboolean Process_Export(r700_AssemblerBase* pAsm,
808 GLuint export_starting_index,
810 GLuint starting_register_number,
811 GLboolean is_depth_export);
812 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
813 BITS depth_channel_select);
815 GLboolean callPreSub(r700_AssemblerBase* pAsm,
816 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
817 /* struct prog_instruction ** pILInstParent, */
818 COMPILED_SUB * pCompiledSub,
820 GLshort uNumValidSrc);
821 GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm);
824 GLboolean AssembleInstr(GLuint uiFirstInst,
826 GLuint uiNumberInsts,
827 struct prog_instruction *pILInst,
828 r700_AssemblerBase *pR700AsmCode);
829 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
830 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
832 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg);
833 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
835 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
836 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
838 #endif //_R700_ASSEMBLER_H_