2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
34 #include "main/imports.h"
35 #include "main/mtypes.h"
37 #include "tnl/t_context.h"
38 #include "program/program.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_statevars.h"
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "r600_emit.h"
46 #include "program/programopt.h"
48 #include "evergreen_vertprog.h"
/*
 * evergreen_Map_Vertex_Output
 *
 * Hands out consecutive slot numbers, beginning at unStart, to every vertex
 * result whose bit is set in mesa_vp->Base.OutputsWritten and stores the slot
 * in pAsm->ucVP_OutputMap[result].  Results are visited in a fixed order:
 * HPOS, COL0, COL1, BFC0, BFC1, FOGC, PSIZ, TEX0+i, and finally the range
 * VERT_RESULT_VAR0 .. VERT_RESULT_MAX-1.
 *
 * Returns the number of slots handed out (unTotal - unStart).
 *
 * NOTE(review): the tail of the parameter list, the local declarations and
 * some loop headers are not visible in this view of the file.
 */
50 unsigned int evergreen_Map_Vertex_Output(r700_AssemblerBase *pAsm,
51 struct gl_vertex_program *mesa_vp,
56 unsigned int unTotal = unStart;
58 //!!!!!!! THE ORDER MATCH FS INPUT
/* Each step below builds the single-bit mask for one result and, when that
 * result is written by the program, gives it the next free slot. */
60 unBit = 1 << VERT_RESULT_HPOS;
61 if(mesa_vp->Base.OutputsWritten & unBit)
63 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
66 unBit = 1 << VERT_RESULT_COL0;
67 if(mesa_vp->Base.OutputsWritten & unBit)
69 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
72 unBit = 1 << VERT_RESULT_COL1;
73 if(mesa_vp->Base.OutputsWritten & unBit)
75 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
78 //TODO : dealing back face.
79 unBit = 1 << VERT_RESULT_BFC0;
80 if(mesa_vp->Base.OutputsWritten & unBit)
82 pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
85 unBit = 1 << VERT_RESULT_BFC1;
86 if(mesa_vp->Base.OutputsWritten & unBit)
88 pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
92 unBit = 1 << VERT_RESULT_FOGC;
93 if(mesa_vp->Base.OutputsWritten & unBit)
95 pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
98 //TODO : dealing point size.
99 unBit = 1 << VERT_RESULT_PSIZ;
100 if(mesa_vp->Base.OutputsWritten & unBit)
102 pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
/* Texture coordinate results, indexed relative to VERT_RESULT_TEX0.
 * NOTE(review): the loop header that drives i is not visible here. */
107 unBit = 1 << (VERT_RESULT_TEX0 + i);
108 if(mesa_vp->Base.OutputsWritten & unBit)
110 pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
/* Results from VERT_RESULT_VAR0 upward are mapped last.
 * NOTE(review): the statement that sets unBit for this loop is not visible. */
114 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
117 if(mesa_vp->Base.OutputsWritten & unBit)
119 pAsm->ucVP_OutputMap[i] = unTotal++;
/* Count of slots assigned by this call. */
123 return (unTotal - unStart);
/*
 * evergreen_Map_Vertex_Input
 *
 * Walks attribute indices 0 .. VERT_ATTRIB_MAX-1 and, for each one whose bit
 * is set in mesa_vp->Base.InputsRead, stores the next slot number (starting
 * at unStart) in pAsm->ucVP_AttributeMap[i].
 *
 * Returns the number of attributes that received a slot.
 *
 * NOTE(review): the statement computing unBit for each i is not visible in
 * this view.
 */
126 unsigned int evergreen_Map_Vertex_Input(r700_AssemblerBase *pAsm,
127 struct gl_vertex_program *mesa_vp,
128 unsigned int unStart)
132 unsigned int unTotal = unStart;
133 for(i=0; i<VERT_ATTRIB_MAX; i++)
136 if(mesa_vp->Base.InputsRead & unBit)
138 pAsm->ucVP_AttributeMap[i] = unTotal++;
141 return (unTotal - unStart);
/*
 * evergreen_Process_Vertex_Program_Vfetch_Instructions
 *
 * For every attribute index whose bit is set in mesa_vp->Base.InputsRead,
 * calls assemble_vfetch_instruction() on vp->r700AsmCode, passing the slot
 * previously stored in ucVP_AttributeMap[i] together with the size and type
 * recorded in vp->aos_desc[i].
 *
 * NOTE(review): the complete argument list, the handling of the helper's
 * result and this function's return statement are not visible in this view.
 */
144 GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions(
145 struct evergreen_vertex_program *vp,
146 struct gl_vertex_program *mesa_vp)
/* Fetch-method state starts with mini fetch disabled and no mega-fetch
 * remainder. */
150 VTX_FETCH_METHOD vtxFetchMethod;
151 vtxFetchMethod.bEnableMini = GL_FALSE;
152 vtxFetchMethod.mega_fetch_remainder = 0;
154 for(i=0; i<VERT_ATTRIB_MAX; i++)
157 if(mesa_vp->Base.InputsRead & unBit)
159 assemble_vfetch_instruction(&vp->r700AsmCode,
161 vp->r700AsmCode.ucVP_AttributeMap[i],
162 vp->aos_desc[i].size,
163 vp->aos_desc[i].type,
/*
 * evergreen_Process_Vertex_Program_Vfetch_Instructions2
 *
 * Variant driven by the context's active stream descriptors: for each of the
 * first context->nNumActiveAos entries of context->stream_desc[], calls
 * EG_assemble_vfetch_instruction() with the slot mapped for that stream's
 * element plus the stream's type, size, element, signedness, normalize flag
 * and format.
 *
 * NOTE(review): the remaining arguments of the call, result handling and the
 * return statement are not visible in this view.
 * NOTE(review): this function obtains the context with R700_CONTEXT() while
 * the other functions in this file use EVERGREEN_CONTEXT() - confirm the two
 * are interchangeable.
 */
171 GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions2(
172 struct gl_context *ctx,
173 struct evergreen_vertex_program *vp,
174 struct gl_vertex_program *mesa_vp)
177 context_t *context = R700_CONTEXT(ctx);
/* Fetch-method state starts with mini fetch disabled and no mega-fetch
 * remainder. */
179 VTX_FETCH_METHOD vtxFetchMethod;
180 vtxFetchMethod.bEnableMini = GL_FALSE;
181 vtxFetchMethod.mega_fetch_remainder = 0;
183 for(i=0; i<context->nNumActiveAos; i++)
185 EG_assemble_vfetch_instruction(&vp->r700AsmCode,
186 vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
187 context->stream_desc[i].type,
188 context->stream_desc[i].size,
189 context->stream_desc[i].element,
190 context->stream_desc[i]._signed,
191 context->stream_desc[i].normalize,
192 context->stream_desc[i].format,
/*
 * evergreen_Map_Vertex_Program
 *
 * Lays out the register file of vp->r700AsmCode for one vertex program.
 * number_used_registers is advanced through these regions, in order:
 *   1. one register reserved for the vertex-buffer index (R0),
 *   2. one register per input read (evergreen_Map_Vertex_Input),
 *   3. one register per output written (evergreen_Map_Vertex_Output),
 *   4. the program temporaries,
 *   5. one flag register (flag_reg_index),
 * and uFirstHelpReg is set to the first register after all of those.
 * The vertex-fetch instructions for the inputs are emitted between steps
 * 2 and 3.
 */
199 void evergreen_Map_Vertex_Program(struct gl_context *ctx,
200 struct evergreen_vertex_program *vp,
201 struct gl_vertex_program *mesa_vp)
203 r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
204 unsigned int num_inputs;
206 // R0 will always be used for index into vertex buffer
207 pAsm->number_used_registers = 1;
208 pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
210 // Map Inputs: Add 1 to mapping since R0 is used for index
211 num_inputs = evergreen_Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
212 pAsm->number_used_registers += num_inputs;
214 // Create VFETCH instructions for inputs
/* NOTE(review): only the error message of the failure branch is visible
 * here; whether the function returns early afterwards cannot be told. */
215 if (GL_TRUE != evergreen_Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
217 radeon_error("Calling evergreen_Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
/* Outputs are mapped starting at the first register after the inputs; the
 * export region begins at that same register. */
222 pAsm->number_of_exports = evergreen_Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
224 pAsm->starting_export_register_number = pAsm->number_used_registers;
226 pAsm->number_used_registers += pAsm->number_of_exports;
228 /* Map temporary registers (GPRs) */
229 pAsm->starting_temp_register_number = pAsm->number_used_registers;
/* Reserve whichever temporary count is larger: NumNativeTemporaries when it
 * is >= NumTemporaries, otherwise (presumably the else branch, whose keyword
 * is not visible) NumTemporaries. */
231 if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
232 { /* arb uses NumNativeTemporaries */
233 pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
236 { /* fix func t_vp uses NumTemporaries */
237 pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
/* The flag register takes the next free register; helper registers start
 * right after it. */
240 pAsm->flag_reg_index = pAsm->number_used_registers++;
242 pAsm->uFirstHelpReg = pAsm->number_used_registers;
/*
 * evergreen_Find_Instruction_Dependencies_vp
 *
 * Builds one InstDeps record per instruction of mesa_vp and stores the array
 * in vp->r700AsmCode.pInstDeps.  While scanning the instructions in order it
 * keeps, per temporary register, the index of the most recent instruction
 * that wrote it (puiTEMPwrites[], -1 meaning "not written yet").  For each
 * source operand that is a temporary, nSrcDeps[j] receives that last-writer
 * index; for any other operand it is set to -1.  nDstDep is initialised to -1.
 *
 * NOTE(review): the loop over source operands (index j), the release of
 * puiTEMPwrites, any allocation-failure handling and the return statement are
 * not visible in this view.
 */
245 GLboolean evergreen_Find_Instruction_Dependencies_vp(struct evergreen_vertex_program *vp,
246 struct gl_vertex_program *mesa_vp)
249 GLint * puiTEMPwrites;
250 struct prog_instruction *pILInst;
/* Last-writer table, one entry per temporary.  The element size is taken
 * from GLuint while the pointer type is GLint*. */
253 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
254 for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
256 puiTEMPwrites[i] = -1;
/* One dependency record per program instruction. */
259 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
261 for(i=0; i<mesa_vp->Base.NumInstructions; i++)
263 pInstDeps[i].nDstDep = -1;
264 pILInst = &(mesa_vp->Base.Instructions[i]);
267 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
269 //Set lastwrite for the temp
270 puiTEMPwrites[pILInst->DstReg.Index] = i;
/* Source operand j: depend on the last writer of the temporary it reads, or
 * record -1 when the operand is not a temporary. */
276 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
279 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
283 pInstDeps[i].nSrcDeps[j] = -1;
/* Hand the finished table to the assembler state. */
288 vp->r700AsmCode.pInstDeps = pInstDeps;
/*
 * evergreenTranslateVertexShader
 *
 * Allocates a new evergreen_vertex_program, clones mesa_vp into it and runs
 * the translation pipeline on the clone: optional MVP insertion for
 * position-invariant programs, snapshot of the active stream layout,
 * assembler initialisation, register mapping, dependency analysis,
 * instruction assembly, export processing and relocation.  On completion it
 * records the register and export counts in vp->r700Shader and marks the
 * program as translated.
 *
 * NOTE(review): the bodies of the failure branches and the return statements
 * are not visible in this view.
 */
295 struct evergreen_vertex_program* evergreenTranslateVertexShader(struct gl_context *ctx,
296 struct gl_vertex_program *mesa_vp)
298 context_t *context = EVERGREEN_CONTEXT(ctx);
300 struct evergreen_vertex_program *vp;
/* NOTE(review): the calloc result is dereferenced on the next statement
 * without a NULL check. */
303 vp = calloc(1, sizeof(*vp));
304 vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
/* Position-invariant programs get the MVP transform code inserted into the
 * cloned program. */
308 if (mesa_vp->IsPositionInvariant)
310 _mesa_insert_mvp_code(ctx, vp->mesa_program);
/* Remember the stream layout this translation was built for; it is compared
 * against the current layout in evergreenSelectVertexShader. */
313 for(i=0; i<context->nNumActiveAos; i++)
315 vp->aos_desc[i].size = context->stream_desc[i].size;
316 vp->aos_desc[i].stride = context->stream_desc[i].stride;
317 vp->aos_desc[i].type = context->stream_desc[i].type;
318 vp->aos_desc[i].format = context->stream_desc[i].format;
/* Chip families below RV770 set the assembler's bR6xx flag. */
321 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
323 vp->r700AsmCode.bR6xx = 1;
327 Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
/* NOTE(review): unAsic = 8 presumably selects the Evergreen generation in
 * the shared assembler - confirm against the assembler sources. */
329 vp->r700AsmCode.bUseMemConstant = GL_TRUE;
330 vp->r700AsmCode.unAsic = 8;
332 evergreen_Map_Vertex_Program(ctx, vp, vp->mesa_program );
334 if(GL_FALSE == evergreen_Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
339 InitShaderProgram(&(vp->r700AsmCode));
/* Copy the sampler-unit table from the cloned program into the assembler. */
341 for(i=0; i < MAX_SAMPLERS; i++)
343 vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
346 vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
/* Assemble every instruction of the cloned program, starting at index 0. */
348 if(GL_FALSE == AssembleInstr(0,
350 vp->mesa_program->Base.NumInstructions,
351 &(vp->mesa_program->Base.Instructions[0]),
352 &(vp->r700AsmCode)) )
357 if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
362 if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
/* nRegs holds the register count minus one (0 when no register is used);
 * evergreenSetupVertexProgram adds the one back when programming NUM_GPRS. */
367 vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
368 : (vp->r700AsmCode.number_used_registers - 1);
370 vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
372 vp->translated = GL_TRUE;
/*
 * evergreenSelectVertexShader
 *
 * Chooses the translated vertex program to use for the current vertex
 * program container (ctx->VertexProgram._Current).  The container's list of
 * existing translations (vpc->progs) is searched by comparing each cached
 * stream size/format with the context's active streams; a suitable entry is
 * stored in context->selected_vp.  Otherwise a new translation is produced
 * with evergreenTranslateVertexShader(), linked in front of the list and
 * selected.
 *
 * NOTE(review): the statements that record a match/mismatch, leave the loops,
 * test the translation result and update vpc->progs are not visible in this
 * view.
 */
377 void evergreenSelectVertexShader(struct gl_context *ctx)
379 context_t *context = EVERGREEN_CONTEXT(ctx);
380 struct evergreen_vertex_program_cont *vpc;
381 struct evergreen_vertex_program *vp;
384 GLbitfield InputsRead;
386 vpc = (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
/* Position-invariant programs also count the position attribute as read.
 * NOTE(review): no later use of InputsRead is visible in this view. */
388 InputsRead = vpc->mesa_program.Base.InputsRead;
389 if (vpc->mesa_program.IsPositionInvariant)
391 InputsRead |= VERT_BIT_POS;
/* Look for an existing translation whose stream layout matches. */
394 for (vp = vpc->progs; vp; vp = vp->next)
397 for(i=0; i<context->nNumActiveAos; i++)
399 if (vp->aos_desc[i].size != context->stream_desc[i].size ||
400 vp->aos_desc[i].format != context->stream_desc[i].format)
408 context->selected_vp = (struct r700_vertex_program *) vp;
/* No usable translation found: build a new one from the container's
 * program. */
413 vp = evergreenTranslateVertexShader(ctx, &(vpc->mesa_program));
416 radeon_error("Failed to translate vertex shader. \n");
419 vp->next = vpc->progs;
421 context->selected_vp = (struct r700_vertex_program *) vp;
/*
 * evergreen_getTypeSize
 *
 * Returns the size in bytes of one component of the given GL data type, as
 * sizeof() of the matching GL C type (GLdouble, GLfloat, GLint, GLuint,
 * GLshort, GLushort, GLbyte, GLubyte).
 *
 * NOTE(review): the switch header, several case labels and the handling of
 * unrecognised types are not visible in this view.
 */
425 int evergreen_getTypeSize(GLenum type)
430 return sizeof(GLdouble);
432 return sizeof(GLfloat);
434 return sizeof(GLint);
435 case GL_UNSIGNED_INT:
436 return sizeof(GLuint);
438 return sizeof(GLshort);
439 case GL_UNSIGNED_SHORT:
440 return sizeof(GLushort);
442 return sizeof(GLbyte);
443 case GL_UNSIGNED_BYTE:
444 return sizeof(GLubyte);
/*
 * evergreenTranslateAttrib
 *
 * Fills the next free stream descriptor,
 * context->stream_desc[context->nNumActiveAos], from the client array
 * `input` bound to attribute location unLoc, then increments
 * context->nNumActiveAos.
 *
 * Two layouts are visible:
 *  - arrays whose type is GL_DOUBLE, GL_UNSIGNED_INT or GL_INT, or whose
 *    component size is not 4 bytes, are described as GL_FLOAT data with one
 *    dword per component;
 *  - all other arrays keep their type, with the dword count rounded up from
 *    the byte size.
 *
 * NOTE(review): parts of the selecting condition, the else branches (such as
 * the named buffer-object path), the data upload/conversion calls and most
 * case labels of the final switch are not visible in this view.
 */
451 static void evergreenTranslateAttrib(struct gl_context *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
453 context_t *context = EVERGREEN_CONTEXT(ctx);
455 StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
/* A StrideB of 0 means a stride of one whole element (component size times
 * component count); the alternative operand is not visible. */
459 stride = (input->StrideB == 0) ? evergreen_getTypeSize(input->Type) * input->Size
462 if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
464 || evergreen_getTypeSize(input->Type) != 4
/* Float layout: the stream is described as GL_FLOAT. */
468 pStreamDesc->type = GL_FLOAT;
470 if (input->StrideB == 0)
472 pStreamDesc->stride = 0;
476 pStreamDesc->stride = sizeof(GLfloat) * input->Size;
478 pStreamDesc->dwords = input->Size;
479 pStreamDesc->is_named_bo = GL_FALSE;
/* Native layout: keep the source type; dwords is the element byte size
 * rounded up to whole 32-bit words. */
483 pStreamDesc->type = input->Type;
484 pStreamDesc->dwords = (evergreen_getTypeSize(input->Type) * input->Size + 3)/ 4;
/* Array not backed by a named buffer object. */
485 if (!input->BufferObj->Name)
487 if (input->StrideB == 0)
489 pStreamDesc->stride = 0;
/* Element byte size rounded up to a multiple of 4. */
493 pStreamDesc->stride = (evergreen_getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
496 pStreamDesc->is_named_bo = GL_FALSE;
/* Fields common to both layouts. */
500 pStreamDesc->size = input->Size;
501 pStreamDesc->dst_loc = context->nNumActiveAos;
502 pStreamDesc->element = unLoc;
503 pStreamDesc->format = input->Format;
/* Derive signedness and the normalize flag from the final stream type. */
505 switch (pStreamDesc->type)
508 pStreamDesc->_signed = 0;
509 pStreamDesc->normalize = GL_FALSE;
512 pStreamDesc->_signed = 1;
513 pStreamDesc->normalize = input->Normalized;
516 pStreamDesc->_signed = 1;
517 pStreamDesc->normalize = input->Normalized;
519 case GL_UNSIGNED_SHORT:
520 pStreamDesc->_signed = 0;
521 pStreamDesc->normalize = input->Normalized;
523 case GL_UNSIGNED_BYTE:
524 pStreamDesc->_signed = 0;
525 pStreamDesc->normalize = input->Normalized;
529 case GL_UNSIGNED_INT:
/* The descriptor is complete; one more active stream. */
534 context->nNumActiveAos++;
/*
 * evergreenSetVertexFormat
 *
 * Rebuilds the context's active stream descriptors for the current vertex
 * program: resets context->nNumActiveAos to 0, takes the program's
 * InputsRead mask (adding VERT_BIT_POS for position-invariant programs),
 * calls evergreenTranslateAttrib() with arrays[unLoc] for attribute
 * locations, and finally copies the resulting count into
 * context->radeon.tcl.aos_count.
 *
 * NOTE(review): the loop that walks the bits of unBit and advances unLoc is
 * not visible in this view.
 * NOTE(review): ctx->VertexProgram._Current is cast here to
 * struct evergreen_vertex_program *, whereas evergreenSelectVertexShader
 * casts it to struct evergreen_vertex_program_cont * - confirm that taking
 * &(vpc->mesa_program) through this type yields the intended
 * gl_vertex_program.
 */
537 void evergreenSetVertexFormat(struct gl_context *ctx, const struct gl_client_array *arrays[], int count)
539 context_t *context = EVERGREEN_CONTEXT(ctx);
540 struct evergreen_vertex_program *vpc
541 = (struct evergreen_vertex_program *)ctx->VertexProgram._Current;
543 struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
544 unsigned int unLoc = 0;
545 unsigned int unBit = mesa_vp->Base.InputsRead;
546 context->nNumActiveAos = 0;
548 if (mesa_vp->IsPositionInvariant)
550 unBit |= VERT_BIT_POS;
557 evergreenTranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
563 context->radeon.tcl.aos_count = context->nNumActiveAos;
/*
 * evergreenGetActiveVpShaderBo
 *
 * Looks up the currently selected vertex program (context->selected_vp).
 * NOTE(review): the remainder of the body, including the value returned, is
 * not visible in this view; the name suggests it returns the shader-code
 * buffer object of that program - confirm.
 */
566 void * evergreenGetActiveVpShaderBo(struct gl_context * ctx)
568 context_t *context = EVERGREEN_CONTEXT(ctx);
569 struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *) context->selected_vp;
/*
 * evergreenGetActiveVpShaderConstBo
 *
 * Looks up the currently selected vertex program (context->selected_vp).
 * NOTE(review): the remainder of the body, including the value returned, is
 * not visible in this view; the name suggests it returns the constant buffer
 * object of that program - confirm.
 */
577 void * evergreenGetActiveVpShaderConstBo(struct gl_context * ctx)
579 context_t *context = EVERGREEN_CONTEXT(ctx);
580 struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *) context->selected_vp;
/*
 * evergreenSetupVertexProgram
 *
 * Prepares hardware state for the selected vertex program
 * (context->selected_vp):
 *  - on first use (vp->loaded is GL_FALSE) assembles the shader if it still
 *    needs assembly, uploads the program binary and marks it loaded;
 *  - flags the "vs" state as changed and programs SQ_PGM_RESOURCES_VS
 *    (prime-cache bit, GPR count, optional stack size), SQ_PGM_START_VS and
 *    SQ_ALU_CONST_CACHE_VS_0;
 *  - flags the "spi" state as changed and programs the VS export count and
 *    the PS interpolator count and gradient enables.
 *
 * NOTE(review): the name of the upload call, some of its arguments and the
 * return statement are not visible in this view.
 */
588 GLboolean evergreenSetupVertexProgram(struct gl_context * ctx)
590 context_t *context = EVERGREEN_CONTEXT(ctx);
591 EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
592 struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *) context->selected_vp;
/* One-time assembly and upload of the shader binary. */
594 if(GL_FALSE == vp->loaded)
596 if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
598 Assemble( &(vp->r700Shader) );
604 (GLvoid *)(vp->r700Shader.pProgram),
605 vp->r700Shader.uShaderBinaryDWORDSize,
608 vp->loaded = GL_TRUE;
611 EVERGREEN_STATECHANGE(context, vs);
613 /* TODO : enable this after MemUse fixed *=
614 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
617 evergreen->SQ_PGM_RESOURCES_VS.u32All = 0;
618 SETbit(evergreen->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
620 evergreen->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */
622 evergreen->vs.SQ_PGM_START_VS.u32All = 0;
/* nRegs was stored as "registers used minus one" by
 * evergreenTranslateVertexShader, hence the + 1 here. */
624 SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
625 NUM_GPRS_shift, NUM_GPRS_mask);
627 if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
629 SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
630 STACK_SIZE_shift, STACK_SIZE_mask);
633 EVERGREEN_STATECHANGE(context, spi);
/* VS_EXPORT_COUNT is written as the export count minus one (0 when there are
 * no exports); NUM_INTERP receives the export count itself. */
635 SETfield(evergreen->SPI_VS_OUT_CONFIG.u32All,
636 vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
637 VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
638 SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
639 NUM_INTERP_shift, NUM_INTERP_mask);
/* Perspective gradients on, linear gradients off. */
642 SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
643 CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
/*
 * evergreenSetupVPconstants
 *
 * Gathers the constants of the selected vertex program into
 * evergreen->vs.consts[] and emits them:
 *  - when the cloned program has a parameter list, state parameters are
 *    refreshed with _mesa_load_state_parameters(), each parameter's four
 *    components are copied (uniforms from the current program's list, all
 *    others from the clone's list), a DMA region is allocated and the values
 *    are passed to r600EmitShaderConsts();
 *  - the parameters of every compiled pre-subroutine
 *    (vp->r700AsmCode.presubs[]) are then copied into consts[] starting at
 *    index unConstOffset, and vs.num_consts is increased accordingly.
 *
 * NOTE(review): several statements (declarations, the over-limit branch
 * body, else keywords, some call arguments, the return statement) are not
 * visible in this view, and the function may continue past the last line
 * shown.
 */
649 GLboolean evergreenSetupVPconstants(struct gl_context * ctx)
651 context_t *context = EVERGREEN_CONTEXT(ctx);
652 EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
653 struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *) context->selected_vp;
655 struct gl_program_parameter_list *paramList;
656 unsigned int unNumParamData;
660 /* send out shader constants. */
661 paramList = vp->mesa_program->Base.Parameters;
663 if(NULL != paramList) {
664 /* vp->mesa_program was cloned, not updated by glsl shader api. */
665 /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgram._Current */
666 /* so, use ctx->VertexProgram._Current */
667 struct gl_program_parameter_list *paramListOrginal =
668 ctx->VertexProgram._Current->Base.Parameters;
670 _mesa_load_state_parameters(ctx, paramList);
/* NOTE(review): the action taken when the limit is exceeded is not visible. */
672 if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS)
675 EVERGREEN_STATECHANGE(context, vs);
677 evergreen->vs.num_consts = paramList->NumParameters;
679 unNumParamData = paramList->NumParameters;
681 /* alloc multiple of 16 constants */
/* 16 bytes per constant (4 components x 4 bytes), rounded up to a multiple
 * of 256 bytes. */
682 alloc_size = ((unNumParamData * 4 * 4) + 255) & ~255;
684 for(ui=0; ui<unNumParamData; ui++) {
/* Uniform values come from the current program's parameter list, because
 * the cloned list is not updated by the GLSL API (see comment above). */
685 if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
687 evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
688 evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
689 evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
690 evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
/* Every other parameter type is read from the cloned program's list. */
694 evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
695 evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
696 evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
697 evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
/* Reserve DMA space for the constants and emit them (16 bytes each). */
701 radeonAllocDmaRegion(&context->radeon,
702 &context->vp_Constbo,
703 &context->vp_bo_offset,
706 r600EmitShaderConsts(ctx,
708 context->vp_bo_offset,
709 (GLvoid *)&(evergreen->vs.consts[0][0]),
710 unNumParamData * 4 * 4);
/* NOTE(review): presumably the else branch for a NULL parameter list; the
 * else keyword is not visible. */
712 evergreen->vs.num_consts = 0;
714 COMPILED_SUB * pCompiledSub;
/* Pre-subroutine constants are placed after the constants counted so far. */
716 GLuint unConstOffset = evergreen->vs.num_consts;
717 for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
719 pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
721 evergreen->vs.num_consts += pCompiledSub->NumParameters;
723 for(uj=0; uj<pCompiledSub->NumParameters; uj++)
725 evergreen->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
726 evergreen->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
727 evergreen->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
728 evergreen->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
/* Advance past this subroutine's constants for the next one. */
730 unConstOffset += pCompiledSub->NumParameters;