Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "program/program.h"
35 #include "program/programopt.h"
36 #include "program/prog_instruction.h"
37 #include "program/prog_parameter.h"
38 #include "program/prog_print.h"
39 #include "program/prog_statevars.h"
40 #include "tnl/tnl.h"
41
42 #include "compiler/radeon_compiler.h"
43 #include "radeon_mesa_to_rc.h"
44 #include "r300_context.h"
45 #include "r300_fragprog_common.h"
46 #include "r300_state.h"
47
48 /**
49  * Write parameter array for the given vertex program into dst.
50  * Return the total number of components written.
51  */
52 static int r300VertexProgUpdateParams(struct gl_context * ctx, struct r300_vertex_program *vp, float *dst)
53 {
54         int i;
55
56         if (vp->Base->IsNVProgram) {
57                 _mesa_load_tracked_matrices(ctx);
58         } else {
59                 if (vp->Base->Base.Parameters) {
60                         _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
61                 }
62         }
63
64         for(i = 0; i < vp->code.constants.Count; ++i) {
65                 const float * src = 0;
66                 const struct rc_constant * constant = &vp->code.constants.Constants[i];
67
68                 switch(constant->Type) {
69                 case RC_CONSTANT_EXTERNAL:
70                         if (vp->Base->IsNVProgram) {
71                                 src = ctx->VertexProgram.Parameters[constant->u.External];
72                         } else {
73                                 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
74                         }
75                         break;
76
77                 case RC_CONSTANT_IMMEDIATE:
78                         src = constant->u.Immediate;
79                         break;
80                 }
81
82                 assert(src);
83                 dst[4*i] = src[0];
84                 dst[4*i + 1] = src[1];
85                 dst[4*i + 2] = src[2];
86                 dst[4*i + 3] = src[3];
87         }
88
89         return 4 * vp->code.constants.Count;
90 }
91
92 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
93 {
94         GLbitfield outputs = 0;
95         int i;
96
97 #define ADD_OUTPUT(fp_attr, vp_result) \
98         do { \
99                 if (fpreads & (1 << (fp_attr))) \
100                         outputs |= (1 << (vp_result)); \
101         } while (0)
102
103         ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
104         ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
105
106         for (i = 0; i <= 7; ++i) {
107                 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
108         }
109
110 #undef ADD_OUTPUT
111
112         if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
113             (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
114                 outputs |= 1 << VERT_RESULT_BFC0;
115         if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
116             (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
117                 outputs |= 1 << VERT_RESULT_BFC1;
118
119         outputs |= 1 << VERT_RESULT_HPOS;
120         if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
121                 outputs |= 1 << VERT_RESULT_PSIZ;
122
123         return outputs;
124 }
125
126
127 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
128 {
129         int i;
130         int cur_reg;
131         GLuint OutputsWritten, InputsRead;
132
133         OutputsWritten = c->Base.Program.OutputsWritten;
134         InputsRead = c->Base.Program.InputsRead;
135
136         cur_reg = -1;
137         for (i = 0; i < VERT_ATTRIB_MAX; i++) {
138                 if (InputsRead & (1 << i))
139                         c->code->inputs[i] = ++cur_reg;
140                 else
141                         c->code->inputs[i] = -1;
142         }
143
144         cur_reg = 0;
145         for (i = 0; i < VERT_RESULT_MAX; i++)
146                 c->code->outputs[i] = -1;
147
148         assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
149
150         if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
151                 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
152         }
153
154         if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
155                 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
156         }
157
158         /* If we're writing back facing colors we need to send
159          * four colors to make front/back face colors selection work.
160          * If the vertex program doesn't write all 4 colors, lets
161          * pretend it does by skipping output index reg so the colors
162          * get written into appropriate output vectors.
163          */
164         if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
165                 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
166         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
167                 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
168                 cur_reg++;
169         }
170
171         if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
172                 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
173         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
174                 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
175                 cur_reg++;
176         }
177
178         if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
179                 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
180         } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
181                 cur_reg++;
182         }
183
184         if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
185                 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
186         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
187                 cur_reg++;
188         }
189
190         for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
191                 if (OutputsWritten & (1 << i)) {
192                         c->code->outputs[i] = cur_reg++;
193                 }
194         }
195
196         if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
197                 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
198         }
199 }
200
201 /**
202  * The NV_vertex_program spec mandates that all registers be
203  * initialized to zero. We do this here unconditionally.
204  *
205  * \note We rely on dead-code elimination in the compiler.
206  */
207 static void initialize_NV_registers(struct radeon_compiler * compiler)
208 {
209         unsigned int reg;
210         struct rc_instruction * inst;
211
212         for(reg = 0; reg < 12; ++reg) {
213                 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
214                 inst->U.I.Opcode = RC_OPCODE_MOV;
215                 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
216                 inst->U.I.DstReg.Index = reg;
217                 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
218                 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
219         }
220
221         inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
222         inst->U.I.Opcode = RC_OPCODE_ARL;
223         inst->U.I.DstReg.File = RC_FILE_ADDRESS;
224         inst->U.I.DstReg.Index = 0;
225         inst->U.I.DstReg.WriteMask = WRITEMASK_X;
226         inst->U.I.SrcReg[0].File = RC_FILE_NONE;
227         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
228 }
229
230 static struct r300_vertex_program *build_program(struct gl_context *ctx,
231                                                  struct r300_vertex_program_key *wanted_key,
232                                                  const struct gl_vertex_program *mesa_vp)
233 {
234         struct r300_vertex_program *vp;
235         struct r300_vertex_program_compiler compiler;
236
237         vp = calloc(1, sizeof(*vp));
238         vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp);
239         memcpy(&vp->key, wanted_key, sizeof(vp->key));
240
241         memset(&compiler, 0, sizeof(compiler));
242         rc_init(&compiler.Base);
243         compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
244
245         compiler.code = &vp->code;
246         compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
247         compiler.SetHwInputOutput = &t_inputs_outputs;
248         compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
249         compiler.Base.disable_optimizations = 0;
250         compiler.Base.has_half_swizzles = 0;
251         compiler.Base.max_temp_regs = 32;
252         compiler.Base.max_constants = 256;
253         compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
254
255         if (compiler.Base.Debug) {
256                 fprintf(stderr, "Initial vertex program:\n");
257                 _mesa_print_program(&vp->Base->Base);
258                 fflush(stderr);
259         }
260
261         if (mesa_vp->IsPositionInvariant) {
262                 _mesa_insert_mvp_code(ctx, vp->Base);
263         }
264
265         radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
266
267         if (mesa_vp->IsNVProgram)
268                 initialize_NV_registers(&compiler.Base);
269
270         rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
271
272         if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
273                 unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
274
275                 /* Set empty writemask for instructions writing to vp_wpos_attr
276                  * before moving the wpos attr there.
277                  * Such instructions will be removed by DCE.
278                  */
279                 rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
280                 rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
281         }
282
283         if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
284                 unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
285
286                 /* Set empty writemask for instructions writing to vp_fog_attr
287                  * before moving the fog attr there.
288                  * Such instructions will be removed by DCE.
289                  */
290                 rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
291                 rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
292         }
293
294         r3xx_compile_vertex_program(&compiler);
295
296         if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
297                 rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
298         }
299
300         vp->error = compiler.Base.Error;
301
302         vp->Base->Base.InputsRead = vp->code.InputsRead;
303         vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
304
305         rc_destroy(&compiler.Base);
306
307         return vp;
308 }
309
310 struct r300_vertex_program * r300SelectAndTranslateVertexShader(struct gl_context *ctx)
311 {
312         r300ContextPtr r300 = R300_CONTEXT(ctx);
313         struct r300_vertex_program_key wanted_key = { 0 };
314         struct r300_vertex_program_cont *vpc;
315         struct r300_vertex_program *vp;
316
317         vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
318
319         if (!r300->selected_fp) {
320                 /* This can happen when GetProgramiv is called to check
321                  * whether the program runs natively.
322                  *
323                  * To be honest, this is not a very good solution,
324                  * but solving the problem of reporting good values
325                  * for those queries is tough anyway considering that
326                  * we recompile vertex programs based on the precise
327                  * fragment program that is in use.
328                  */
329                 r300SelectAndTranslateFragmentShader(ctx);
330         }
331
332         assert(r300->selected_fp);
333         wanted_key.FpReads = r300->selected_fp->InputsRead;
334         wanted_key.FogAttr = r300->selected_fp->fog_attr;
335         wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
336
337         for (vp = vpc->progs; vp; vp = vp->next) {
338                 if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
339                         return r300->selected_vp = vp;
340                 }
341         }
342
343         vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
344         vp->next = vpc->progs;
345         vpc->progs = vp;
346
347         return r300->selected_vp = vp;
348 }
349
/*
 * Raise the vpu.count field of the command header at ptr to new_count / 4
 * if it is currently smaller; the field is never lowered.
 * new_count is divided by four before the comparison.
 */
#define bump_vpu_count(ptr, new_count) \
        do { \
                drm_r300_cmd_header_t *vpu_hdr_ = (drm_r300_cmd_header_t *)(ptr); \
                int vpu_vec_count_ = (new_count) / 4; \
                if (vpu_vec_count_ > vpu_hdr_->vpu.count) \
                        vpu_hdr_->vpu.count = vpu_vec_count_; \
        } while (0)
355
356 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
357 {
358         int i;
359
360         assert((code->length > 0) && (code->length % 4 == 0));
361
362         switch ((dest >> 8) & 0xf) {
363                 case 0:
364                         R300_STATECHANGE(r300, vpi);
365                         for (i = 0; i < code->length; i++)
366                                 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
367                         bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
368                         break;
369                 case 2:
370                         R300_STATECHANGE(r300, vpp);
371                         for (i = 0; i < code->length; i++)
372                                 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
373                         bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
374                         break;
375                 case 4:
376                         R300_STATECHANGE(r300, vps);
377                         for (i = 0; i < code->length; i++)
378                                 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
379                         bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
380                         break;
381                 default:
382                         fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
383                         exit(-1);
384         }
385 }
386
387 void r300SetupVertexProgram(r300ContextPtr rmesa)
388 {
389         struct gl_context *ctx = rmesa->radeon.glCtx;
390         struct r300_vertex_program *prog = rmesa->selected_vp;
391         int inst_count = 0;
392         int param_count = 0;
393
394         /* Reset state, in case we don't use something */
395         ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
396         ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
397         ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
398
399         R300_STATECHANGE(rmesa, vap_cntl);
400         R300_STATECHANGE(rmesa, vpp);
401         param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
402         if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
403                 WARN_ONCE("Too many VP params, expect rendering errors\n");
404         }
405         /* Prevent the overflow (vpu.count is u8) */
406         bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
407         param_count /= 4;
408
409         r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
410         inst_count = (prog->code.length / 4) - 1;
411
412         r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
413                                  _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
414
415         R300_STATECHANGE(rmesa, pvs);
416         rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
417                                 (inst_count << R300_PVS_LAST_INST_SHIFT);
418
419         rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
420         rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
421 }