Merge commit 'origin/master' into i965g-restart
[profile/ivi/mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29  * \file
30  *
31  * \brief R300 Render (Vertex Buffer Implementation)
32  *
33  * The immediate implementation has been removed from CVS in favor of the vertex
34  * buffer implementation.
35  *
36  * The render functions are called by the pipeline manager to render a batch of
37  * primitives. They return TRUE to pass on to the next stage (i.e. software
38  * rasterization) or FALSE to indicate that the pipeline has finished after
39  * rendering something.
40  *
41  * When falling back to software TCL still attempt to use hardware
42  * rasterization.
43  *
44  * I am not sure that the cache-related registers are set up correctly, but
45  * obviously this does work... Further investigation is needed.
46  *
47  * \author Nicolai Haehnle <prefect_@gmx.net>
48  *
49  * \todo Add immediate implementation back? Perhaps this is useful if there are
50  * no bugs...
51  */
52
53 #include "r300_render.h"
54
55 #include "main/glheader.h"
56 #include "main/state.h"
57 #include "main/imports.h"
58 #include "main/enums.h"
59 #include "main/macros.h"
60 #include "main/context.h"
61 #include "main/dd.h"
62 #include "main/simple_list.h"
63 #include "main/api_arrayelt.h"
64 #include "swrast/swrast.h"
65 #include "swrast_setup/swrast_setup.h"
66 #include "vbo/vbo.h"
67 #include "vbo/vbo_split.h"
68 #include "tnl/tnl.h"
69 #include "tnl/t_vp_build.h"
70 #include "r300_context.h"
71 #include "r300_ioctl.h"
72 #include "r300_state.h"
73 #include "r300_reg.h"
74 #include "r300_tex.h"
75 #include "r300_emit.h"
76 #include "r300_fragprog_common.h"
77 #include "r300_swtcl.h"
78
79 /**
80  * \brief Convert a OpenGL primitive type into a R300 primitive type.
81  */
82 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
83 {
84         switch (prim & PRIM_MODE_MASK) {
85         case GL_POINTS:
86                 return R300_VAP_VF_CNTL__PRIM_POINTS;
87                 break;
88         case GL_LINES:
89                 return R300_VAP_VF_CNTL__PRIM_LINES;
90                 break;
91         case GL_LINE_STRIP:
92                 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
93                 break;
94         case GL_LINE_LOOP:
95                 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
96                 break;
97         case GL_TRIANGLES:
98                 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
99                 break;
100         case GL_TRIANGLE_STRIP:
101                 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
102                 break;
103         case GL_TRIANGLE_FAN:
104                 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
105                 break;
106         case GL_QUADS:
107                 return R300_VAP_VF_CNTL__PRIM_QUADS;
108                 break;
109         case GL_QUAD_STRIP:
110                 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
111                 break;
112         case GL_POLYGON:
113                 return R300_VAP_VF_CNTL__PRIM_POLYGON;
114                 break;
115         default:
116                 assert(0);
117                 return -1;
118                 break;
119         }
120 }
121
122 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
123 {
124         int verts_off = 0;
125
126         switch (prim & PRIM_MODE_MASK) {
127         case GL_POINTS:
128                 verts_off = 0;
129                 break;
130         case GL_LINES:
131                 verts_off = num_verts % 2;
132                 break;
133         case GL_LINE_STRIP:
134                 if (num_verts < 2)
135                         verts_off = num_verts;
136                 break;
137         case GL_LINE_LOOP:
138                 if (num_verts < 2)
139                         verts_off = num_verts;
140                 break;
141         case GL_TRIANGLES:
142                 verts_off = num_verts % 3;
143                 break;
144         case GL_TRIANGLE_STRIP:
145                 if (num_verts < 3)
146                         verts_off = num_verts;
147                 break;
148         case GL_TRIANGLE_FAN:
149                 if (num_verts < 3)
150                         verts_off = num_verts;
151                 break;
152         case GL_QUADS:
153                 verts_off = num_verts % 4;
154                 break;
155         case GL_QUAD_STRIP:
156                 if (num_verts < 4)
157                         verts_off = num_verts;
158                 else
159                         verts_off = num_verts % 2;
160                 break;
161         case GL_POLYGON:
162                 if (num_verts < 3)
163                         verts_off = num_verts;
164                 break;
165         default:
166                 assert(0);
167                 return -1;
168                 break;
169         }
170
171         return num_verts - verts_off;
172 }
173
174 static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
175 {
176         BATCH_LOCALS(&rmesa->radeon);
177         int size;
178
179         /* offset is in indices */
180         BEGIN_BATCH(10);
181         OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
182         if (rmesa->ind_buf.is_32bit) {
183                 /* convert to bytes */
184                 offset *= 4;
185                 size = vertex_count;
186                 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
187                   (vertex_count << 16) | type |
188                   R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
189         } else {
190                 /* convert to bytes */
191                 offset *= 2;
192                 size = (vertex_count + 1) >> 1;
193                 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
194                    (vertex_count << 16) | type);
195         }
196
197         if (!rmesa->radeon.radeonScreen->kernel_mm) {
198                 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
199                 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
200                                  (R300_VAP_PORT_IDX0 >> 2));
201                 OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
202                 OUT_BATCH(size);
203         } else {
204                 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
205                 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
206                                  (R300_VAP_PORT_IDX0 >> 2));
207                 OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
208                 OUT_BATCH(size);
209                 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
210                                       rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
211         }
212         END_BATCH();
213 }
214
215 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
216 {
217         BATCH_LOCALS(&rmesa->radeon);
218         uint32_t voffset;
219         int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
220         int i;
221
222         if (RADEON_DEBUG & RADEON_VERTS)
223                 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
224                         offset);
225
226         if (!rmesa->radeon.radeonScreen->kernel_mm) {
227                 BEGIN_BATCH(sz+2+(nr * 2));
228                 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
229                 OUT_BATCH(nr);
230
231                 for (i = 0; i + 1 < nr; i += 2) {
232                         OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
233                                   (rmesa->radeon.tcl.aos[i].stride << 8) |
234                                   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
235                                   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
236
237                         voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
238                                 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
239                         OUT_BATCH_RELOC(voffset,
240                                         rmesa->radeon.tcl.aos[i].bo,
241                                         voffset,
242                                         RADEON_GEM_DOMAIN_GTT,
243                                         0, 0);
244                         voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
245                           offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
246                         OUT_BATCH_RELOC(voffset,
247                                         rmesa->radeon.tcl.aos[i+1].bo,
248                                         voffset,
249                                         RADEON_GEM_DOMAIN_GTT,
250                                         0, 0);
251                 }
252
253                 if (nr & 1) {
254                         OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
255                                   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
256                         voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
257                                 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
258                         OUT_BATCH_RELOC(voffset,
259                                         rmesa->radeon.tcl.aos[nr - 1].bo,
260                                         voffset,
261                                         RADEON_GEM_DOMAIN_GTT,
262                                         0, 0);
263                 }
264                 END_BATCH();
265         } else {
266
267                 BEGIN_BATCH(sz+2+(nr * 2));
268                 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
269                 OUT_BATCH(nr);
270
271                 for (i = 0; i + 1 < nr; i += 2) {
272                         OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
273                                   (rmesa->radeon.tcl.aos[i].stride << 8) |
274                                   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
275                                   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
276
277                         voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
278                                 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
279                         OUT_BATCH(voffset);
280                         voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
281                                 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
282                         OUT_BATCH(voffset);
283                 }
284
285                 if (nr & 1) {
286                         OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
287                           (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
288                         voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
289                                 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
290                         OUT_BATCH(voffset);
291                 }
292                 for (i = 0; i + 1 < nr; i += 2) {
293                         voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
294                                 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
295                         radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
296                                               rmesa->radeon.tcl.aos[i+0].bo,
297                                               RADEON_GEM_DOMAIN_GTT,
298                                               0, 0);
299                         voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
300                                 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
301                         radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
302                                               rmesa->radeon.tcl.aos[i+1].bo,
303                                               RADEON_GEM_DOMAIN_GTT,
304                                               0, 0);
305                 }
306                 if (nr & 1) {
307                         voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
308                                 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
309                         radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
310                                               rmesa->radeon.tcl.aos[nr-1].bo,
311                                               RADEON_GEM_DOMAIN_GTT,
312                                               0, 0);
313                 }
314                 END_BATCH();
315         }
316
317 }
318
319 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
320 {
321         BATCH_LOCALS(&rmesa->radeon);
322
323         r300_emit_scissor(rmesa->radeon.glCtx);
324         BEGIN_BATCH(3);
325         OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
326         OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
327         END_BATCH();
328 }
329
330 void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
331 {
332         r300ContextPtr rmesa = R300_CONTEXT(ctx);
333         BATCH_LOCALS(&rmesa->radeon);
334         int type, num_verts;
335
336         type = r300PrimitiveType(rmesa, prim);
337         num_verts = r300NumVerts(rmesa, end - start, prim);
338
339         if (type < 0 || num_verts <= 0)
340                 return;
341
342         if (rmesa->ind_buf.bo) {
343                 GLuint first, incr, offset = 0;
344
345                 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
346                         num_verts > 65500) {
347                         WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
348                         return;
349                 }
350
351
352                 r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
353                 if (rmesa->radeon.radeonScreen->kernel_mm) {
354                         BEGIN_BATCH_NO_AUTOSTATE(2);
355                         OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
356                         OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
357                         END_BATCH();
358                 }
359
360                 r300_emit_scissor(rmesa->radeon.glCtx);
361                 while (num_verts > 0) {
362                         int nr;
363                         int align;
364
365                         nr = MIN2(num_verts, 65535);
366                         nr -= (nr - first) % incr;
367
368                         /* get alignment for IB correct */
369                         if (nr != num_verts) {
370                                 do {
371                                     align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
372                                     if (align % 4)
373                                         nr -= incr;
374                                 } while(align % 4);
375                                 if (nr <= 0) {
376                                         WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
377                                         return;
378                                 }
379                                         
380                         }
381                         r300FireEB(rmesa, nr, type, offset);
382
383                         num_verts -= nr;
384                         offset += nr;
385                 }
386
387         } else {
388                 GLuint first, incr, offset = 0;
389
390                 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
391                         num_verts > 65535) {
392                         WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
393                         return;
394                 }
395                 r300_emit_scissor(rmesa->radeon.glCtx);
396                 while (num_verts > 0) {
397                         int nr;
398                         nr = MIN2(num_verts, 65535);
399                         nr -= (nr - first) % incr;
400                         r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
401                         r300FireAOS(rmesa, nr, type);
402                         num_verts -= nr;
403                         offset += nr;
404                 }
405         }
406         COMMIT_BATCH();
407 }
408
409 static const char *getFallbackString(uint32_t bit)
410 {
411         switch (bit) {
412                 case R300_FALLBACK_VERTEX_PROGRAM :
413                         return "vertex program";
414                 case R300_FALLBACK_LINE_SMOOTH:
415                         return "smooth lines";
416                 case R300_FALLBACK_POINT_SMOOTH:
417                         return "smooth points";
418                 case R300_FALLBACK_POLYGON_SMOOTH:
419                         return "smooth polygons";
420                 case R300_FALLBACK_LINE_STIPPLE:
421                         return "line stipple";
422                 case R300_FALLBACK_POLYGON_STIPPLE:
423                         return "polygon stipple";
424                 case R300_FALLBACK_STENCIL_TWOSIDE:
425                         return "two-sided stencil";
426                 case R300_FALLBACK_RENDER_MODE:
427                         return "render mode != GL_RENDER";
428                 case R300_FALLBACK_FRAGMENT_PROGRAM:
429                         return "fragment program";
430                 case R300_FALLBACK_AOS_LIMIT:
431                         return "aos limit";
432                 case R300_FALLBACK_INVALID_BUFFERS:
433                         return "invalid buffers";
434                 default:
435                         return "unknown";
436         }
437 }
438
439 void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
440 {
441         TNLcontext *tnl = TNL_CONTEXT(ctx);
442         r300ContextPtr rmesa = R300_CONTEXT(ctx);
443         uint32_t old_fallback = rmesa->fallback;
444         static uint32_t fallback_warn = 0;
445
446         if (mode) {
447                 if ((fallback_warn & bit) == 0) {
448                         if (RADEON_DEBUG & RADEON_FALLBACKS)
449                                 _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
450                         fallback_warn |= bit;
451                 }
452                 rmesa->fallback |= bit;
453
454                 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
455                 if (rmesa->options.hw_tcl_enabled) {
456                         if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
457                                 ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
458                                 R300_STATECHANGE(rmesa, vap_cntl_status);
459                                 rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
460                         }
461                 }
462
463                 /* update only if we change from no raster fallbacks to some raster fallbacks */
464                 if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
465                         ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
466
467                         radeon_firevertices(&rmesa->radeon);
468                         rmesa->radeon.swtcl.RenderIndex = ~0;
469                         _swsetup_Wakeup( ctx );
470                 }
471         } else {
472                 rmesa->fallback &= ~bit;
473
474                 /* update only if we have disabled all tcl fallbacks */
475                 if (rmesa->options.hw_tcl_enabled) {
476                         if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) {
477                                 R300_STATECHANGE(rmesa, vap_cntl_status);
478                                 rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
479                         }
480                 }
481
482                 /* update only if we have disabled all raster fallbacks */
483                 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
484                         _swrast_flush( ctx );
485
486                         tnl->Driver.Render.Start = r300RenderStart;
487                         tnl->Driver.Render.Finish = r300RenderFinish;
488                         tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
489                         tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
490                         tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
491                         tnl->Driver.Render.CopyPV = _tnl_copy_pv;
492                         tnl->Driver.Render.Interp = _tnl_interp;
493
494                         _tnl_invalidate_vertex_state( ctx, ~0 );
495                         _tnl_invalidate_vertices( ctx, ~0 );
496                 }
497         }
498
499 }