Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / gallium / drivers / nvfx / nvfx_push.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_state.h"
3 #include "util/u_inlines.h"
4 #include "util/u_format.h"
5 #include "util/u_split_prim.h"
6 #include "translate/translate.h"
7
8 #include "nvfx_context.h"
9 #include "nvfx_resource.h"
10
/*
 * State shared by the emit callbacks in this file.  A pointer to one of
 * these is handed to every callback as its opaque `priv` argument.
 */
struct push_context {
	/* command channel the packets are written to */
	struct nouveau_channel *chan;
	/* 3D engine object the methods are addressed to */
	struct nouveau_grobj *eng3d;

	/* CPU pointer to the index data, or NULL when no index data is pushed */
	void *idxbuf;
	/* value added to indices by the emit_elt* callbacks */
	int32_t idxbias;

	float edgeflag;
	int edgeflag_attr;

	/* ring words occupied by one translated vertex */
	unsigned vertex_length;
	/* upper bound on vertices placed in one VERTEX_DATA packet */
	unsigned max_vertices_per_packet;

	/* translate object that writes vertex data into the ring */
	struct translate *translate;
};
26
27 static void
28 emit_edgeflag(void *priv, boolean enabled)
29 {
30         struct push_context* ctx = priv;
31         struct nouveau_grobj *eng3d = ctx->eng3d;
32         struct nouveau_channel *chan = ctx->chan;
33
34         BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
35         OUT_RING(chan, enabled ? 1 : 0);
36 }
37
38 static void
39 emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
40 {
41         struct push_context *ctx = priv;
42         struct nouveau_grobj *eng3d = ctx->eng3d;
43         uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
44
45         while(count)
46         {
47                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
48                 unsigned length = push * ctx->vertex_length;
49
50                 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
51                 ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
52                 ctx->chan->cur += length;
53
54                 count -= push;
55                 elts += push;
56         }
57 }
58
59 static void
60 emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
61 {
62         struct push_context *ctx = priv;
63         struct nouveau_grobj *eng3d = ctx->eng3d;
64         uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
65
66         while(count)
67         {
68                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
69                 unsigned length = push * ctx->vertex_length;
70
71                 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
72                 ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
73                 ctx->chan->cur += length;
74
75                 count -= push;
76                 elts += push;
77         }
78 }
79
80 static void
81 emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
82 {
83         struct push_context *ctx = priv;
84         struct nouveau_grobj *eng3d = ctx->eng3d;
85         uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
86
87         while(count)
88         {
89                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
90                 unsigned length = push * ctx->vertex_length;
91
92                 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
93                 ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
94                 ctx->chan->cur += length;
95
96                 count -= push;
97                 elts += push;
98         }
99 }
100
101 static void
102 emit_vertices(void *priv, unsigned start, unsigned count)
103 {
104         struct push_context *ctx = priv;
105         struct nouveau_grobj *eng3d = ctx->eng3d;
106
107         while(count)
108         {
109                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
110                 unsigned length = push * ctx->vertex_length;
111
112                 BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
113                 ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
114                 ctx->chan->cur += length;
115
116                 count -= push;
117                 start += push;
118         }
119 }
120
121 static void
122 emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
123 {
124         struct push_context* ctx = priv;
125         struct nouveau_grobj *eng3d = ctx->eng3d;
126         struct nouveau_channel *chan = ctx->chan;
127         unsigned nr = (vc & 0xff);
128         if (nr) {
129                 BEGIN_RING(chan, eng3d, reg, 1);
130                 OUT_RING  (chan, ((nr - 1) << 24) | start);
131                 start += nr;
132         }
133
134         nr = vc >> 8;
135         while (nr) {
136                 unsigned push = nr > 2047 ? 2047 : nr;
137
138                 nr -= push;
139
140                 BEGIN_RING_NI(chan, eng3d, reg, push);
141                 while (push--) {
142                         OUT_RING(chan, ((0x100 - 1) << 24) | start);
143                         start += 0x100;
144                 }
145         }
146 }
147
148 static void
149 emit_ib_ranges(void* priv, unsigned start, unsigned vc)
150 {
151         emit_ranges(priv, start, vc, NV30_3D_VB_INDEX_BATCH);
152 }
153
154 static void
155 emit_vb_ranges(void* priv, unsigned start, unsigned vc)
156 {
157         emit_ranges(priv, start, vc, NV30_3D_VB_VERTEX_BATCH);
158 }
159
160 static INLINE void
161 emit_elt8(void* priv, unsigned start, unsigned vc)
162 {
163         struct push_context* ctx = priv;
164         struct nouveau_grobj *eng3d = ctx->eng3d;
165         struct nouveau_channel *chan = ctx->chan;
166         uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
167         int idxbias = ctx->idxbias;
168
169         if (vc & 1) {
170                 BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
171                 OUT_RING  (chan, elts[0]);
172                 elts++; vc--;
173         }
174
175         while (vc) {
176                 unsigned i;
177                 unsigned push = MIN2(vc, 2047 * 2);
178
179                 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
180                 for (i = 0; i < push; i+=2)
181                         OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
182
183                 vc -= push;
184                 elts += push;
185         }
186 }
187
188 static INLINE void
189 emit_elt16(void* priv, unsigned start, unsigned vc)
190 {
191         struct push_context* ctx = priv;
192         struct nouveau_grobj *eng3d = ctx->eng3d;
193         struct nouveau_channel *chan = ctx->chan;
194         uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
195         int idxbias = ctx->idxbias;
196
197         if (vc & 1) {
198                 BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
199                 OUT_RING  (chan, elts[0]);
200                 elts++; vc--;
201         }
202
203         while (vc) {
204                 unsigned i;
205                 unsigned push = MIN2(vc, 2047 * 2);
206
207                 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
208                 for (i = 0; i < push; i+=2)
209                         OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
210
211                 vc -= push;
212                 elts += push;
213         }
214 }
215
216 static INLINE void
217 emit_elt32(void* priv, unsigned start, unsigned vc)
218 {
219         struct push_context* ctx = priv;
220         struct nouveau_grobj *eng3d = ctx->eng3d;
221         struct nouveau_channel *chan = ctx->chan;
222         uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
223         int idxbias = ctx->idxbias;
224
225         while (vc) {
226                 unsigned push = MIN2(vc, 2047);
227
228                 BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
229                 if(idxbias)
230                 {
231                         for(unsigned i = 0; i < push; ++i)
232                                 OUT_RING(chan, elts[i] + idxbias);
233                 }
234                 else
235                         OUT_RINGp(chan, elts, push);
236
237                 vc -= push;
238                 elts += push;
239         }
240 }
241
242 void
243 nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
244 {
245         struct nvfx_context *nvfx = nvfx_context(pipe);
246         struct nouveau_channel *chan = nvfx->screen->base.channel;
247         struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
248         struct push_context ctx;
249         struct util_split_prim s;
250         unsigned instances_left = info->instance_count;
251         int vtx_value;
252         unsigned hw_mode = nvgl_primitive(info->mode);
253         int i;
254         struct
255         {
256                 uint8_t* map;
257                 unsigned step;
258         } per_instance[16];
259         unsigned p_overhead = 64 /* magic fix */
260                         + 4 /* begin/end */
261                         + 4; /* potential edgeflag enable/disable */
262
263         ctx.chan = nvfx->screen->base.channel;
264         ctx.eng3d = nvfx->screen->eng3d;
265         ctx.translate = nvfx->vtxelt->translate;
266         ctx.idxbuf = NULL;
267         ctx.vertex_length = nvfx->vtxelt->vertex_length;
268         ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
269         ctx.edgeflag = 0.5f;
270         // TODO: figure out if we really want to handle this, and do so in that case
271         ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
272
273         if(!nvfx->use_vertex_buffers)
274         {
275                 for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
276                 {
277                         struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
278                         struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
279                         uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
280                         if(info->indexed)
281                                 data += info->index_bias * vb->stride;
282                         ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
283                 }
284
285                 if(ctx.edgeflag_attr < 16)
286                         vtx_value = -(ctx.vertex_length + 3);  /* vertex data and edgeflag header and value */
287                 else
288                 {
289                         p_overhead += 1; /* initial vertex_data header */
290                         vtx_value = -ctx.vertex_length;  /* vertex data and edgeflag header and value */
291                 }
292
293                 if (info->indexed) {
294                         // XXX: this case and is broken and probably need a new VTX_ATTR push path
295                         if (nvfx->idxbuf.index_size == 1)
296                                 s.emit = emit_vertices_lookup8;
297                         else if (nvfx->idxbuf.index_size == 2)
298                                 s.emit = emit_vertices_lookup16;
299                         else
300                                 s.emit = emit_vertices_lookup32;
301                 } else
302                         s.emit = emit_vertices;
303         }
304         else
305         {
306                 if(!info->indexed || nvfx->use_index_buffer)
307                 {
308                         s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
309                         p_overhead += 3;
310                         vtx_value = 0;
311                 }
312                 else if (nvfx->idxbuf.index_size == 4)
313                 {
314                         s.emit = emit_elt32;
315                         p_overhead += 1;
316                         vtx_value = 8;
317                 }
318                 else
319                 {
320                         s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
321                         p_overhead += 3;
322                         vtx_value = 7;
323                 }
324         }
325
326         ctx.idxbias = info->index_bias;
327         if(nvfx->use_vertex_buffers)
328                 ctx.idxbias -= nvfx->base_vertex;
329
330         /* map index buffer, if present */
331         if (info->indexed && !nvfx->use_index_buffer)
332                 ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
333
334         s.priv = &ctx;
335         s.edge = emit_edgeflag;
336
337         for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
338         {
339                 struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
340                 struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
341                 float v[4];
342                 per_instance[i].step = info->start_instance % ve->instance_divisor;
343                 per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;
344
345                 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
346
347                 nvfx_emit_vtx_attr(chan, eng3d,
348                                    nvfx->vtxelt->per_instance[i].base.idx, v,
349                                    nvfx->vtxelt->per_instance[i].base.ncomp);
350         }
351
352         /* per-instance loop */
353         while (instances_left--) {
354                 int max_verts;
355                 boolean done;
356
357                 util_split_prim_init(&s, info->mode, info->start, info->count);
358                 nvfx_state_emit(nvfx);
359                 for(;;) {
360                         max_verts  = AVAIL_RING(chan);
361                         max_verts -= p_overhead;
362
363                         /* if vtx_value < 0, each vertex is -vtx_value words long
364                          * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
365                          */
366                         if(vtx_value < 0)
367                         {
368                                 max_verts /= -vtx_value;
369                                 max_verts -= (max_verts >> 10); /* vertex data headers */
370                         }
371                         else
372                         {
373                                 if(max_verts >= (1 << 23)) /* avoid overflow here */
374                                         max_verts = (1 << 23);
375                                 max_verts = (max_verts * 255) >> vtx_value;
376                         }
377
378                         //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
379
380                         if(max_verts >= 16)
381                         {
382                                 /* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */
383                                 /* this seems to cause issues on nv3x, and also be unneeded there */
384                                 if(nvfx->is_nv4x)
385                                 {
386                                         int i;
387                                         for(i = 0; i < 32; ++i)
388                                         {
389                                                 BEGIN_RING(chan, eng3d,
390                                                            0x1dac, 1);
391                                                 OUT_RING(chan, 0);
392                                         }
393                                 }
394
395                                 BEGIN_RING(chan, eng3d,
396                                            NV30_3D_VERTEX_BEGIN_END, 1);
397                                 OUT_RING(chan, hw_mode);
398                                 done = util_split_prim_next(&s, max_verts);
399                                 BEGIN_RING(chan, eng3d,
400                                            NV30_3D_VERTEX_BEGIN_END, 1);
401                                 OUT_RING(chan, 0);
402
403                                 if(done)
404                                         break;
405                         }
406
407                         FIRE_RING(chan);
408                         nvfx_state_emit(nvfx);
409                 }
410
411                 /* set data for the next instance, if any changed */
412                 for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
413                 {
414                         struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
415                         struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
416
417                         if(++per_instance[i].step == ve->instance_divisor)
418                         {
419                                 float v[4];
420                                 per_instance[i].map += vb->stride;
421                                 per_instance[i].step = 0;
422
423                                 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
424                                 nvfx_emit_vtx_attr(chan, eng3d,
425                                                    nvfx->vtxelt->per_instance[i].base.idx,
426                                                    v,
427                                                    nvfx->vtxelt->per_instance[i].base.ncomp);
428                         }
429                 }
430         }
431 }