89a84b7af8e1d5d86d9b0b46eced868a90368ae3
[platform/upstream/libdrm.git] / shared-core / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
40 #define R300_SIMULTANEOUS_CLIPRECTS             4
41
/*
 * Values written to R300_RE_CLIPRECT_CNTL, indexed by the number of
 * cliprects being emitted minus one (see r300_emit_cliprects).
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,		/* 1 cliprect */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE,		/* 4 cliprects */
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         struct drm_clip_rect box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return -EFAULT;
78                         }
79
80                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
81                                 box.x1 = (box.x1) &
82                                         R300_CLIPRECT_MASK;
83                                 box.y1 = (box.y1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.x2 = (box.x2) &
86                                         R300_CLIPRECT_MASK;
87                                 box.y2 = (box.y2) &
88                                         R300_CLIPRECT_MASK;
89                         } else {
90                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
91                                         R300_CLIPRECT_MASK;
92                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98
99                         }
100                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
101                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
102                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
104
105                 }
106
107                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
108
109                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
110                  * client might be able to trample over memory.
111                  * The impact should be very limited, but I'd rather be safe than
112                  * sorry.
113                  */
114                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
115                 OUT_RING(0);
116                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
117                 ADVANCE_RING();
118         } else {
119                 /* Why we allow zero cliprect rendering:
120                  * There are some commands in a command buffer that must be submitted
121                  * even when there are no cliprects, e.g. DMA buffer discard
122                  * or state setting (though state setting could be avoided by
123                  * simulating a loss of context).
124                  *
125                  * Now since the cmdbuf interface is so chaotic right now (and is
126                  * bound to remain that way for a bit until things settle down),
127                  * it is basically impossible to filter out the commands that are
128                  * necessary and those that aren't.
129                  *
130                  * So I choose the safe way and don't do any filtering at all;
131                  * instead, I simply set up the engine so that all rendering
132                  * can't produce any fragments.
133                  */
134                 BEGIN_RING(2);
135                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
136                 ADVANCE_RING();
137         }
138
139         return 0;
140 }
141
142 static u8 r300_reg_flags[0x10000 >> 2];
143
144 void r300_init_reg_flags(struct drm_device *dev)
145 {
146         int i;
147         drm_radeon_private_t *dev_priv = dev->dev_private;
148
149         memset(r300_reg_flags, 0, 0x10000 >> 2);
150 #define ADD_RANGE_MARK(reg, count,mark) \
151                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
152                         r300_reg_flags[i]|=(mark);
153
154 #define MARK_SAFE               1
155 #define MARK_CHECK_OFFSET       2
156
157 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
158
159         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
160         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
161         ADD_RANGE(R300_VAP_CNTL, 1);
162         ADD_RANGE(R300_SE_VTE_CNTL, 2);
163         ADD_RANGE(0x2134, 2);
164         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
165         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
166         ADD_RANGE(0x21DC, 1);
167         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
168         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
169         ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
170         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
171         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
172         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
173         ADD_RANGE(R300_GB_ENABLE, 1);
174         ADD_RANGE(R300_GB_MSPOS0, 5);
175         ADD_RANGE(R300_TX_CNTL, 1);
176         ADD_RANGE(R300_TX_ENABLE, 1);
177         ADD_RANGE(0x4200, 4);
178         ADD_RANGE(0x4214, 1);
179         ADD_RANGE(R300_RE_POINTSIZE, 1);
180         ADD_RANGE(0x4230, 3);
181         ADD_RANGE(R300_RE_LINE_CNT, 1);
182         ADD_RANGE(R300_RE_UNK4238, 1);
183         ADD_RANGE(0x4260, 3);
184         ADD_RANGE(R300_RE_SHADE, 4);
185         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
186         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
187         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
188         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
189         ADD_RANGE(R300_RE_CULL_CNTL, 1);
190         ADD_RANGE(0x42C0, 2);
191         ADD_RANGE(R300_RS_CNTL_0, 2);
192
193         ADD_RANGE(0x43A4, 2);
194         ADD_RANGE(0x43E8, 1);
195         ADD_RANGE(R300_PFS_CNTL_0, 3);
196         ADD_RANGE(R300_PFS_NODE_0, 4);
197         ADD_RANGE(R300_PFS_TEXI_0, 64);
198         ADD_RANGE(0x46A4, 5);
199
200         ADD_RANGE(R300_RE_FOG_STATE, 1);
201         ADD_RANGE(R300_FOG_COLOR_R, 3);
202         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
203         ADD_RANGE(0x4BD8, 1);
204         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
205         ADD_RANGE(0x4E00, 1);
206         ADD_RANGE(R300_RB3D_CBLEND, 2);
207         ADD_RANGE(R300_RB3D_COLORMASK, 1);
208         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
209         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
210         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
211         ADD_RANGE(0x4E50, 9);
212         ADD_RANGE(0x4E88, 1);
213         ADD_RANGE(0x4EA0, 2);
214         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
215         ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
216         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
217         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
218         ADD_RANGE(0x4F28, 1);
219         ADD_RANGE(0x4F30, 2);
220         ADD_RANGE(0x4F44, 1);
221         ADD_RANGE(0x4F54, 1);
222
223         ADD_RANGE(R300_TX_FILTER_0, 16);
224         ADD_RANGE(R300_TX_FILTER1_0, 16);
225         ADD_RANGE(R300_TX_SIZE_0, 16);
226         ADD_RANGE(R300_TX_FORMAT_0, 16);
227         ADD_RANGE(R300_TX_PITCH_0, 16);
228         /* Texture offset is dangerous and needs more checking */
229         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
230         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
231         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
232
233         /* Sporadic registers used as primitives are emitted */
234         ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
235         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
236         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
237         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
238
239         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
240                 ADD_RANGE(R500_RS_IP_0, 16);
241                 ADD_RANGE(R500_RS_INST_0, 16);
242         } else {
243                 ADD_RANGE(R300_PFS_INSTR0_0, 64);
244                 ADD_RANGE(R300_PFS_INSTR1_0, 64);
245                 ADD_RANGE(R300_PFS_INSTR2_0, 64);
246                 ADD_RANGE(R300_PFS_INSTR3_0, 64);
247                 ADD_RANGE(R300_RS_INTERP_0, 8);
248                 ADD_RANGE(R300_RS_ROUTE_0, 8);
249
250         }
251 }
252
253 static __inline__ int r300_check_range(unsigned reg, int count)
254 {
255         int i;
256         if (reg & ~0xffff)
257                 return -1;
258         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
259                 if (r300_reg_flags[i] != MARK_SAFE)
260                         return 1;
261         return 0;
262 }
263
264 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
265                                                           dev_priv,
266                                                           drm_radeon_kcmd_buffer_t
267                                                           * cmdbuf,
268                                                           drm_r300_cmd_header_t
269                                                           header)
270 {
271         int reg;
272         int sz;
273         int i;
274         int values[64];
275         RING_LOCALS;
276
277         sz = header.packet0.count;
278         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
279
280         if ((sz > 64) || (sz < 0)) {
281                 DRM_ERROR
282                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
283                      reg, sz);
284                 return -EINVAL;
285         }
286         for (i = 0; i < sz; i++) {
287                 values[i] = ((int *)cmdbuf->buf)[i];
288                 switch (r300_reg_flags[(reg >> 2) + i]) {
289                 case MARK_SAFE:
290                         break;
291                 case MARK_CHECK_OFFSET:
292                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
293                                 DRM_ERROR
294                                     ("Offset failed range check (reg=%04x sz=%d)\n",
295                                      reg, sz);
296                                 return -EINVAL;
297                         }
298                         break;
299                 default:
300                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
301                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
302                         return -EINVAL;
303                 }
304         }
305
306         BEGIN_RING(1 + sz);
307         OUT_RING(CP_PACKET0(reg, sz - 1));
308         OUT_RING_TABLE(values, sz);
309         ADVANCE_RING();
310
311         cmdbuf->buf += sz * 4;
312         cmdbuf->bufsz -= sz * 4;
313
314         return 0;
315 }
316
317 /**
318  * Emits a packet0 setting arbitrary registers.
319  * Called by r300_do_cp_cmdbuf.
320  *
321  * Note that checks are performed on contents and addresses of the registers
322  */
323 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
324                                         drm_radeon_kcmd_buffer_t *cmdbuf,
325                                         drm_r300_cmd_header_t header)
326 {
327         int reg;
328         int sz;
329         RING_LOCALS;
330
331         sz = header.packet0.count;
332         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
333
334         if (!sz)
335                 return 0;
336
337         if (sz * 4 > cmdbuf->bufsz)
338                 return -EINVAL;
339
340         if (reg + sz * 4 >= 0x10000) {
341                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
342                           sz);
343                 return -EINVAL;
344         }
345
346         if (r300_check_range(reg, sz)) {
347                 /* go and check everything */
348                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
349                                                            header);
350         }
351         /* the rest of the data is safe to emit, whatever the values the user passed */
352
353         BEGIN_RING(1 + sz);
354         OUT_RING(CP_PACKET0(reg, sz - 1));
355         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
356         ADVANCE_RING();
357
358         cmdbuf->buf += sz * 4;
359         cmdbuf->bufsz -= sz * 4;
360
361         return 0;
362 }
363
364 /**
365  * Uploads user-supplied vertex program instructions or parameters onto
366  * the graphics card.
367  * Called by r300_do_cp_cmdbuf.
368  */
369 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
370                                     drm_radeon_kcmd_buffer_t *cmdbuf,
371                                     drm_r300_cmd_header_t header)
372 {
373         int sz;
374         int addr;
375         RING_LOCALS;
376
377         sz = header.vpu.count;
378         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
379
380         if (!sz)
381                 return 0;
382         if (sz * 16 > cmdbuf->bufsz)
383                 return -EINVAL;
384
385         BEGIN_RING(5 + sz * 4);
386         /* Wait for VAP to come to senses.. */
387         /* there is no need to emit it multiple times, (only once before VAP is programmed,
388            but this optimization is for later */
389         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
390         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
391         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
392         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
393
394         ADVANCE_RING();
395
396         cmdbuf->buf += sz * 16;
397         cmdbuf->bufsz -= sz * 16;
398
399         return 0;
400 }
401
402 /**
403  * Emit a clear packet from userspace.
404  * Called by r300_emit_packet3.
405  */
406 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
407                                       drm_radeon_kcmd_buffer_t *cmdbuf)
408 {
409         RING_LOCALS;
410
411         if (8 * 4 > cmdbuf->bufsz)
412                 return -EINVAL;
413
414         BEGIN_RING(10);
415         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
416         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
417                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
418         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
419         ADVANCE_RING();
420
421         cmdbuf->buf += 8 * 4;
422         cmdbuf->bufsz -= 8 * 4;
423
424         return 0;
425 }
426
427 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
428                                                drm_radeon_kcmd_buffer_t *cmdbuf,
429                                                u32 header)
430 {
431         int count, i, k;
432 #define MAX_ARRAY_PACKET  64
433         u32 payload[MAX_ARRAY_PACKET];
434         u32 narrays;
435         RING_LOCALS;
436
437         count = (header >> 16) & 0x3fff;
438
439         if ((count + 1) > MAX_ARRAY_PACKET) {
440                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
441                           count);
442                 return -EINVAL;
443         }
444         memset(payload, 0, MAX_ARRAY_PACKET * 4);
445         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
446
447         /* carefully check packet contents */
448
449         narrays = payload[0];
450         k = 0;
451         i = 1;
452         while ((k < narrays) && (i < (count + 1))) {
453                 i++;            /* skip attribute field */
454                 if (!radeon_check_offset(dev_priv, payload[i])) {
455                         DRM_ERROR
456                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
457                              k, i);
458                         return -EINVAL;
459                 }
460                 k++;
461                 i++;
462                 if (k == narrays)
463                         break;
464                 /* have one more to process, they come in pairs */
465                 if (!radeon_check_offset(dev_priv, payload[i])) {
466                         DRM_ERROR
467                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
468                              k, i);
469                         return -EINVAL;
470                 }
471                 k++;
472                 i++;
473         }
474         /* do the counts match what we expect ? */
475         if ((k != narrays) || (i != (count + 1))) {
476                 DRM_ERROR
477                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
478                      k, i, narrays, count + 1);
479                 return -EINVAL;
480         }
481
482         /* all clear, output packet */
483
484         BEGIN_RING(count + 2);
485         OUT_RING(header);
486         OUT_RING_TABLE(payload, count + 1);
487         ADVANCE_RING();
488
489         cmdbuf->buf += (count + 2) * 4;
490         cmdbuf->bufsz -= (count + 2) * 4;
491
492         return 0;
493 }
494
495 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
496                                              drm_radeon_kcmd_buffer_t *cmdbuf)
497 {
498         u32 *cmd = (u32 *) cmdbuf->buf;
499         int count, ret;
500         RING_LOCALS;
501
502         count=(cmd[0]>>16) & 0x3fff;
503
504         if (cmd[0] & 0x8000) {
505                 u32 offset;
506
507                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
508                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
509                         offset = cmd[2] << 10;
510                         ret = !radeon_check_offset(dev_priv, offset);
511                         if (ret) {
512                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
513                                 return -EINVAL;
514                         }
515                 }
516
517                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
518                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
519                         offset = cmd[3] << 10;
520                         ret = !radeon_check_offset(dev_priv, offset);
521                         if (ret) {
522                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
523                                 return -EINVAL;
524                         }
525
526                 }
527         }
528
529         BEGIN_RING(count+2);
530         OUT_RING(cmd[0]);
531         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
532         ADVANCE_RING();
533
534         cmdbuf->buf += (count+2)*4;
535         cmdbuf->bufsz -= (count+2)*4;
536
537         return 0;
538 }
539
540 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
541                                              drm_radeon_kcmd_buffer_t *cmdbuf)
542 {
543         u32 *cmd = (u32 *) cmdbuf->buf;
544         int count, ret;
545         RING_LOCALS;
546
547         count=(cmd[0]>>16) & 0x3fff;
548
549         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
550                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
551                 return -EINVAL;
552         }
553         ret = !radeon_check_offset(dev_priv, cmd[2]);
554         if (ret) {
555                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
556                 return -EINVAL;
557         }
558
559         BEGIN_RING(count+2);
560         OUT_RING(cmd[0]);
561         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
562         ADVANCE_RING();
563
564         cmdbuf->buf += (count+2)*4;
565         cmdbuf->bufsz -= (count+2)*4;
566
567         return 0;
568 }
569
570 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
571                                             drm_radeon_kcmd_buffer_t *cmdbuf)
572 {
573         u32 header;
574         int count;
575         RING_LOCALS;
576
577         if (4 > cmdbuf->bufsz)
578                 return -EINVAL;
579
580         /* Fixme !! This simply emits a packet without much checking.
581            We need to be smarter. */
582
583         /* obtain first word - actual packet3 header */
584         header = *(u32 *) cmdbuf->buf;
585
586         /* Is it packet 3 ? */
587         if ((header >> 30) != 0x3) {
588                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
589                 return -EINVAL;
590         }
591
592         count = (header >> 16) & 0x3fff;
593
594         /* Check again now that we know how much data to expect */
595         if ((count + 2) * 4 > cmdbuf->bufsz) {
596                 DRM_ERROR
597                     ("Expected packet3 of length %d but have only %d bytes left\n",
598                      (count + 2) * 4, cmdbuf->bufsz);
599                 return -EINVAL;
600         }
601
602         /* Is it a packet type we know about ? */
603         switch (header & 0xff00) {
604         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
605                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
606
607         case RADEON_CNTL_BITBLT_MULTI:
608                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
609
610         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
611                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
612         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
613         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
614         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
615         case RADEON_WAIT_FOR_IDLE:
616         case RADEON_CP_NOP:
617                 /* these packets are safe */
618                 break;
619         default:
620                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
621                 return -EINVAL;
622         }
623
624         BEGIN_RING(count + 2);
625         OUT_RING(header);
626         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
627         ADVANCE_RING();
628
629         cmdbuf->buf += (count + 2) * 4;
630         cmdbuf->bufsz -= (count + 2) * 4;
631
632         return 0;
633 }
634
635 /**
636  * Emit a rendering packet3 from userspace.
637  * Called by r300_do_cp_cmdbuf.
638  */
639 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
640                                         drm_radeon_kcmd_buffer_t *cmdbuf,
641                                         drm_r300_cmd_header_t header)
642 {
643         int n;
644         int ret;
645         char *orig_buf = cmdbuf->buf;
646         int orig_bufsz = cmdbuf->bufsz;
647
648         /* This is a do-while-loop so that we run the interior at least once,
649          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
650          */
651         n = 0;
652         do {
653                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
654                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
655                         if (ret)
656                                 return ret;
657
658                         cmdbuf->buf = orig_buf;
659                         cmdbuf->bufsz = orig_bufsz;
660                 }
661
662                 switch (header.packet3.packet) {
663                 case R300_CMD_PACKET3_CLEAR:
664                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
665                         ret = r300_emit_clear(dev_priv, cmdbuf);
666                         if (ret) {
667                                 DRM_ERROR("r300_emit_clear failed\n");
668                                 return ret;
669                         }
670                         break;
671
672                 case R300_CMD_PACKET3_RAW:
673                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
674                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
675                         if (ret) {
676                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
677                                 return ret;
678                         }
679                         break;
680
681                 default:
682                         DRM_ERROR("bad packet3 type %i at %p\n",
683                                   header.packet3.packet,
684                                   cmdbuf->buf - sizeof(header));
685                         return -EINVAL;
686                 }
687
688                 n += R300_SIMULTANEOUS_CLIPRECTS;
689         } while (n < cmdbuf->nbox);
690
691         return 0;
692 }
693
694 /* Some of the R300 chips seem to be extremely touchy about the two registers
695  * that are configured in r300_pacify.
696  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
697  * sends a command buffer that contains only state setting commands and a
698  * vertex program/parameter upload sequence, this will eventually lead to a
699  * lockup, unless the sequence is bracketed by calls to r300_pacify.
700  * So we should take great care to *always* call r300_pacify before
701  * *anything* 3D related, and again afterwards. This is what the
702  * call bracket in r300_do_cp_cmdbuf is for.
703  */
704
705 /**
706  * Emit the sequence to pacify R300.
707  */
708 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
709 {
710         RING_LOCALS;
711
712         BEGIN_RING(6);
713         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
714         OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
715         OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
716         OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
717         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
718         OUT_RING(0x0);
719         ADVANCE_RING();
720 }
721
722 /**
723  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
724  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
725  * be careful about how this function is called.
726  */
727 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
728 {
729         drm_radeon_private_t *dev_priv = dev->dev_private;
730         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
731
732         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
733         buf->pending = 1;
734         buf->used = 0;
735 }
736
737 static int r300_scratch(drm_radeon_private_t *dev_priv,
738                         drm_radeon_kcmd_buffer_t *cmdbuf,
739                         drm_r300_cmd_header_t header)
740 {
741         u32 *ref_age_base;
742         u32 i, buf_idx, h_pending;
743         RING_LOCALS;
744
745         if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
746                 return -EINVAL;
747         }
748
749         if (header.scratch.reg >= 5) {
750                 return -EINVAL;
751         }
752
753         dev_priv->scratch_ages[header.scratch.reg] ++;
754
755         ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
756
757         cmdbuf->buf += sizeof(uint64_t);
758         cmdbuf->bufsz -= sizeof(uint64_t);
759
760         for (i=0; i < header.scratch.n_bufs; i++) {
761                 buf_idx = *(u32 *)cmdbuf->buf;
762                 buf_idx *= 2; /* 8 bytes per buf */
763
764                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
765                         return -EINVAL;
766                 }
767
768                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
769                         return -EINVAL;
770                 }
771
772                 if (h_pending == 0) {
773                         return -EINVAL;
774                 }
775
776                 h_pending--;
777
778                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
779                         return -EINVAL;
780                 }
781
782                 cmdbuf->buf += sizeof(buf_idx);
783                 cmdbuf->bufsz -= sizeof(buf_idx);
784         }
785
786         BEGIN_RING(2);
787         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
788         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
789         ADVANCE_RING();
790
791         return 0;
792 }
793
794 /**
795  * Uploads user-supplied vertex program instructions or parameters onto
796  * the graphics card.
797  * Called by r300_do_cp_cmdbuf.
798  */
799 static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
800                                        drm_radeon_kcmd_buffer_t *cmdbuf,
801                                        drm_r300_cmd_header_t header)
802 {
803         int sz;
804         int addr;
805         RING_LOCALS;
806
807         sz = header.r500fp.count;
808         addr = (header.r500fp.adrhi << 8) | header.r500fp.adrlo;
809
810         if (!sz)
811                 return 0;
812         if (sz * 16 > cmdbuf->bufsz)
813                 return -EINVAL;
814
815         BEGIN_RING(4 + sz * 4);
816         /* Wait for VAP to come to senses.. */
817         /* there is no need to emit it multiple times, (only once before VAP is programmed,
818            but this optimization is for later */
819         OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
820         OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * 4 - 1));
821         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
822
823         ADVANCE_RING();
824
825         cmdbuf->buf += sz * 16;
826         cmdbuf->bufsz -= sz * 16;
827
828         return 0;
829 }
830
831
832 /**
833  * Parses and validates a user-supplied command buffer and emits appropriate
834  * commands on the DMA ring buffer.
835  * Called by the ioctl handler function radeon_cp_cmdbuf.
836  */
837 int r300_do_cp_cmdbuf(struct drm_device *dev,
838                       struct drm_file *file_priv,
839                       drm_radeon_kcmd_buffer_t *cmdbuf)
840 {
841         drm_radeon_private_t *dev_priv = dev->dev_private;
842         struct drm_device_dma *dma = dev->dma;
843         struct drm_buf *buf = NULL;
844         int emit_dispatch_age = 0;
845         int ret = 0;
846
847         DRM_DEBUG("\n");
848
849         /* See the comment above r300_emit_begin3d for why this call must be here,
850          * and what the cleanup gotos are for. */
851         r300_pacify(dev_priv);
852
853         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
854                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
855                 if (ret)
856                         goto cleanup;
857         }
858
859         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
860                 int idx;
861                 drm_r300_cmd_header_t header;
862
863                 header.u = *(unsigned int *)cmdbuf->buf;
864
865                 cmdbuf->buf += sizeof(header);
866                 cmdbuf->bufsz -= sizeof(header);
867
868                 switch (header.header.cmd_type) {
869                 case R300_CMD_PACKET0:
870                         DRM_DEBUG("R300_CMD_PACKET0\n");
871                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
872                         if (ret) {
873                                 DRM_ERROR("r300_emit_packet0 failed\n");
874                                 goto cleanup;
875                         }
876                         break;
877
878                 case R300_CMD_VPU:
879                         DRM_DEBUG("R300_CMD_VPU\n");
880                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
881                         if (ret) {
882                                 DRM_ERROR("r300_emit_vpu failed\n");
883                                 goto cleanup;
884                         }
885                         break;
886
887                 case R300_CMD_PACKET3:
888                         DRM_DEBUG("R300_CMD_PACKET3\n");
889                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
890                         if (ret) {
891                                 DRM_ERROR("r300_emit_packet3 failed\n");
892                                 goto cleanup;
893                         }
894                         break;
895
896                 case R300_CMD_END3D:
897                         DRM_DEBUG("R300_CMD_END3D\n");
898                         /* TODO:
899                            Ideally userspace driver should not need to issue this call,
900                            i.e. the drm driver should issue it automatically and prevent
901                            lockups.
902
903                            In practice, we do not understand why this call is needed and what
904                            it does (except for some vague guesses that it has to do with cache
905                            coherence) and so the user space driver does it.
906
907                            Once we are sure which uses prevent lockups the code could be moved
908                            into the kernel and the userspace driver will not
909                            need to use this command.
910
911                            Note that issuing this command does not hurt anything
912                            except, possibly, performance */
913                         r300_pacify(dev_priv);
914                         break;
915
916                 case R300_CMD_CP_DELAY:
917                         /* simple enough, we can do it here */
918                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
919                         {
920                                 int i;
921                                 RING_LOCALS;
922
923                                 BEGIN_RING(header.delay.count);
924                                 for (i = 0; i < header.delay.count; i++)
925                                         OUT_RING(RADEON_CP_PACKET2);
926                                 ADVANCE_RING();
927                         }
928                         break;
929
930                 case R300_CMD_DMA_DISCARD:
931                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
932                         idx = header.dma.buf_idx;
933                         if (idx < 0 || idx >= dma->buf_count) {
934                                 DRM_ERROR("buffer index %d (of %d max)\n",
935                                           idx, dma->buf_count - 1);
936                                 ret = -EINVAL;
937                                 goto cleanup;
938                         }
939
940                         buf = dma->buflist[idx];
941                         if (buf->file_priv != file_priv || buf->pending) {
942                                 DRM_ERROR("bad buffer %p %p %d\n",
943                                           buf->file_priv, file_priv,
944                                           buf->pending);
945                                 ret = -EINVAL;
946                                 goto cleanup;
947                         }
948
949                         emit_dispatch_age = 1;
950                         r300_discard_buffer(dev, buf);
951                         break;
952
953                 case R300_CMD_WAIT:
954                         /* simple enough, we can do it here */
955                         DRM_DEBUG("R300_CMD_WAIT\n");
956                         if (header.wait.flags == 0)
957                                 break;  /* nothing to do */
958
959                         {
960                                 RING_LOCALS;
961
962                                 BEGIN_RING(2);
963                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
964                                 OUT_RING((header.wait.flags & 0xf) << 14);
965                                 ADVANCE_RING();
966                         }
967                         break;
968
969                 case R300_CMD_SCRATCH:
970                         DRM_DEBUG("R300_CMD_SCRATCH\n");
971                         ret = r300_scratch(dev_priv, cmdbuf, header);
972                         if (ret) {
973                                 DRM_ERROR("r300_scratch failed\n");
974                                 goto cleanup;
975                         }
976                         break;
977
978                 case R300_CMD_R500FP:
979                         if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
980                                 DRM_ERROR("Calling r500 command on r300 card\n");
981                                 ret = -EINVAL;
982                                 goto cleanup;
983                         }
984                         DRM_DEBUG("R300_CMD_R500FP\n");
985                         ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
986                         if (ret) {
987                                 DRM_ERROR("r300_emit_r500fp failed\n");
988                                 goto cleanup;
989                         }
990                         break;
991                 default:
992                         DRM_ERROR("bad cmd_type %i at %p\n",
993                                   header.header.cmd_type,
994                                   cmdbuf->buf - sizeof(header));
995                         ret = -EINVAL;
996                         goto cleanup;
997                 }
998         }
999
1000         DRM_DEBUG("END\n");
1001
1002       cleanup:
1003         r300_pacify(dev_priv);
1004
1005         /* We emit the vertex buffer age here, outside the pacifier "brackets"
1006          * for two reasons:
1007          *  (1) This may coalesce multiple age emissions into a single one and
1008          *  (2) more importantly, some chips lock up hard when scratch registers
1009          *      are written inside the pacifier bracket.
1010          */
1011         if (emit_dispatch_age) {
1012                 RING_LOCALS;
1013
1014                 /* Emit the vertex buffer age */
1015                 BEGIN_RING(2);
1016                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1017                 ADVANCE_RING();
1018         }
1019
1020         COMMIT_RING();
1021
1022         return ret;
1023 }