1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     struct drm_file *file_priv,
43                                                     u32 * offset)
44 {
45         u64 off = *offset;
46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47         struct drm_radeon_driver_file_fields *radeon_priv;
48
49         /* Hrm ... the story of the offset ... So this function converts
50          * the various ideas of what userland clients might have for an
51          * offset in the card address space into an offset into the card
52          * address space :) So with a sane client, it should just keep
53          * the value intact and just do some boundary checking. However,
54          * not all clients are sane. Some older clients pass us 0 based
55          * offsets relative to the start of the framebuffer and some may
56          * assume the AGP aperture is appended to the framebuffer, so we
57          * try to detect those cases and fix them up.
58          *
59          * Note: It might be a good idea here to make sure the offset lands
60          * in some "allowed" area to protect things like the PCIE GART...
61          */
62
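           /* Illustrative example (hypothetical numbers): if the framebuffer
            * sits at fb_location 0x10000000 and a legacy client hands us the
            * zero-based offset 0x00200000, the radeon_fb_delta added below
            * (typically fb_location for such clients) turns it into
            * 0x10200000 before the bounds check is redone.
            */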
63         /* First, the best case, the offset already lands in either the
64          * framebuffer or the GART mapped space
65          */
66         if (radeon_check_offset(dev_priv, off))
67                 return 0;
68
69         /* Ok, that didn't happen... now check if we have a zero based
70          * offset that fits in the framebuffer + gart space, apply the
71          * magic offset we get from SETPARAM or calculated from fb_location
72          */
73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74                 radeon_priv = file_priv->driver_priv;
75                 off += radeon_priv->radeon_fb_delta;
76         }
77
78         /* Finally, assume we aimed at a GART offset if beyond the fb */
79         if (off > fb_end)
80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81
82         /* Now recheck and fail if out of bounds */
83         if (radeon_check_offset(dev_priv, off)) {
84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85                 *offset = off;
86                 return 0;
87         }
88         return -EINVAL;
89 }
90
91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
92                                                      dev_priv,
93                                                      struct drm_file *file_priv,
94                                                      int id, u32 *data)
95 {
96         switch (id) {
97
98         case RADEON_EMIT_PP_MISC:
99                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
100                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
101                         DRM_ERROR("Invalid depth buffer offset\n");
102                         return -EINVAL;
103                 }
104                 break;
105
106         case RADEON_EMIT_PP_CNTL:
107                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
108                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
109                         DRM_ERROR("Invalid colour buffer offset\n");
110                         return -EINVAL;
111                 }
112                 break;
113
114         case R200_EMIT_PP_TXOFFSET_0:
115         case R200_EMIT_PP_TXOFFSET_1:
116         case R200_EMIT_PP_TXOFFSET_2:
117         case R200_EMIT_PP_TXOFFSET_3:
118         case R200_EMIT_PP_TXOFFSET_4:
119         case R200_EMIT_PP_TXOFFSET_5:
120                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
121                                                   &data[0])) {
122                         DRM_ERROR("Invalid R200 texture offset\n");
123                         return -EINVAL;
124                 }
125                 break;
126
127         case RADEON_EMIT_PP_TXFILTER_0:
128         case RADEON_EMIT_PP_TXFILTER_1:
129         case RADEON_EMIT_PP_TXFILTER_2:
130                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
131                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
132                         DRM_ERROR("Invalid R100 texture offset\n");
133                         return -EINVAL;
134                 }
135                 break;
136
137         case R200_EMIT_PP_CUBIC_OFFSETS_0:
138         case R200_EMIT_PP_CUBIC_OFFSETS_1:
139         case R200_EMIT_PP_CUBIC_OFFSETS_2:
140         case R200_EMIT_PP_CUBIC_OFFSETS_3:
141         case R200_EMIT_PP_CUBIC_OFFSETS_4:
142         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
143                         int i;
144                         for (i = 0; i < 5; i++) {
145                                 if (radeon_check_and_fixup_offset(dev_priv,
146                                                                   file_priv,
147                                                                   &data[i])) {
148                                         DRM_ERROR
149                                             ("Invalid R200 cubic texture offset\n");
150                                         return -EINVAL;
151                                 }
152                         }
153                         break;
154                 }
155
156         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
159                         int i;
160                         for (i = 0; i < 5; i++) {
161                                 if (radeon_check_and_fixup_offset(dev_priv,
162                                                                   file_priv,
163                                                                   &data[i])) {
164                                         DRM_ERROR
165                                             ("Invalid R100 cubic texture offset\n");
166                                         return -EINVAL;
167                                 }
168                         }
169                 }
170                 break;
171
172         case R200_EMIT_VAP_CTL: {
173                         RING_LOCALS;
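                                 /* Presumably the hardware wants the TCL
                                  * state flushed before SE_VAP_CNTL is
                                  * rewritten, so a SE_TCL_STATE_FLUSH write
                                  * is slipped in ahead of the client's
                                  * packet.
                                  */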
174                         BEGIN_RING(2);
175                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
176                         ADVANCE_RING();
177                 }
178                 break;
179
180         case RADEON_EMIT_RB3D_COLORPITCH:
181         case RADEON_EMIT_RE_LINE_PATTERN:
182         case RADEON_EMIT_SE_LINE_WIDTH:
183         case RADEON_EMIT_PP_LUM_MATRIX:
184         case RADEON_EMIT_PP_ROT_MATRIX_0:
185         case RADEON_EMIT_RB3D_STENCILREFMASK:
186         case RADEON_EMIT_SE_VPORT_XSCALE:
187         case RADEON_EMIT_SE_CNTL:
188         case RADEON_EMIT_SE_CNTL_STATUS:
189         case RADEON_EMIT_RE_MISC:
190         case RADEON_EMIT_PP_BORDER_COLOR_0:
191         case RADEON_EMIT_PP_BORDER_COLOR_1:
192         case RADEON_EMIT_PP_BORDER_COLOR_2:
193         case RADEON_EMIT_SE_ZBIAS_FACTOR:
194         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
195         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
196         case R200_EMIT_PP_TXCBLEND_0:
197         case R200_EMIT_PP_TXCBLEND_1:
198         case R200_EMIT_PP_TXCBLEND_2:
199         case R200_EMIT_PP_TXCBLEND_3:
200         case R200_EMIT_PP_TXCBLEND_4:
201         case R200_EMIT_PP_TXCBLEND_5:
202         case R200_EMIT_PP_TXCBLEND_6:
203         case R200_EMIT_PP_TXCBLEND_7:
204         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
205         case R200_EMIT_TFACTOR_0:
206         case R200_EMIT_VTX_FMT_0:
207         case R200_EMIT_MATRIX_SELECT_0:
208         case R200_EMIT_TEX_PROC_CTL_2:
209         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
210         case R200_EMIT_PP_TXFILTER_0:
211         case R200_EMIT_PP_TXFILTER_1:
212         case R200_EMIT_PP_TXFILTER_2:
213         case R200_EMIT_PP_TXFILTER_3:
214         case R200_EMIT_PP_TXFILTER_4:
215         case R200_EMIT_PP_TXFILTER_5:
216         case R200_EMIT_VTE_CNTL:
217         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
218         case R200_EMIT_PP_TAM_DEBUG3:
219         case R200_EMIT_PP_CNTL_X:
220         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
221         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
222         case R200_EMIT_RE_SCISSOR_TL_0:
223         case R200_EMIT_RE_SCISSOR_TL_1:
224         case R200_EMIT_RE_SCISSOR_TL_2:
225         case R200_EMIT_SE_VAP_CNTL_STATUS:
226         case R200_EMIT_SE_VTX_STATE_CNTL:
227         case R200_EMIT_RE_POINTSIZE:
228         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
229         case R200_EMIT_PP_CUBIC_FACES_0:
230         case R200_EMIT_PP_CUBIC_FACES_1:
231         case R200_EMIT_PP_CUBIC_FACES_2:
232         case R200_EMIT_PP_CUBIC_FACES_3:
233         case R200_EMIT_PP_CUBIC_FACES_4:
234         case R200_EMIT_PP_CUBIC_FACES_5:
235         case RADEON_EMIT_PP_TEX_SIZE_0:
236         case RADEON_EMIT_PP_TEX_SIZE_1:
237         case RADEON_EMIT_PP_TEX_SIZE_2:
238         case R200_EMIT_RB3D_BLENDCOLOR:
239         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
240         case RADEON_EMIT_PP_CUBIC_FACES_0:
241         case RADEON_EMIT_PP_CUBIC_FACES_1:
242         case RADEON_EMIT_PP_CUBIC_FACES_2:
243         case R200_EMIT_PP_TRI_PERF_CNTL:
244         case R200_EMIT_PP_AFS_0:
245         case R200_EMIT_PP_AFS_1:
246         case R200_EMIT_ATF_TFACTOR:
247         case R200_EMIT_PP_TXCTLALL_0:
248         case R200_EMIT_PP_TXCTLALL_1:
249         case R200_EMIT_PP_TXCTLALL_2:
250         case R200_EMIT_PP_TXCTLALL_3:
251         case R200_EMIT_PP_TXCTLALL_4:
252         case R200_EMIT_PP_TXCTLALL_5:
253         case R200_EMIT_VAP_PVS_CNTL:
254                 /* These packets don't contain memory offsets */
255                 break;
256
257         default:
258                 DRM_ERROR("Unknown state packet ID %d\n", id);
259                 return -EINVAL;
260         }
261
262         return 0;
263 }
264
265 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
266                                                      dev_priv,
267                                                      struct drm_file *file_priv,
268                                                      drm_radeon_kcmd_buffer_t *
269                                                      cmdbuf,
270                                                      unsigned int *cmdsz)
271 {
272         u32 *cmd = (u32 *) cmdbuf->buf;
273         u32 offset, narrays;
274         int count, i, k;
275
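            /* Per the masks used below, a type-3 CP packet header carries the
             * packet type in bits 30-31, the opcode in bits 8-15 and a dword
             * count in bits 16-29; the count is the number of data dwords
             * minus one, so the whole packet including the header is
             * count + 2 dwords.
             */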
276         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
277
278         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
279                 DRM_ERROR("Not a type 3 packet\n");
280                 return -EINVAL;
281         }
282
283         if (4 * *cmdsz > cmdbuf->bufsz) {
284                 DRM_ERROR("Packet size larger than size of data provided\n");
285                 return -EINVAL;
286         }
287
288         switch(cmd[0] & 0xff00) {
289         /* XXX Are there old drivers needing other packets? */
290
291         case RADEON_3D_DRAW_IMMD:
292         case RADEON_3D_DRAW_VBUF:
293         case RADEON_3D_DRAW_INDX:
294         case RADEON_WAIT_FOR_IDLE:
295         case RADEON_CP_NOP:
296         case RADEON_3D_CLEAR_ZMASK:
297 /*      case RADEON_CP_NEXT_CHAR:
298         case RADEON_CP_PLY_NEXTSCAN:
299         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
300                 /* these packets are safe */
301                 break;
302
303         case RADEON_CP_3D_DRAW_IMMD_2:
304         case RADEON_CP_3D_DRAW_VBUF_2:
305         case RADEON_CP_3D_DRAW_INDX_2:
306         case RADEON_3D_CLEAR_HIZ:
307                 /* safe but r200 only */
308                 if ((dev_priv->chip_family < CHIP_R200) ||
309                     (dev_priv->chip_family > CHIP_RV280)) {
310                         DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
311                         return -EINVAL;
312                 }
313                 break;
314
315         case RADEON_3D_LOAD_VBPNTR:
316                 count = (cmd[0] >> 16) & 0x3fff;
317
318                 if (count > 18) { /* 12 arrays max */
319                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
320                                   count);
321                         return -EINVAL;
322                 }
323
324                 /* carefully check packet contents */
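                    /* Layout this check assumes: cmd[1] holds the number of
                     * vertex arrays (with the two high format bits masked
                     * off); the arrays are described in pairs, each pair
                     * being one packed attribute dword followed by one offset
                     * dword per array, with a trailing unpaired array taking
                     * an attribute dword and a single offset.
                     */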
325                 narrays = cmd[1] & ~0xc000;
326                 k = 0;
327                 i = 2;
328                 while ((k < narrays) && (i < (count + 2))) {
329                         i++;            /* skip attribute field */
330                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
331                                                           &cmd[i])) {
332                                 DRM_ERROR
333                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
334                                      k, i);
335                                 return -EINVAL;
336                         }
337                         k++;
338                         i++;
339                         if (k == narrays)
340                                 break;
341                         /* have one more to process, they come in pairs */
342                         if (radeon_check_and_fixup_offset(dev_priv,
343                                                           file_priv, &cmd[i]))
344                         {
345                                 DRM_ERROR
346                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
347                                      k, i);
348                                 return -EINVAL;
349                         }
350                         k++;
351                         i++;
352                 }
353                 /* do the counts match what we expect ? */
354                 if ((k != narrays) || (i != (count + 2))) {
355                         DRM_ERROR
356                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
357                               k, i, narrays, count + 1);
358                         return -EINVAL;
359                 }
360                 break;
361
362         case RADEON_3D_RNDR_GEN_INDX_PRIM:
363                 if (dev_priv->chip_family > CHIP_RS200) {
364                         DRM_ERROR("Invalid 3d packet for non-r100-class chip\n");
365                         return -EINVAL;
366                 }
367                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
368                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
369                                 return -EINVAL;
370                 }
371                 break;
372
373         case RADEON_CP_INDX_BUFFER:
374                 /* safe but r200 only */
375                 if ((dev_priv->chip_family < CHIP_R200) ||
376                     (dev_priv->chip_family > CHIP_RV280)) {
377                         DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
378                         return -EINVAL;
379                 }
380                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
381                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
382                         return -EINVAL;
383                 }
384                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
385                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
386                         return -EINVAL;
387                 }
388                 break;
389
390         case RADEON_CNTL_HOSTDATA_BLT:
391         case RADEON_CNTL_PAINT_MULTI:
392         case RADEON_CNTL_BITBLT_MULTI:
393                 /* MSB of opcode: next DWORD GUI_CNTL */
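                    /* The GUI pitch/offset dwords keep the surface offset in
                     * their low 22 bits in 1 KiB units, with the pitch in the
                     * top 10 bits: shift up to a byte address for checking,
                     * then pack the possibly fixed-up offset back in.
                     */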
394                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
395                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
396                         offset = cmd[2] << 10;
397                         if (radeon_check_and_fixup_offset
398                             (dev_priv, file_priv, &offset)) {
399                                 DRM_ERROR("Invalid first packet offset\n");
400                                 return -EINVAL;
401                         }
402                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
403                 }
404
405                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
406                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
407                         offset = cmd[3] << 10;
408                         if (radeon_check_and_fixup_offset
409                             (dev_priv, file_priv, &offset)) {
410                                 DRM_ERROR("Invalid second packet offset\n");
411                                 return -EINVAL;
412                         }
413                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
414                 }
415                 break;
416
417         default:
418                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
419                 return -EINVAL;
420         }
421
422         return 0;
423 }
424
425 /* ================================================================
426  * CP hardware state programming functions
427  */
428
429 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
430                                              struct drm_clip_rect * box)
431 {
432         RING_LOCALS;
433
434         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
435                   box->x1, box->y1, box->x2, box->y2);
436
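           /* RE_TOP_LEFT takes the inclusive top-left corner; the second
            * write turns the clip rect's exclusive x2/y2 into inclusive
            * coordinates (hence the -1), which appears to be what the
            * hardware expects despite the WIDTH_HEIGHT register name.
            */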
437         BEGIN_RING(4);
438         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
439         OUT_RING((box->y1 << 16) | box->x1);
440         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
441         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
442         ADVANCE_RING();
443 }
444
445 /* Emit 1.1 state
446  */
447 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
448                              struct drm_file *file_priv,
449                              drm_radeon_context_regs_t * ctx,
450                              drm_radeon_texture_regs_t * tex,
451                              unsigned int dirty)
452 {
453         RING_LOCALS;
454         DRM_DEBUG("dirty=0x%08x\n", dirty);
455
456         if (dirty & RADEON_UPLOAD_CONTEXT) {
457                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
458                                                   &ctx->rb3d_depthoffset)) {
459                         DRM_ERROR("Invalid depth buffer offset\n");
460                         return -EINVAL;
461                 }
462
463                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
464                                                   &ctx->rb3d_coloroffset)) {
465                         DRM_ERROR("Invalid colour buffer offset\n");
466                         return -EINVAL;
467                 }
468
469                 BEGIN_RING(14);
470                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
471                 OUT_RING(ctx->pp_misc);
472                 OUT_RING(ctx->pp_fog_color);
473                 OUT_RING(ctx->re_solid_color);
474                 OUT_RING(ctx->rb3d_blendcntl);
475                 OUT_RING(ctx->rb3d_depthoffset);
476                 OUT_RING(ctx->rb3d_depthpitch);
477                 OUT_RING(ctx->rb3d_zstencilcntl);
478                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
479                 OUT_RING(ctx->pp_cntl);
480                 OUT_RING(ctx->rb3d_cntl);
481                 OUT_RING(ctx->rb3d_coloroffset);
482                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
483                 OUT_RING(ctx->rb3d_colorpitch);
484                 ADVANCE_RING();
485         }
486
487         if (dirty & RADEON_UPLOAD_VERTFMT) {
488                 BEGIN_RING(2);
489                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
490                 OUT_RING(ctx->se_coord_fmt);
491                 ADVANCE_RING();
492         }
493
494         if (dirty & RADEON_UPLOAD_LINE) {
495                 BEGIN_RING(5);
496                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
497                 OUT_RING(ctx->re_line_pattern);
498                 OUT_RING(ctx->re_line_state);
499                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
500                 OUT_RING(ctx->se_line_width);
501                 ADVANCE_RING();
502         }
503
504         if (dirty & RADEON_UPLOAD_BUMPMAP) {
505                 BEGIN_RING(5);
506                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
507                 OUT_RING(ctx->pp_lum_matrix);
508                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
509                 OUT_RING(ctx->pp_rot_matrix_0);
510                 OUT_RING(ctx->pp_rot_matrix_1);
511                 ADVANCE_RING();
512         }
513
514         if (dirty & RADEON_UPLOAD_MASKS) {
515                 BEGIN_RING(4);
516                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
517                 OUT_RING(ctx->rb3d_stencilrefmask);
518                 OUT_RING(ctx->rb3d_ropcntl);
519                 OUT_RING(ctx->rb3d_planemask);
520                 ADVANCE_RING();
521         }
522
523         if (dirty & RADEON_UPLOAD_VIEWPORT) {
524                 BEGIN_RING(7);
525                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
526                 OUT_RING(ctx->se_vport_xscale);
527                 OUT_RING(ctx->se_vport_xoffset);
528                 OUT_RING(ctx->se_vport_yscale);
529                 OUT_RING(ctx->se_vport_yoffset);
530                 OUT_RING(ctx->se_vport_zscale);
531                 OUT_RING(ctx->se_vport_zoffset);
532                 ADVANCE_RING();
533         }
534
535         if (dirty & RADEON_UPLOAD_SETUP) {
536                 BEGIN_RING(4);
537                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
538                 OUT_RING(ctx->se_cntl);
539                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
540                 OUT_RING(ctx->se_cntl_status);
541                 ADVANCE_RING();
542         }
543
544         if (dirty & RADEON_UPLOAD_MISC) {
545                 BEGIN_RING(2);
546                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
547                 OUT_RING(ctx->re_misc);
548                 ADVANCE_RING();
549         }
550
551         if (dirty & RADEON_UPLOAD_TEX0) {
552                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
553                                                   &tex[0].pp_txoffset)) {
554                         DRM_ERROR("Invalid texture offset for unit 0\n");
555                         return -EINVAL;
556                 }
557
558                 BEGIN_RING(9);
559                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
560                 OUT_RING(tex[0].pp_txfilter);
561                 OUT_RING(tex[0].pp_txformat);
562                 OUT_RING(tex[0].pp_txoffset);
563                 OUT_RING(tex[0].pp_txcblend);
564                 OUT_RING(tex[0].pp_txablend);
565                 OUT_RING(tex[0].pp_tfactor);
566                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
567                 OUT_RING(tex[0].pp_border_color);
568                 ADVANCE_RING();
569         }
570
571         if (dirty & RADEON_UPLOAD_TEX1) {
572                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573                                                   &tex[1].pp_txoffset)) {
574                         DRM_ERROR("Invalid texture offset for unit 1\n");
575                         return -EINVAL;
576                 }
577
578                 BEGIN_RING(9);
579                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
580                 OUT_RING(tex[1].pp_txfilter);
581                 OUT_RING(tex[1].pp_txformat);
582                 OUT_RING(tex[1].pp_txoffset);
583                 OUT_RING(tex[1].pp_txcblend);
584                 OUT_RING(tex[1].pp_txablend);
585                 OUT_RING(tex[1].pp_tfactor);
586                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
587                 OUT_RING(tex[1].pp_border_color);
588                 ADVANCE_RING();
589         }
590
591         if (dirty & RADEON_UPLOAD_TEX2) {
592                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593                                                   &tex[2].pp_txoffset)) {
594                         DRM_ERROR("Invalid texture offset for unit 2\n");
595                         return -EINVAL;
596                 }
597
598                 BEGIN_RING(9);
599                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
600                 OUT_RING(tex[2].pp_txfilter);
601                 OUT_RING(tex[2].pp_txformat);
602                 OUT_RING(tex[2].pp_txoffset);
603                 OUT_RING(tex[2].pp_txcblend);
604                 OUT_RING(tex[2].pp_txablend);
605                 OUT_RING(tex[2].pp_tfactor);
606                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
607                 OUT_RING(tex[2].pp_border_color);
608                 ADVANCE_RING();
609         }
610
611         return 0;
612 }
613
614 /* Emit 1.2 state
615  */
616 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
617                               struct drm_file *file_priv,
618                               drm_radeon_state_t * state)
619 {
620         RING_LOCALS;
621
622         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
623                 BEGIN_RING(3);
624                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
625                 OUT_RING(state->context2.se_zbias_factor);
626                 OUT_RING(state->context2.se_zbias_constant);
627                 ADVANCE_RING();
628         }
629
630         return radeon_emit_state(dev_priv, file_priv, &state->context,
631                                  state->tex, state->dirty);
632 }
633
634 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
635  * 1.3 cmdbuffers allow all previous state to be updated as well as
636  * the tcl scalar and vector areas.
637  */
638 static struct {
639         int start;
640         int len;
641         const char *name;
642 } packet[RADEON_MAX_STATE_PACKETS] = {
643         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
644         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
645         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
646         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
647         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
648         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
649         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
650         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
651         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
652         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
653         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
654         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
655         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
656         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
657         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
658         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
659         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
660         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
661         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
662         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
663         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
664                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
665         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
666         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
667         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
668         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
669         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
670         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
671         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
672         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
673         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
674         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
675         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
676         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
677         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
678         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
679         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
680         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
681         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
682         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
683         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
684         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
685         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
686         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
687         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
688         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
689         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
690         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
691         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
692         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
693         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
694          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
695         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
696         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
697         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
698         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
699         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
700         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
701         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
702         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
703         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
704         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
705         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
706                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
707         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
708         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
709         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
710         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
711         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
712         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
713         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
714         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
715         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
716         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
717         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
718         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
719         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
720         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
721         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
722         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
723         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
724         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
725         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
726         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
727         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
728         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
729         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
730         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
731         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
732         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
733         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
734         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
735         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
736         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
737         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
738         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
739         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
740         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
741 };
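     /* The table above is indexed by the RADEON_EMIT_* / R200_EMIT_* packet
      * IDs handled in radeon_check_and_fixup_packets(): 'start' is the first
      * register written, 'len' the number of register dwords in the packet,
      * and 'name' is apparently only used for debug and error messages.
      */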
742
743 /* ================================================================
744  * Performance monitoring functions
745  */
746
747 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
748                              struct drm_radeon_master_private *master_priv,
749                              int x, int y, int w, int h, int r, int g, int b)
750 {
751         u32 color;
752         RING_LOCALS;
753
754         x += master_priv->sarea_priv->boxes[0].x1;
755         y += master_priv->sarea_priv->boxes[0].y1;
756
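            /* Pack the 8-bit r/g/b fill colour to match the framebuffer
             * format: RGB565 keeps the top 5/6/5 bits of each channel, and
             * anything else is treated as ARGB8888 with alpha forced to 0xff.
             */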
757         switch (dev_priv->color_fmt) {
758         case RADEON_COLOR_FORMAT_RGB565:
759                 color = (((r & 0xf8) << 8) |
760                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
761                 break;
762         case RADEON_COLOR_FORMAT_ARGB8888:
763         default:
764                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
765                 break;
766         }
767
768         BEGIN_RING(4);
769         RADEON_WAIT_UNTIL_3D_IDLE();
770         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
771         OUT_RING(0xffffffff);
772         ADVANCE_RING();
773
774         BEGIN_RING(6);
775
776         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
777         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
778                  RADEON_GMC_BRUSH_SOLID_COLOR |
779                  (dev_priv->color_fmt << 8) |
780                  RADEON_GMC_SRC_DATATYPE_COLOR |
781                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
782
783         if (master_priv->sarea_priv->pfCurrentPage == 1) {
784                 OUT_RING(dev_priv->front_pitch_offset);
785         } else {
786                 OUT_RING(dev_priv->back_pitch_offset);
787         }
788
789         OUT_RING(color);
790
791         OUT_RING((x << 16) | y);
792         OUT_RING((w << 16) | h);
793
794         ADVANCE_RING();
795 }
796
797 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv, struct drm_radeon_master_private *master_priv)
798 {
799         /* Collapse various things into a wait flag -- trying to
800          * guess if userspace slept -- better just to have them tell us.
801          */
802         if (dev_priv->stats.last_frame_reads > 1 ||
803             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
804                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
805         }
806
807         if (dev_priv->stats.freelist_loops) {
808                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
809         }
810
811         /* Purple box for page flipping
812          */
813         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
814                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
815
816         /* Red box if we have to wait for idle at any point
817          */
818         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
819                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
820
821         /* Blue box: lost context?
822          */
823
824         /* Yellow box for texture swaps
825          */
826         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
827                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
828
829         /* Green box if hardware never idles (as far as we can tell)
830          */
831         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
832                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
833
834         /* Draw bars indicating number of buffers allocated
835          * (not a great measure, easily confused)
836          */
837         if (dev_priv->stats.requested_bufs) {
838                 if (dev_priv->stats.requested_bufs > 100)
839                         dev_priv->stats.requested_bufs = 100;
840
841                 radeon_clear_box(dev_priv, master_priv, 4, 16,
842                                  dev_priv->stats.requested_bufs, 4,
843                                  196, 128, 128);
844         }
845
846         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
847
848 }
849
850 /* ================================================================
851  * CP command dispatch functions
852  */
853
854 static void radeon_cp_dispatch_clear(struct drm_device * dev,
855                                      struct drm_master *master,
856                                      drm_radeon_clear_t * clear,
857                                      drm_radeon_clear_rect_t * depth_boxes)
858 {
859         drm_radeon_private_t *dev_priv = dev->dev_private;
860         struct drm_radeon_master_private *master_priv = master->driver_priv;
861         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
862         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
863         int nbox = sarea_priv->nbox;
864         struct drm_clip_rect *pbox = sarea_priv->boxes;
865         unsigned int flags = clear->flags;
866         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
867         int i;
868         RING_LOCALS;
869         DRM_DEBUG("flags = 0x%x\n", flags);
870
871         dev_priv->stats.clears++;
872
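            /* With page flipping active and the back buffer currently on
             * screen (pfCurrentPage == 1), swap the FRONT/BACK flags so the
             * clear lands on the physical buffers the client means.
             */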
873         if (sarea_priv->pfCurrentPage == 1) {
874                 unsigned int tmp = flags;
875
876                 flags &= ~(RADEON_FRONT | RADEON_BACK);
877                 if (tmp & RADEON_FRONT)
878                         flags |= RADEON_BACK;
879                 if (tmp & RADEON_BACK)
880                         flags |= RADEON_FRONT;
881         }
882
883         if (flags & (RADEON_FRONT | RADEON_BACK)) {
884
885                 BEGIN_RING(4);
886
887                 /* Ensure the 3D stream is idle before doing a
888                  * 2D fill to clear the front or back buffer.
889                  */
890                 RADEON_WAIT_UNTIL_3D_IDLE();
891
892                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
893                 OUT_RING(clear->color_mask);
894
895                 ADVANCE_RING();
896
897                 /* Make sure we restore the 3D state next time.
898                  */
899                 sarea_priv->ctx_owner = 0;
900
901                 for (i = 0; i < nbox; i++) {
902                         int x = pbox[i].x1;
903                         int y = pbox[i].y1;
904                         int w = pbox[i].x2 - x;
905                         int h = pbox[i].y2 - y;
906
907                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
908                                   x, y, w, h, flags);
909
910                         if (flags & RADEON_FRONT) {
911                                 BEGIN_RING(6);
912
913                                 OUT_RING(CP_PACKET3
914                                          (RADEON_CNTL_PAINT_MULTI, 4));
915                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
916                                          RADEON_GMC_BRUSH_SOLID_COLOR |
917                                          (dev_priv->
918                                           color_fmt << 8) |
919                                          RADEON_GMC_SRC_DATATYPE_COLOR |
920                                          RADEON_ROP3_P |
921                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
922
923                                 OUT_RING(dev_priv->front_pitch_offset);
924                                 OUT_RING(clear->clear_color);
925
926                                 OUT_RING((x << 16) | y);
927                                 OUT_RING((w << 16) | h);
928
929                                 ADVANCE_RING();
930                         }
931
932                         if (flags & RADEON_BACK) {
933                                 BEGIN_RING(6);
934
935                                 OUT_RING(CP_PACKET3
936                                          (RADEON_CNTL_PAINT_MULTI, 4));
937                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
938                                          RADEON_GMC_BRUSH_SOLID_COLOR |
939                                          (dev_priv->
940                                           color_fmt << 8) |
941                                          RADEON_GMC_SRC_DATATYPE_COLOR |
942                                          RADEON_ROP3_P |
943                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
944
945                                 OUT_RING(dev_priv->back_pitch_offset);
946                                 OUT_RING(clear->clear_color);
947
948                                 OUT_RING((x << 16) | y);
949                                 OUT_RING((w << 16) | h);
950
951                                 ADVANCE_RING();
952                         }
953                 }
954         }
955
956         /* hyper z clear */
957         /* no docs available, based on reverse engineering by Stephane Marchesin */
958         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
959             && (flags & RADEON_CLEAR_FASTZ)) {
960
961                 int i;
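                    /* depthpixperline is the depth buffer pitch expressed in
                     * pixels: the byte pitch divided by 2 for the 16-bit Z
                     * format, or by 4 for the 4-byte-per-pixel formats.
                     */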
962                 int depthpixperline =
963                     dev_priv->depth_fmt ==
964                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
965                                                        2) : (dev_priv->
966                                                              depth_pitch / 4);
967
968                 u32 clearmask;
969
970                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
971                     ((clear->depth_mask & 0xff) << 24);
972
973                 /* Make sure we restore the 3D state next time.
974                  * we haven't touched any "normal" state - still need this?
975                  */
976                 sarea_priv->ctx_owner = 0;
977
978                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
979                     && (flags & RADEON_USE_HIERZ)) {
980                         /* FIXME : reverse engineer that for Rx00 cards */
981                         /* FIXME : the mask supposedly contains low-res z values. So can't set
982                            just to the max (0xff? or actually 0x3fff?), need to take z clear
983                            value into account? */
984                         /* pattern seems to work for r100, though we get slight
985                            rendering errors with glxgears. If hierz is not enabled for r100,
986                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
987                            other ones are ignored, and the same clear mask can be used. That's
988                            very different behaviour from R200, which needs a different clear mask
989                            and a different number of tiles to clear depending on whether hierz is enabled !?!
990                          */
991                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
992                 } else {
993                         /* clear mask : chooses the clearing pattern.
994                            rv250: could be used to clear only parts of macrotiles
995                            (but that would get really complicated...)?
996                            bits 0 and 1 (either or both of them ?!?!) are used to
997                            not clear tile 0 (or maybe one of the bits indicates if the tile is
998                            compressed or not), bits 2 and 3 to not clear tile 1, and so on.
999                            Pattern is as follows:
1000                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1001                            bits -------------------------------------------------
1002                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1003                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1004                            covers 256 pixels ?!?
1005                          */
1006                         clearmask = 0x0;
1007                 }
1008
1009                 BEGIN_RING(8);
1010                 RADEON_WAIT_UNTIL_2D_IDLE();
1011                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1012                              tempRB3D_DEPTHCLEARVALUE);
1013                 /* what offset is this exactly ? */
1014                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1015                 /* need ctlstat, otherwise get some strange black flickering */
1016                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1017                              RADEON_RB3D_ZC_FLUSH_ALL);
1018                 ADVANCE_RING();
1019
1020                 for (i = 0; i < nbox; i++) {
1021                         int tileoffset, nrtilesx, nrtilesy, j;
1022                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1023                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1024                             && (dev_priv->chip_family < CHIP_R200)) {
1025                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1026                                    maybe r200 actually doesn't need to put the low-res z value into
1027                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1028                                    Works for R100, both with hierz and without.
1029                                    R100 seems to operate on 2x1 8x8 tiles, but...
1030                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1031                                    problematic with resolutions which are not 64 pix aligned? */
1032                                 tileoffset =
1033                                     ((pbox[i].y1 >> 3) * depthpixperline +
1034                                      pbox[i].x1) >> 6;
1035                                 nrtilesx =
1036                                     ((pbox[i].x2 & ~63) -
1037                                      (pbox[i].x1 & ~63)) >> 4;
1038                                 nrtilesy =
1039                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1040                                 for (j = 0; j <= nrtilesy; j++) {
1041                                         BEGIN_RING(4);
1042                                         OUT_RING(CP_PACKET3
1043                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1044                                         /* first tile */
1045                                         OUT_RING(tileoffset * 8);
1046                                         /* the number of tiles to clear */
1047                                         OUT_RING(nrtilesx + 4);
1048                                         /* clear mask : chooses the clearing pattern. */
1049                                         OUT_RING(clearmask);
1050                                         ADVANCE_RING();
1051                                         tileoffset += depthpixperline >> 6;
1052                                 }
1053                         } else if ((dev_priv->chip_family >= CHIP_R200) &&
1054                                    (dev_priv->chip_family <= CHIP_RV280)) {
1055                                 /* works for rv250. */
1056                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1057                                 tileoffset =
1058                                     ((pbox[i].y1 >> 3) * depthpixperline +
1059                                      pbox[i].x1) >> 5;
1060                                 nrtilesx =
1061                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1062                                 nrtilesy =
1063                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1064                                 for (j = 0; j <= nrtilesy; j++) {
1065                                         BEGIN_RING(4);
1066                                         OUT_RING(CP_PACKET3
1067                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1068                                         /* first tile */
1069                                         /* judging by the first tile offset needed, could possibly
1070                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1071                                            macro tiles, though would still need clear mask for
1072                                            right/bottom if truly 4x4 granularity is desired? */
1073                                         OUT_RING(tileoffset * 16);
1074                                         /* the number of tiles to clear */
1075                                         OUT_RING(nrtilesx + 1);
1076                                         /* clear mask : chooses the clearing pattern. */
1077                                         OUT_RING(clearmask);
1078                                         ADVANCE_RING();
1079                                         tileoffset += depthpixperline >> 5;
1080                                 }
1081                         } else {        /* rv 100 */
1082                                 /* rv100 might not need 64 pix alignment, who knows */
1083                                 /* offsets are, hmm, weird */
1084                                 tileoffset =
1085                                     ((pbox[i].y1 >> 4) * depthpixperline +
1086                                      pbox[i].x1) >> 6;
1087                                 nrtilesx =
1088                                     ((pbox[i].x2 & ~63) -
1089                                      (pbox[i].x1 & ~63)) >> 4;
1090                                 nrtilesy =
1091                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1092                                 for (j = 0; j <= nrtilesy; j++) {
1093                                         BEGIN_RING(4);
1094                                         OUT_RING(CP_PACKET3
1095                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1096                                         OUT_RING(tileoffset * 128);
1097                                         /* the number of tiles to clear */
1098                                         OUT_RING(nrtilesx + 4);
1099                                         /* clear mask : chooses the clearing pattern. */
1100                                         OUT_RING(clearmask);
1101                                         ADVANCE_RING();
1102                                         tileoffset += depthpixperline >> 6;
1103                                 }
1104                         }
1105                 }
1106
1107                 /* TODO don't always clear all hi-level z tiles */
1108                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1109                     && ((dev_priv->chip_family >= CHIP_R200) &&
1110                         (dev_priv->chip_family <= CHIP_RV280))
1111                     && (flags & RADEON_USE_HIERZ))
1112                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1113                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1114                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1115                            value into account? */
1116                 {
1117                         BEGIN_RING(4);
1118                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1119                         OUT_RING(0x0);  /* First tile */
1120                         OUT_RING(0x3cc0);
1121                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1122                         ADVANCE_RING();
1123                 }
1124         }
1125
1126         /* We have to clear the depth and/or stencil buffers by
1127          * rendering a quad into just those buffers.  Thus, we have to
1128          * make sure the 3D engine is configured correctly.
1129          */
1130         else if ((dev_priv->chip_family >= CHIP_R200) &&
1131                  (dev_priv->chip_family <= CHIP_RV280) &&
1132                  (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1133
1134                 int tempPP_CNTL;
1135                 int tempRE_CNTL;
1136                 int tempRB3D_CNTL;
1137                 int tempRB3D_ZSTENCILCNTL;
1138                 int tempRB3D_STENCILREFMASK;
1139                 int tempRB3D_PLANEMASK;
1140                 int tempSE_CNTL;
1141                 int tempSE_VTE_CNTL;
1142                 int tempSE_VTX_FMT_0;
1143                 int tempSE_VTX_FMT_1;
1144                 int tempSE_VAP_CNTL;
1145                 int tempRE_AUX_SCISSOR_CNTL;
1146
1147                 tempPP_CNTL = 0;
1148                 tempRE_CNTL = 0;
1149
1150                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1151
1152                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1153                 tempRB3D_STENCILREFMASK = 0x0;
1154
1155                 tempSE_CNTL = depth_clear->se_cntl;
1156
1157                 /* Disable TCL */
1158
1159                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1160                                           (0x9 <<
1161                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1162
1163                 tempRB3D_PLANEMASK = 0x0;
1164
1165                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1166
1167                 tempSE_VTE_CNTL =
1168                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1169
1170                 /* Vertex format (X, Y, Z, W) */
1171                 tempSE_VTX_FMT_0 =
1172                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1173                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1174                 tempSE_VTX_FMT_1 = 0x0;
1175
1176                 /*
1177                  * Depth buffer specific enables
1178                  */
1179                 if (flags & RADEON_DEPTH) {
1180                         /* Enable depth buffer */
1181                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1182                 } else {
1183                         /* Disable depth buffer */
1184                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1185                 }
1186
1187                 /*
1188                  * Stencil buffer specific enables
1189                  */
1190                 if (flags & RADEON_STENCIL) {
1191                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1192                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1193                 } else {
1194                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1195                         tempRB3D_STENCILREFMASK = 0x00000000;
1196                 }
1197
1198                 if (flags & RADEON_USE_COMP_ZBUF) {
1199                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1200                             RADEON_Z_DECOMPRESSION_ENABLE;
1201                 }
1202                 if (flags & RADEON_USE_HIERZ) {
1203                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1204                 }
1205
1206                 BEGIN_RING(26);
1207                 RADEON_WAIT_UNTIL_2D_IDLE();
1208
1209                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1210                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1211                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1212                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1213                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1214                              tempRB3D_STENCILREFMASK);
1215                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1216                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1217                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1218                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1219                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1220                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1221                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1222                 ADVANCE_RING();
1223
1224                 /* Make sure we restore the 3D state next time.
1225                  */
1226                 sarea_priv->ctx_owner = 0;
1227
1228                 for (i = 0; i < nbox; i++) {
1229
1230                         /* Funny that this should be required --
1231                          *  sets top-left?
1232                          */
1233                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1234
1235                         BEGIN_RING(14);
1236                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1237                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1238                                   RADEON_PRIM_WALK_RING |
1239                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
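                        /* The three vertices below -- (x1,y1), (x1,y2) and
                         * (x2,y2), each carrying the clear depth and w = 1.0
                         * (0x3f800000) -- describe the clear rectangle as a
                         * single RECT_LIST primitive.
                         */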
1240                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1241                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1242                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1243                         OUT_RING(0x3f800000);
1244                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1245                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1246                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1247                         OUT_RING(0x3f800000);
1248                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1249                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1250                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1251                         OUT_RING(0x3f800000);
1252                         ADVANCE_RING();
1253                 }
1254         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1255
1256                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1257
1258                 rb3d_cntl = depth_clear->rb3d_cntl;
1259
1260                 if (flags & RADEON_DEPTH) {
1261                         rb3d_cntl |= RADEON_Z_ENABLE;
1262                 } else {
1263                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1264                 }
1265
1266                 if (flags & RADEON_STENCIL) {
1267                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1268                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1269                 } else {
1270                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1271                         rb3d_stencilrefmask = 0x00000000;
1272                 }
1273
1274                 if (flags & RADEON_USE_COMP_ZBUF) {
1275                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1276                             RADEON_Z_DECOMPRESSION_ENABLE;
1277                 }
1278                 if (flags & RADEON_USE_HIERZ) {
1279                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1280                 }
1281
1282                 BEGIN_RING(13);
1283                 RADEON_WAIT_UNTIL_2D_IDLE();
1284
1285                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1286                 OUT_RING(0x00000000);
1287                 OUT_RING(rb3d_cntl);
1288
1289                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1290                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1291                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1292                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1293                 ADVANCE_RING();
1294
1295                 /* Make sure we restore the 3D state next time.
1296                  */
1297                 sarea_priv->ctx_owner = 0;
1298
1299                 for (i = 0; i < nbox; i++) {
1300
1301                         /* Funny that this should be required --
1302                          *  sets top-left?
1303                          */
1304                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1305
1306                         BEGIN_RING(15);
1307
1308                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1309                         OUT_RING(RADEON_VTX_Z_PRESENT |
1310                                  RADEON_VTX_PKCOLOR_PRESENT);
1311                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1312                                   RADEON_PRIM_WALK_RING |
1313                                   RADEON_MAOS_ENABLE |
1314                                   RADEON_VTX_FMT_RADEON_MODE |
1315                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
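                        /* Same rectangle as in the R200 path above, but in
                         * the legacy RADEON_MODE vertex layout: z plus a
                         * packed color word (left at 0x0) instead of w.
                         */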
1316
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1318                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1319                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1320                         OUT_RING(0x0);
1321
1322                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1323                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1324                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1325                         OUT_RING(0x0);
1326
1327                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1328                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1329                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1330                         OUT_RING(0x0);
1331
1332                         ADVANCE_RING();
1333                 }
1334         }
1335
1336         /* Increment the clear counter.  The client-side 3D driver must
1337          * wait on this value before performing the clear ioctl.  We
1338          * need this because the card's so damned fast...
1339          */
1340         sarea_priv->last_clear++;
1341
1342         BEGIN_RING(4);
1343
1344         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1345         RADEON_WAIT_UNTIL_IDLE();
1346
1347         ADVANCE_RING();
1348 }
1349
1350 static void radeon_cp_dispatch_swap(struct drm_device * dev, struct drm_master *master)
1351 {
1352         drm_radeon_private_t *dev_priv = dev->dev_private;
1353         struct drm_radeon_master_private *master_priv = master->driver_priv;
1354         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1355         int nbox = sarea_priv->nbox;
1356         struct drm_clip_rect *pbox = sarea_priv->boxes;
1357         int i;
1358         RING_LOCALS;
1359         DRM_DEBUG("\n");
1360
1361         /* Do some trivial performance monitoring...
1362          */
1363         if (dev_priv->do_boxes)
1364                 radeon_cp_performance_boxes(dev_priv, master_priv);
1365
1366         /* Wait for the 3D stream to idle before dispatching the bitblt.
1367          * This will prevent data corruption between the two streams.
1368          */
1369         BEGIN_RING(2);
1370
1371         RADEON_WAIT_UNTIL_3D_IDLE();
1372
1373         ADVANCE_RING();
1374
1375         for (i = 0; i < nbox; i++) {
1376                 int x = pbox[i].x1;
1377                 int y = pbox[i].y1;
1378                 int w = pbox[i].x2 - x;
1379                 int h = pbox[i].y2 - y;
1380
1381                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1382
1383                 BEGIN_RING(9);
1384
1385                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1386                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1387                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1388                          RADEON_GMC_BRUSH_NONE |
1389                          (dev_priv->color_fmt << 8) |
1390                          RADEON_GMC_SRC_DATATYPE_COLOR |
1391                          RADEON_ROP3_S |
1392                          RADEON_DP_SRC_SOURCE_MEMORY |
1393                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1394
1395                 /* Make this work even if front & back are flipped:
1396                  */
1397                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1398                 if (sarea_priv->pfCurrentPage == 0) {
1399                         OUT_RING(dev_priv->back_pitch_offset);
1400                         OUT_RING(dev_priv->front_pitch_offset);
1401                 } else {
1402                         OUT_RING(dev_priv->front_pitch_offset);
1403                         OUT_RING(dev_priv->back_pitch_offset);
1404                 }
1405
1406                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1407                 OUT_RING((x << 16) | y);
1408                 OUT_RING((x << 16) | y);
1409                 OUT_RING((w << 16) | h);
1410
1411                 ADVANCE_RING();
1412         }
1413
1414         /* Increment the frame counter.  The client-side 3D driver must
1415          * throttle the framerate by waiting for this value before
1416          * performing the swapbuffer ioctl.
1417          */
1418         sarea_priv->last_frame++;
1419
1420         BEGIN_RING(4);
1421
1422         RADEON_FRAME_AGE(sarea_priv->last_frame);
1423         RADEON_WAIT_UNTIL_2D_IDLE();
1424
1425         ADVANCE_RING();
1426 }
1427
1428 void radeon_cp_dispatch_flip(struct drm_device * dev, struct drm_master *master)
1429 {
1430         drm_radeon_private_t *dev_priv = dev->dev_private;
1431         struct drm_radeon_master_private *master_priv = master->driver_priv;
1432         struct drm_sarea *sarea = (struct drm_sarea *) master_priv->sarea->handle;
1433         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1434             ? dev_priv->front_offset : dev_priv->back_offset;
1435         RING_LOCALS;
1436         DRM_DEBUG("pfCurrentPage=%d\n",
1437                   master_priv->sarea_priv->pfCurrentPage);
1438
1439         /* Do some trivial performance monitoring...
1440          */
1441         if (dev_priv->do_boxes) {
1442                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1443                 radeon_cp_performance_boxes(dev_priv, master_priv);
1444         }
1445
1446         /* Update the frame offsets for both CRTCs
1447          */
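        /* The byte offset within the frame is frame.y * pitch plus frame.x
         * scaled by what appears to be the bytes per pixel (color_fmt - 2 for
         * the formats used here), aligned down to 8 bytes before adding the
         * front/back buffer offset.
         */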
1448         BEGIN_RING(6);
1449
1450         RADEON_WAIT_UNTIL_3D_IDLE();
1451         OUT_RING_REG(RADEON_CRTC_OFFSET,
1452                      ((sarea->frame.y * dev_priv->front_pitch +
1453                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1454                      + offset);
1455         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1456                      + offset);
1457
1458         ADVANCE_RING();
1459
1460         /* Increment the frame counter.  The client-side 3D driver must
1461          * throttle the framerate by waiting for this value before
1462          * performing the swapbuffer ioctl.
1463          */
1464         master_priv->sarea_priv->last_frame++;
1465         master_priv->sarea_priv->pfCurrentPage =
1466                 1 - master_priv->sarea_priv->pfCurrentPage;
1467
1468         BEGIN_RING(2);
1469
1470         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1471
1472         ADVANCE_RING();
1473 }
1474
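/* Reject vertex counts that are invalid for the given hardware primitive
 * type: e.g. a line list needs an even, nonzero count and a rect/tri list
 * needs a nonzero multiple of three.  Returns nonzero if the count is bad.
 */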
1475 static int bad_prim_vertex_nr(int primitive, int nr)
1476 {
1477         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1478         case RADEON_PRIM_TYPE_NONE:
1479         case RADEON_PRIM_TYPE_POINT:
1480                 return nr < 1;
1481         case RADEON_PRIM_TYPE_LINE:
1482                 return (nr & 1) || nr == 0;
1483         case RADEON_PRIM_TYPE_LINE_STRIP:
1484                 return nr < 2;
1485         case RADEON_PRIM_TYPE_TRI_LIST:
1486         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1487         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1488         case RADEON_PRIM_TYPE_RECT_LIST:
1489                 return nr % 3 || nr == 0;
1490         case RADEON_PRIM_TYPE_TRI_FAN:
1491         case RADEON_PRIM_TYPE_TRI_STRIP:
1492                 return nr < 3;
1493         default:
1494                 return 1;
1495         }
1496 }
1497
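/* Primitive description built by the vertex/indices ioctls: for indexed
 * primitives, start and finish are byte offsets within the DMA buffer,
 * offset is relative to the start of the DMA buffers, and numverts and
 * vc_format come from the client.
 */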
1498 typedef struct {
1499         unsigned int start;
1500         unsigned int finish;
1501         unsigned int prim;
1502         unsigned int numverts;
1503         unsigned int offset;
1504         unsigned int vc_format;
1505 } drm_radeon_tcl_prim_t;
1506
1507 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1508                                       struct drm_file *file_priv,
1509                                       struct drm_buf * buf,
1510                                       drm_radeon_tcl_prim_t * prim)
1511 {
1512         drm_radeon_private_t *dev_priv = dev->dev_private;
1513         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1514         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1515         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1516         int numverts = (int)prim->numverts;
1517         int nbox = sarea_priv->nbox;
1518         int i = 0;
1519         RING_LOCALS;
1520
1521         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1522                   prim->prim,
1523                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1524
1525         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1526                 DRM_ERROR("bad prim %x numverts %d\n",
1527                           prim->prim, prim->numverts);
1528                 return;
1529         }
1530
1531         do {
1532                 /* Emit the next cliprect */
1533                 if (i < nbox) {
1534                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1535                 }
1536
1537                 /* Emit the vertex buffer rendering commands */
1538                 BEGIN_RING(5);
1539
1540                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1541                 OUT_RING(offset);
1542                 OUT_RING(numverts);
1543                 OUT_RING(prim->vc_format);
1544                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1545                          RADEON_COLOR_ORDER_RGBA |
1546                          RADEON_VTX_FMT_RADEON_MODE |
1547                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1548
1549                 ADVANCE_RING();
1550
1551                 i++;
1552         } while (i < nbox);
1553 }
1554
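/* Tag a DMA buffer with the next dispatch age and mark it pending;
 * RADEON_DISPATCH_AGE has the CP write that age to a scratch register when
 * it reaches this point, which is presumably what the freelist checks
 * before handing the buffer out again.
 */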
1555 static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_master *master, struct drm_buf * buf)
1556 {
1557         drm_radeon_private_t *dev_priv = dev->dev_private;
1558         struct drm_radeon_master_private *master_priv = master->driver_priv;
1559         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1560         RING_LOCALS;
1561
1562         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1563
1564         /* Emit the vertex buffer age */
1565         BEGIN_RING(2);
1566         RADEON_DISPATCH_AGE(buf_priv->age);
1567         ADVANCE_RING();
1568
1569         buf->pending = 1;
1570         buf->used = 0;
1571 }
1572
1573 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1574                                         struct drm_buf * buf, int start, int end)
1575 {
1576         drm_radeon_private_t *dev_priv = dev->dev_private;
1577         RING_LOCALS;
1578         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1579
1580         if (start != end) {
1581                 int offset = (dev_priv->gart_buffers_offset
1582                               + buf->offset + start);
1583                 int dwords = (end - start + 3) / sizeof(u32);
1584
1585                 /* Indirect buffer data must be an even number of
1586                  * dwords, so if we've been given an odd number we must
1587                  * pad the data with a Type-2 CP packet.
1588                  */
1589                 if (dwords & 1) {
1590                         u32 *data = (u32 *)
1591                             ((char *)dev->agp_buffer_map->handle
1592                              + buf->offset + start);
1593                         data[dwords++] = RADEON_CP_PACKET2;
1594                 }
1595
1596                 /* Fire off the indirect buffer */
1597                 BEGIN_RING(3);
1598
1599                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1600                 OUT_RING(offset);
1601                 OUT_RING(dwords);
1602
1603                 ADVANCE_RING();
1604         }
1605 }
1606
1607 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1608                                        struct drm_master *master,
1609                                        struct drm_buf * elt_buf,
1610                                        drm_radeon_tcl_prim_t * prim)
1611 {
1612         drm_radeon_private_t *dev_priv = dev->dev_private;
1613         struct drm_radeon_master_private *master_priv = master->driver_priv;
1614         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1615         int offset = dev_priv->gart_buffers_offset + prim->offset;
1616         u32 *data;
1617         int dwords;
1618         int i = 0;
1619         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1620         int count = (prim->finish - start) / sizeof(u16);
1621         int nbox = sarea_priv->nbox;
1622
1623         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1624                   prim->prim,
1625                   prim->vc_format,
1626                   prim->start, prim->finish, prim->offset, prim->numverts);
1627
1628         if (bad_prim_vertex_nr(prim->prim, count)) {
1629                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1630                 return;
1631         }
1632
1633         if (start >= prim->finish || (prim->start & 0x7)) {
1634                 DRM_ERROR("buffer prim %d\n", prim->prim);
1635                 return;
1636         }
1637
1638         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1639
1640         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1641                         elt_buf->offset + prim->start);
1642
1643         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1644         data[1] = offset;
1645         data[2] = prim->numverts;
1646         data[3] = prim->vc_format;
1647         data[4] = (prim->prim |
1648                    RADEON_PRIM_WALK_IND |
1649                    RADEON_COLOR_ORDER_RGBA |
1650                    RADEON_VTX_FMT_RADEON_MODE |
1651                    (count << RADEON_NUM_VERTICES_SHIFT));
1652
1653         do {
1654                 if (i < nbox)
1655                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1656
1657                 radeon_cp_dispatch_indirect(dev, elt_buf,
1658                                             prim->start, prim->finish);
1659
1660                 i++;
1661         } while (i < nbox);
1662
1663 }
1664
1665 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
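/* Texture uploads are staged through ordinary DMA buffers, so a single blit
 * pass is limited to one buffer's worth of data; larger images are uploaded
 * over several passes (see the do/while loop below).
 */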
1666
1667 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1668                                       struct drm_file *file_priv,
1669                                       drm_radeon_texture_t * tex,
1670                                       drm_radeon_tex_image_t * image)
1671 {
1672         drm_radeon_private_t *dev_priv = dev->dev_private;
1673         struct drm_buf *buf;
1674         u32 format;
1675         u32 *buffer;
1676         const u8 __user *data;
1677         int size, dwords, tex_width, blit_width, spitch;
1678         u32 height;
1679         int i;
1680         u32 texpitch, microtile;
1681         u32 offset, byte_offset;
1682         RING_LOCALS;
1683
1684         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1685                 DRM_ERROR("Invalid destination offset\n");
1686                 return -EINVAL;
1687         }
1688
1689         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1690
1691         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1692          * up with the texture data from the host data blit, otherwise
1693          * part of the texture image may be corrupted.
1694          */
1695         BEGIN_RING(4);
1696         RADEON_FLUSH_CACHE();
1697         RADEON_WAIT_UNTIL_IDLE();
1698         ADVANCE_RING();
1699
1700         /* The compiler won't optimize away a division by a variable,
1701          * even if the only legal values are powers of two.  Thus, we'll
1702          * use a shift instead.
1703          */
1704         switch (tex->format) {
1705         case RADEON_TXFORMAT_ARGB8888:
1706         case RADEON_TXFORMAT_RGBA8888:
1707                 format = RADEON_COLOR_FORMAT_ARGB8888;
1708                 tex_width = tex->width * 4;
1709                 blit_width = image->width * 4;
1710                 break;
1711         case RADEON_TXFORMAT_AI88:
1712         case RADEON_TXFORMAT_ARGB1555:
1713         case RADEON_TXFORMAT_RGB565:
1714         case RADEON_TXFORMAT_ARGB4444:
1715         case RADEON_TXFORMAT_VYUY422:
1716         case RADEON_TXFORMAT_YVYU422:
1717                 format = RADEON_COLOR_FORMAT_RGB565;
1718                 tex_width = tex->width * 2;
1719                 blit_width = image->width * 2;
1720                 break;
1721         case RADEON_TXFORMAT_I8:
1722         case RADEON_TXFORMAT_RGB332:
1723                 format = RADEON_COLOR_FORMAT_CI8;
1724                 tex_width = tex->width * 1;
1725                 blit_width = image->width * 1;
1726                 break;
1727         default:
1728                 DRM_ERROR("invalid texture format %d\n", tex->format);
1729                 return -EINVAL;
1730         }
1731         spitch = blit_width >> 6;
1732         if (spitch == 0 && image->height > 1)
1733                 return -EINVAL;
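        /* spitch is the source pitch in 64-byte units, matching the
         * granularity of the (pitch << 22) | (offset >> 10) words emitted
         * in the blit packet below.
         */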
1734
1735         texpitch = tex->pitch;
1736         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1737                 microtile = 1;
1738                 if (tex_width < 64) {
1739                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1740                         /* we got tiled coordinates, untile them */
1741                         image->x *= 2;
1742                 }
1743         } else
1744                 microtile = 0;
1745
1746         /* this might fail for zero-sized uploads - are those illegal? */
1747         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1748                                 blit_width - 1)) {
1749                 DRM_ERROR("Invalid final destination offset\n");
1750                 return -EINVAL;
1751         }
1752
1753         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1754
1755         do {
1756                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1757                           tex->offset >> 10, tex->pitch, tex->format,
1758                           image->x, image->y, image->width, image->height);
1759
1760                 /* Make a copy of some parameters in case we have to
1761                  * update them for a multi-pass texture blit.
1762                  */
1763                 height = image->height;
1764                 data = (const u8 __user *)image->data;
1765
1766                 size = height * blit_width;
1767
1768                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1769                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1770                         size = height * blit_width;
1771                 } else if (size < 4 && size > 0) {
1772                         size = 4;
1773                 } else if (size == 0) {
1774                         return 0;
1775                 }
1776
1777                 buf = radeon_freelist_get(dev);
1778                 if (0 && !buf) {
1779                         radeon_do_cp_idle(dev_priv);
1780                         buf = radeon_freelist_get(dev);
1781                 }
1782                 if (!buf) {
1783                         DRM_DEBUG("EAGAIN\n");
1784                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1785                                 return -EFAULT;
1786                         return -EAGAIN;
1787                 }
1788
1789                 /* Dispatch the indirect buffer.
1790                  */
1791                 buffer =
1792                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1793                 dwords = size / 4;
1794
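/* Copy a chunk of the texture image in from user space.  This is a macro
 * rather than a helper function so that the early return on a faulting copy
 * exits radeon_cp_dispatch_texture() itself.
 */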
1795 #define RADEON_COPY_MT(_buf, _data, _width) \
1796         do { \
1797                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1798                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1799                         return -EFAULT; \
1800                 } \
1801         } while(0)
1802
1803                 if (microtile) {
1804                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1805                            however, we cannot use blitter directly for texture width < 64 bytes,
1806                            since minimum tex pitch is 64 bytes and we need this to match
1807                            the texture width, otherwise the blitter will tile it wrong.
1808                            Thus, tiling manually in this case. Additionally, need to special
1809                            case tex height = 1, since our actual image will have height 2
1810                            and we need to ensure we don't read beyond the texture size
1811                            from user space. */
1812                         if (tex->height == 1) {
1813                                 if (tex_width >= 64 || tex_width <= 16) {
1814                                         RADEON_COPY_MT(buffer, data,
1815                                                 (int)(tex_width * sizeof(u32)));
1816                                 } else if (tex_width == 32) {
1817                                         RADEON_COPY_MT(buffer, data, 16);
1818                                         RADEON_COPY_MT(buffer + 8,
1819                                                        data + 16, 16);
1820                                 }
1821                         } else if (tex_width >= 64 || tex_width == 16) {
1822                                 RADEON_COPY_MT(buffer, data,
1823                                                (int)(dwords * sizeof(u32)));
1824                         } else if (tex_width < 16) {
1825                                 for (i = 0; i < tex->height; i++) {
1826                                         RADEON_COPY_MT(buffer, data, tex_width);
1827                                         buffer += 4;
1828                                         data += tex_width;
1829                                 }
1830                         } else if (tex_width == 32) {
1831                                 /* TODO: make sure this works when not fitting in one buffer
1832                                    (i.e. 32bytes x 2048...) */
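                                /* Interleave two source rows per iteration:
                                 * the four 16-byte chunks land at byte
                                 * offsets 0, 32, 16 and 48 of the 64-byte
                                 * destination line (buffer is a u32 *, so
                                 * += 8 advances 32 bytes), which presumably
                                 * matches the micro-tile swizzle.
                                 */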
1833                                 for (i = 0; i < tex->height; i += 2) {
1834                                         RADEON_COPY_MT(buffer, data, 16);
1835                                         data += 16;
1836                                         RADEON_COPY_MT(buffer + 8, data, 16);
1837                                         data += 16;
1838                                         RADEON_COPY_MT(buffer + 4, data, 16);
1839                                         data += 16;
1840                                         RADEON_COPY_MT(buffer + 12, data, 16);
1841                                         data += 16;
1842                                         buffer += 16;
1843                                 }
1844                         }
1845                 } else {
1846                         if (tex_width >= 32) {
1847                                 /* Texture image width is larger than the minimum, so we
1848                                  * can upload it directly.
1849                                  */
1850                                 RADEON_COPY_MT(buffer, data,
1851                                                (int)(dwords * sizeof(u32)));
1852                         } else {
1853                                 /* Texture image width is less than the minimum, so we
1854                                  * need to pad out each image scanline to the minimum
1855                                  * width.
1856                                  */
1857                                 for (i = 0; i < tex->height; i++) {
1858                                         RADEON_COPY_MT(buffer, data, tex_width);
1859                                         buffer += 8;
1860                                         data += tex_width;
1861                                 }
1862                         }
1863                 }
1864
1865 #undef RADEON_COPY_MT
1866                 byte_offset = (image->y & ~2047) * blit_width;
1867                 buf->file_priv = file_priv;
1868                 buf->used = size;
1869                 offset = dev_priv->gart_buffers_offset + buf->offset;
1870                 BEGIN_RING(9);
1871                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1872                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1873                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1874                          RADEON_GMC_BRUSH_NONE |
1875                          (format << 8) |
1876                          RADEON_GMC_SRC_DATATYPE_COLOR |
1877                          RADEON_ROP3_S |
1878                          RADEON_DP_SRC_SOURCE_MEMORY |
1879                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1880                 OUT_RING((spitch << 22) | (offset >> 10));
1881                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1882                 OUT_RING(0);
1883                 OUT_RING((image->x << 16) | (image->y % 2048));
1884                 OUT_RING((image->width << 16) | height);
1885                 RADEON_WAIT_UNTIL_2D_IDLE();
1886                 ADVANCE_RING();
1887                 COMMIT_RING();
1888
1889                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
1890
1891                 /* Update the input parameters for next time */
1892                 image->y += height;
1893                 image->height -= height;
1894                 image->data = (const u8 __user *)image->data + size;
1895         } while (image->height > 0);
1896
1897         /* Flush the pixel cache after the blit completes.  This ensures
1898          * the texture data is written out to memory before rendering
1899          * continues.
1900          */
1901         BEGIN_RING(4);
1902         RADEON_FLUSH_CACHE();
1903         RADEON_WAIT_UNTIL_2D_IDLE();
1904         ADVANCE_RING();
1905         COMMIT_RING();
1906
1907         return 0;
1908 }
1909
1910 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1911 {
1912         drm_radeon_private_t *dev_priv = dev->dev_private;
1913         int i;
1914         RING_LOCALS;
1915         DRM_DEBUG("\n");
1916
1917         BEGIN_RING(35);
1918
1919         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1920         OUT_RING(0x00000000);
1921
1922         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1923         for (i = 0; i < 32; i++) {
1924                 OUT_RING(stipple[i]);
1925         }
1926
1927         ADVANCE_RING();
1928 }
1929
1930 static void radeon_apply_surface_regs(int surf_index,
1931                                       drm_radeon_private_t *dev_priv)
1932 {
1933         if (!dev_priv->mmio)
1934                 return;
1935
1936         radeon_do_cp_idle(dev_priv);
1937
1938         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1939                      dev_priv->surfaces[surf_index].flags);
1940         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1941                      dev_priv->surfaces[surf_index].lower);
1942         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1943                      dev_priv->surfaces[surf_index].upper);
1944 }
1945
1946 /* Allocates a virtual surface
1947  * doesn't always allocate a real surface, will stretch an existing
1948  * surface when possible.
1949  *
1950  * Note that refcount can be at most 2: during a free, a refcount of 3
1951  * could require allocating a new surface to hold one of the pieces,
1952  * which is not always possible.
1953  * For example: we allocate three contiguous surfaces ABC. If B is
1954  * freed, we suddenly need two surfaces to store A and C, which might
1955  * not always be available.
1956  */
1957 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1958                          drm_radeon_private_t *dev_priv,
1959                          struct drm_file *file_priv)
1960 {
1961         struct radeon_virt_surface *s;
1962         int i;
1963         int virt_surface_index;
1964         uint32_t new_upper, new_lower;
1965
1966         new_lower = new->address;
1967         new_upper = new_lower + new->size - 1;
1968
1969         /* sanity check */
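        /* new_lower must have none of the RADEON_SURF_ADDRESS_FIXED_MASK bits
         * set and new_upper must have all of them set, i.e. the surface has
         * to start and end on the fixed address granularity.
         */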
1970         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1971             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1972              RADEON_SURF_ADDRESS_FIXED_MASK)
1973             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1974                 return -1;
1975
1976         /* make sure there is no overlap with existing surfaces */
1977         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1978                 if ((dev_priv->surfaces[i].refcount != 0) &&
1979                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1980                       (new_lower < dev_priv->surfaces[i].upper)) ||
1981                      ((new_lower < dev_priv->surfaces[i].lower) &&
1982                       (new_upper > dev_priv->surfaces[i].lower)))) {
1983                         return -1;
1984                 }
1985         }
1986
1987         /* find a virtual surface */
1988         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1989                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
1990                         break;
1991         if (i == 2 * RADEON_MAX_SURFACES) {
1992                 return -1;
1993         }
1994         virt_surface_index = i;
1995
1996         /* try to reuse an existing surface */
1997         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1998                 /* extend before */
1999                 if ((dev_priv->surfaces[i].refcount == 1) &&
2000                     (new->flags == dev_priv->surfaces[i].flags) &&
2001                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2002                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2003                         s->surface_index = i;
2004                         s->lower = new_lower;
2005                         s->upper = new_upper;
2006                         s->flags = new->flags;
2007                         s->file_priv = file_priv;
2008                         dev_priv->surfaces[i].refcount++;
2009                         dev_priv->surfaces[i].lower = s->lower;
2010                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2011                         return virt_surface_index;
2012                 }
2013
2014                 /* extend after */
2015                 if ((dev_priv->surfaces[i].refcount == 1) &&
2016                     (new->flags == dev_priv->surfaces[i].flags) &&
2017                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2018                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2019                         s->surface_index = i;
2020                         s->lower = new_lower;
2021                         s->upper = new_upper;
2022                         s->flags = new->flags;
2023                         s->file_priv = file_priv;
2024                         dev_priv->surfaces[i].refcount++;
2025                         dev_priv->surfaces[i].upper = s->upper;
2026                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2027                         return virt_surface_index;
2028                 }
2029         }
2030
2031         /* okay, we need a new one */
2032         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2033                 if (dev_priv->surfaces[i].refcount == 0) {
2034                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2035                         s->surface_index = i;
2036                         s->lower = new_lower;
2037                         s->upper = new_upper;
2038                         s->flags = new->flags;
2039                         s->file_priv = file_priv;
2040                         dev_priv->surfaces[i].refcount = 1;
2041                         dev_priv->surfaces[i].lower = s->lower;
2042                         dev_priv->surfaces[i].upper = s->upper;
2043                         dev_priv->surfaces[i].flags = s->flags;
2044                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2045                         return virt_surface_index;
2046                 }
2047         }
2048
2049         /* we didn't find anything */
2050         return -1;
2051 }
2052
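/* Release the virtual surface starting at 'lower' that is owned by this
 * client.  Returns 0 on success, nonzero if no matching surface was found.
 */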
2053 static int free_surface(struct drm_file *file_priv,
2054                         drm_radeon_private_t * dev_priv,
2055                         int lower)
2056 {
2057         struct radeon_virt_surface *s;
2058         int i;
2059         /* find the virtual surface */
2060         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2061                 s = &(dev_priv->virt_surfaces[i]);
2062                 if (s->file_priv) {
2063                         if ((lower == s->lower) && (file_priv == s->file_priv))
2064                         {
2065                                 if (dev_priv->surfaces[s->surface_index].
2066                                     lower == s->lower)
2067                                         dev_priv->surfaces[s->surface_index].
2068                                             lower = s->upper;
2069
2070                                 if (dev_priv->surfaces[s->surface_index].
2071                                     upper == s->upper)
2072                                         dev_priv->surfaces[s->surface_index].
2073                                             upper = s->lower;
2074
2075                                 dev_priv->surfaces[s->surface_index].refcount--;
2076                                 if (dev_priv->surfaces[s->surface_index].
2077                                     refcount == 0)
2078                                         dev_priv->surfaces[s->surface_index].
2079                                             flags = 0;
2080                                 s->file_priv = NULL;
2081                                 radeon_apply_surface_regs(s->surface_index,
2082                                                           dev_priv);
2083                                 return 0;
2084                         }
2085                 }
2086         }
2087         return 1;
2088 }
2089
2090 static void radeon_surfaces_release(struct drm_file *file_priv,
2091                                     drm_radeon_private_t * dev_priv)
2092 {
2093         int i;
2094         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2095                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2096                         free_surface(file_priv, dev_priv,
2097                                      dev_priv->virt_surfaces[i].lower);
2098         }
2099 }
2100
2101 /* ================================================================
2102  * IOCTL functions
2103  */
2104 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2105 {
2106         drm_radeon_private_t *dev_priv = dev->dev_private;
2107         drm_radeon_surface_alloc_t *alloc = data;
2108
2109         if (!dev_priv) {
2110                 DRM_ERROR("called with no initialization\n");
2111                 return -EINVAL;
2112         }
2113
2114         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2115                 return -EINVAL;
2116         else
2117                 return 0;
2118 }
2119
2120 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2121 {
2122         drm_radeon_private_t *dev_priv = dev->dev_private;
2123         drm_radeon_surface_free_t *memfree = data;
2124
2125         if (!dev_priv) {
2126                 DRM_ERROR("called with no initialization\n");
2127                 return -EINVAL;
2128         }
2129
2130         if (free_surface(file_priv, dev_priv, memfree->address))
2131                 return -EINVAL;
2132         else
2133                 return 0;
2134 }
2135
2136 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2137 {
2138         drm_radeon_private_t *dev_priv = dev->dev_private;
2139         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2140         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2141         drm_radeon_clear_t *clear = data;
2142         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2143         DRM_DEBUG("\n");
2144
2145         LOCK_TEST_WITH_RETURN(dev, file_priv);
2146
2147         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2148
2149         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2150                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2151
2152         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2153                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2154                 return -EFAULT;
2155
2156         radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2157
2158         COMMIT_RING();
2159         return 0;
2160 }
2161
2162 /* Not sure why this isn't set all the time:
2163  */
2164 static int radeon_do_init_pageflip(struct drm_device * dev, struct drm_master *master)
2165 {
2166         drm_radeon_private_t *dev_priv = dev->dev_private;
2167         struct drm_radeon_master_private *master_priv = master->driver_priv;
2168         RING_LOCALS;
2169
2170         DRM_DEBUG("\n");
2171
2172         BEGIN_RING(6);
2173         RADEON_WAIT_UNTIL_3D_IDLE();
2174         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2175         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2176                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2177         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2178         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2179                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2180         ADVANCE_RING();
2181
2182         dev_priv->page_flipping = 1;
2183
2184         if (master_priv->sarea_priv->pfCurrentPage != 1)
2185                 master_priv->sarea_priv->pfCurrentPage = 0;
2186
2187         return 0;
2188 }
2189
2190 /* Swapping and flipping are different operations, need different ioctls.
2191  * They can & should be intermixed to support multiple 3d windows.
2192  */
2193 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2194 {
2195         drm_radeon_private_t *dev_priv = dev->dev_private;
2196         DRM_DEBUG("\n");
2197
2198         LOCK_TEST_WITH_RETURN(dev, file_priv);
2199
2200         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2201
2202         if (!dev_priv->page_flipping)
2203                 radeon_do_init_pageflip(dev, file_priv->master);
2204
2205         radeon_cp_dispatch_flip(dev, file_priv->master);
2206
2207         COMMIT_RING();
2208         return 0;
2209 }
2210
2211 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2212 {
2213         drm_radeon_private_t *dev_priv = dev->dev_private;
2214         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2215         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2216
2217         DRM_DEBUG("\n");
2218
2219         LOCK_TEST_WITH_RETURN(dev, file_priv);
2220
2221         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2222
2223         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2224                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2225
2226         if (dev_priv->mm.vram_offset)
2227                 radeon_gem_update_offsets(dev, file_priv->master);
2228
2229         radeon_cp_dispatch_swap(dev, file_priv->master);
2230         sarea_priv->ctx_owner = 0;
2231
2232         COMMIT_RING();
2233         return 0;
2234 }
2235
2236 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2237 {
2238         drm_radeon_private_t *dev_priv = dev->dev_private;
2239         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2240         drm_radeon_sarea_t *sarea_priv;
2241         struct drm_device_dma *dma = dev->dma;
2242         struct drm_buf *buf;
2243         drm_radeon_vertex_t *vertex = data;
2244         drm_radeon_tcl_prim_t prim;
2245
2246         LOCK_TEST_WITH_RETURN(dev, file_priv);
2247
2248         if (!dev_priv) {
2249                 DRM_ERROR("called with no initialization\n");
2250                 return -EINVAL;
2251         }
2252
2253         sarea_priv = master_priv->sarea_priv;
2254
2255         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2256                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2257
2258         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2259                 DRM_ERROR("buffer index %d (of %d max)\n",
2260                           vertex->idx, dma->buf_count - 1);
2261                 return -EINVAL;
2262         }
2263         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2264                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2265                 return -EINVAL;
2266         }
2267
2268         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2269         VB_AGE_TEST_WITH_RETURN(dev_priv);
2270
2271         buf = dma->buflist[vertex->idx];
2272
2273         if (buf->file_priv != file_priv) {
2274                 DRM_ERROR("process %d using buffer owned by %p\n",
2275                           DRM_CURRENTPID, buf->file_priv);
2276                 return -EINVAL;
2277         }
2278         if (buf->pending) {
2279                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2280                 return -EINVAL;
2281         }
2282
2283         /* Build up a prim_t record:
2284          */
2285         if (vertex->count) {
2286                 buf->used = vertex->count;      /* not used? */
2287
2288                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2289                         if (radeon_emit_state(dev_priv, file_priv,
2290                                               &sarea_priv->context_state,
2291                                               sarea_priv->tex_state,
2292                                               sarea_priv->dirty)) {
2293                                 DRM_ERROR("radeon_emit_state failed\n");
2294                                 return -EINVAL;
2295                         }
2296
2297                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2298                                                RADEON_UPLOAD_TEX1IMAGES |
2299                                                RADEON_UPLOAD_TEX2IMAGES |
2300                                                RADEON_REQUIRE_QUIESCENCE);
2301                 }
2302
2303                 prim.start = 0;
2304                 prim.finish = vertex->count;    /* unused */
2305                 prim.prim = vertex->prim;
2306                 prim.numverts = vertex->count;
2307                 prim.vc_format = sarea_priv->vc_format;
2308
2309                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2310         }
2311
2312         if (vertex->discard) {
2313                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2314         }
2315
2316         COMMIT_RING();
2317         return 0;
2318 }
2319
2320 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2321 {
2322         drm_radeon_private_t *dev_priv = dev->dev_private;
2323         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2324         drm_radeon_sarea_t *sarea_priv;
2325         struct drm_device_dma *dma = dev->dma;
2326         struct drm_buf *buf;
2327         drm_radeon_indices_t *elts = data;
2328         drm_radeon_tcl_prim_t prim;
2329         int count;
2330
2331         LOCK_TEST_WITH_RETURN(dev, file_priv);
2332
2333         if (!dev_priv) {
2334                 DRM_ERROR("called with no initialization\n");
2335                 return -EINVAL;
2336         }
2337         sarea_priv = master_priv->sarea_priv;
2338
2339         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2340                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2341                   elts->discard);
2342
2343         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2344                 DRM_ERROR("buffer index %d (of %d max)\n",
2345                           elts->idx, dma->buf_count - 1);
2346                 return -EINVAL;
2347         }
2348         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2349                 DRM_ERROR("buffer prim %d\n", elts->prim);
2350                 return -EINVAL;
2351         }
2352
2353         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2354         VB_AGE_TEST_WITH_RETURN(dev_priv);
2355
2356         buf = dma->buflist[elts->idx];
2357
2358         if (buf->file_priv != file_priv) {
2359                 DRM_ERROR("process %d using buffer owned by %p\n",
2360                           DRM_CURRENTPID, buf->file_priv);
2361                 return -EINVAL;
2362         }
2363         if (buf->pending) {
2364                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2365                 return -EINVAL;
2366         }
2367
2368         count = (elts->end - elts->start) / sizeof(u16);
2369         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2370
2371         if (elts->start & 0x7) {
2372                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2373                 return -EINVAL;
2374         }
2375         if (elts->start < buf->used) {
2376                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2377                 return -EINVAL;
2378         }
2379
2380         buf->used = elts->end;
2381
2382         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2383                 if (radeon_emit_state(dev_priv, file_priv,
2384                                       &sarea_priv->context_state,
2385                                       sarea_priv->tex_state,
2386                                       sarea_priv->dirty)) {
2387                         DRM_ERROR("radeon_emit_state failed\n");
2388                         return -EINVAL;
2389                 }
2390
2391                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2392                                        RADEON_UPLOAD_TEX1IMAGES |
2393                                        RADEON_UPLOAD_TEX2IMAGES |
2394                                        RADEON_REQUIRE_QUIESCENCE);
2395         }
2396
2397         /* Build up a prim_t record:
2398          */
2399         prim.start = elts->start;
2400         prim.finish = elts->end;
2401         prim.prim = elts->prim;
2402         prim.offset = 0;        /* offset from start of dma buffers */
2403         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
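                /* For indexed primitives the number of distinct vertices
                 * referenced isn't known here, so advertise the maximum and
                 * let the indices pick out the ones that actually get
                 * fetched.
                 */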
2404         prim.vc_format = sarea_priv->vc_format;
2405
2406         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2407         if (elts->discard) {
2408                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2409         }
2410
2411         COMMIT_RING();
2412         return 0;
2413 }
2414
2415 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2416 {
2417         drm_radeon_private_t *dev_priv = dev->dev_private;
2418         drm_radeon_texture_t *tex = data;
2419         drm_radeon_tex_image_t image;
2420         int ret;
2421
2422         LOCK_TEST_WITH_RETURN(dev, file_priv);
2423
2424         if (tex->image == NULL) {
2425                 DRM_ERROR("null texture image!\n");
2426                 return -EINVAL;
2427         }
2428
2429         if (DRM_COPY_FROM_USER(&image,
2430                                (drm_radeon_tex_image_t __user *) tex->image,
2431                                sizeof(image)))
2432                 return -EFAULT;
2433
2434         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2435         VB_AGE_TEST_WITH_RETURN(dev_priv);
2436
2437         ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2438
2439         return ret;
2440 }
2441
2442 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2443 {
2444         drm_radeon_private_t *dev_priv = dev->dev_private;
2445         drm_radeon_stipple_t *stipple = data;
2446         u32 mask[32];
2447
2448         LOCK_TEST_WITH_RETURN(dev, file_priv);
2449
2450         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2451                 return -EFAULT;
2452
2453         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2454
2455         radeon_cp_dispatch_stipple(dev, mask);
2456
2457         COMMIT_RING();
2458         return 0;
2459 }
2460
2461 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2462 {
2463         drm_radeon_private_t *dev_priv = dev->dev_private;
2464         struct drm_device_dma *dma = dev->dma;
2465         struct drm_buf *buf;
2466         drm_radeon_indirect_t *indirect = data;
2467         RING_LOCALS;
2468
2469         LOCK_TEST_WITH_RETURN(dev, file_priv);
2470
2471         if (!dev_priv) {
2472                 DRM_ERROR("called with no initialization\n");
2473                 return -EINVAL;
2474         }
2475
2476         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2477                   indirect->idx, indirect->start, indirect->end,
2478                   indirect->discard);
2479
2480         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2481                 DRM_ERROR("buffer index %d (of %d max)\n",
2482                           indirect->idx, dma->buf_count - 1);
2483                 return -EINVAL;
2484         }
2485
2486         buf = dma->buflist[indirect->idx];
2487
2488         if (buf->file_priv != file_priv) {
2489                 DRM_ERROR("process %d using buffer owned by %p\n",
2490                           DRM_CURRENTPID, buf->file_priv);
2491                 return -EINVAL;
2492         }
2493         if (buf->pending) {
2494                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2495                 return -EINVAL;
2496         }
2497
2498         if (indirect->start < buf->used) {
2499                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2500                           indirect->start, buf->used);
2501                 return -EINVAL;
2502         }
2503
2504         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2505         VB_AGE_TEST_WITH_RETURN(dev_priv);
2506
2507         buf->used = indirect->end;
2508
2509         /* Wait for the 3D stream to idle before the indirect buffer
2510          * containing 2D acceleration commands is processed.
2511          */
2512         BEGIN_RING(2);
2513
2514         RADEON_WAIT_UNTIL_3D_IDLE();
2515
2516         ADVANCE_RING();
2517
2518         /* Dispatch the indirect buffer full of commands from the
2519          * X server.  This is insecure and is thus only available to
2520          * privileged clients.
2521          */
2522         radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2523         if (indirect->discard) {
2524                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2525         }
2526
2527         COMMIT_RING();
2528         return 0;
2529 }
2530
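/* Newer vertex ioctl: one DMA buffer can carry several primitives, each with
 * its own state block; state is only re-emitted when the state index changes
 * from one primitive to the next.
 */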
2531 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2532 {
2533         drm_radeon_private_t *dev_priv = dev->dev_private;
2534         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2535         drm_radeon_sarea_t *sarea_priv;
2536         struct drm_device_dma *dma = dev->dma;
2537         struct drm_buf *buf;
2538         drm_radeon_vertex2_t *vertex = data;
2539         int i;
2540         unsigned char laststate;
2541
2542         LOCK_TEST_WITH_RETURN(dev, file_priv);
2543
2544         if (!dev_priv) {
2545                 DRM_ERROR("called with no initialization\n");
2546                 return -EINVAL;
2547         }
2548
2549         sarea_priv = master_priv->sarea_priv;
2550
2551         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2552                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2553
2554         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2555                 DRM_ERROR("buffer index %d (of %d max)\n",
2556                           vertex->idx, dma->buf_count - 1);
2557                 return -EINVAL;
2558         }
2559
2560         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2561         VB_AGE_TEST_WITH_RETURN(dev_priv);
2562
2563         buf = dma->buflist[vertex->idx];
2564
2565         if (buf->file_priv != file_priv) {
2566                 DRM_ERROR("process %d using buffer owned by %p\n",
2567                           DRM_CURRENTPID, buf->file_priv);
2568                 return -EINVAL;
2569         }
2570
2571         if (buf->pending) {
2572                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2573                 return -EINVAL;
2574         }
2575
2576         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2577                 return -EINVAL;
2578
2579         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2580                 drm_radeon_prim_t prim;
2581                 drm_radeon_tcl_prim_t tclprim;
2582
2583                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2584                         return -EFAULT;
2585
2586                 if (prim.stateidx != laststate) {
2587                         drm_radeon_state_t state;
2588
2589                         if (DRM_COPY_FROM_USER(&state,
2590                                                &vertex->state[prim.stateidx],
2591                                                sizeof(state)))
2592                                 return -EFAULT;
2593
2594                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2595                                 DRM_ERROR("radeon_emit_state2 failed\n");
2596                                 return -EINVAL;
2597                         }
2598
2599                         laststate = prim.stateidx;
2600                 }
2601
2602                 tclprim.start = prim.start;
2603                 tclprim.finish = prim.finish;
2604                 tclprim.prim = prim.prim;
2605                 tclprim.vc_format = prim.vc_format;
2606
2607                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2608                         tclprim.offset = prim.numverts * 64;
2609                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2610
2611                         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2612                 } else {
2613                         tclprim.numverts = prim.numverts;
2614                         tclprim.offset = 0;     /* not used */
2615
2616                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2617                 }
2618
2619                 if (sarea_priv->nbox == 1)
2620                         sarea_priv->nbox = 0;
2621         }
2622
2623         if (vertex->discard) {
2624                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2625         }
2626
2627         COMMIT_RING();
2628         return 0;
2629 }
2630
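/* Emit a register-write (Type-0) packet from the command stream: look up its
 * length and base register in the packet[] table, verify the payload, then
 * copy it onto the ring.
 */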
2631 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2632                                struct drm_file *file_priv,
2633                                drm_radeon_cmd_header_t header,
2634                                drm_radeon_kcmd_buffer_t *cmdbuf)
2635 {
2636         int id = (int)header.packet.packet_id;
2637         int sz, reg;
2638         int *data = (int *)cmdbuf->buf;
2639         RING_LOCALS;
2640
2641         if (id >= RADEON_MAX_STATE_PACKETS)
2642                 return -EINVAL;
2643
2644         sz = packet[id].len;
2645         reg = packet[id].start;
2646
2647         if (sz * sizeof(int) > cmdbuf->bufsz) {
2648                 DRM_ERROR("Packet size exceeds the data provided\n");
2649                 return -EINVAL;
2650         }
2651
2652         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2653                 DRM_ERROR("Packet verification failed\n");
2654                 return -EINVAL;
2655         }
2656
2657         BEGIN_RING(sz + 1);
2658         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2659         OUT_RING_TABLE(data, sz);
2660         ADVANCE_RING();
2661
2662         cmdbuf->buf += sz * sizeof(int);
2663         cmdbuf->bufsz -= sz * sizeof(int);
2664         return 0;
2665 }
2666
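/* Load TCL scalar state: point the scalar index register at the requested
 * offset/stride, then stream the following dwords into the data register.
 */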
2667 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2668                                           drm_radeon_cmd_header_t header,
2669                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2670 {
2671         int sz = header.scalars.count;
2672         int start = header.scalars.offset;
2673         int stride = header.scalars.stride;
2674         RING_LOCALS;
2675
        /* Mirror the bounds check done in radeon_emit_packets(): don't read
         * past the end of the copied-in command buffer.
         */
        if (sz * sizeof(int) > cmdbuf->bufsz)
                return -EINVAL;

2676         BEGIN_RING(3 + sz);
2677         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2678         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2679         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2680         OUT_RING_TABLE(cmdbuf->buf, sz);
2681         ADVANCE_RING();
2682         cmdbuf->buf += sz * sizeof(int);
2683         cmdbuf->bufsz -= sz * sizeof(int);
2684         return 0;
2685 }
2686
2687 /* Ugly, but needed: as radeon_emit_scalars(), with the scalar index biased by
2688  * 0x100, since the 8-bit offset field in the header cannot reach state above 255. */
2689 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2690                                            drm_radeon_cmd_header_t header,
2691                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2692 {
2693         int sz = header.scalars.count;
2694         int start = ((unsigned int)header.scalars.offset) + 0x100;
2695         int stride = header.scalars.stride;
2696         RING_LOCALS;
2697
        /* Same bounds check as in radeon_emit_packets(). */
        if (sz * sizeof(int) > cmdbuf->bufsz)
                return -EINVAL;

2698         BEGIN_RING(3 + sz);
2699         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2700         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2701         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2702         OUT_RING_TABLE(cmdbuf->buf, sz);
2703         ADVANCE_RING();
2704         cmdbuf->buf += sz * sizeof(int);
2705         cmdbuf->bufsz -= sz * sizeof(int);
2706         return 0;
2707 }
2708
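/* Load TCL vector (octword) state: flush the TCL state, set the vector index
 * register to the requested offset/stride, then stream the payload into the
 * data register.
 */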
2709 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2710                                           drm_radeon_cmd_header_t header,
2711                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2712 {
2713         int sz = header.vectors.count;
2714         int start = header.vectors.offset;
2715         int stride = header.vectors.stride;
2716         RING_LOCALS;
2717
        /* Same bounds check as in radeon_emit_packets(). */
        if (sz * sizeof(int) > cmdbuf->bufsz)
                return -EINVAL;

2718         BEGIN_RING(5 + sz);
2719         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2720         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2721         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2722         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2723         OUT_RING_TABLE(cmdbuf->buf, sz);
2724         ADVANCE_RING();
2725
2726         cmdbuf->buf += sz * sizeof(int);
2727         cmdbuf->bufsz -= sz * sizeof(int);
2728         return 0;
2729 }
2730
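/* Like radeon_emit_vectors(), but the destination index comes from the split
 * addr_lo/addr_hi fields and the payload is written as a linear run of
 * four-dword vectors.
 */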
2731 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2732                                           drm_radeon_cmd_header_t header,
2733                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2734 {
2735         int sz = header.veclinear.count * 4;
2736         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2737         RING_LOCALS;
2738
2739         if (!sz)
2740                 return 0;
2741         if (sz * 4 > cmdbuf->bufsz)
2742                 return -EINVAL;
2743
2744         BEGIN_RING(5 + sz);
2745         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2746         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2747         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2748         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2749         OUT_RING_TABLE(cmdbuf->buf, sz);
2750         ADVANCE_RING();
2751
2752         cmdbuf->buf += sz * sizeof(int);
2753         cmdbuf->bufsz -= sz * sizeof(int);
2754         return 0;
2755 }
2756
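/* Emit a Type-3 packet: verify (and fix up offsets in) the packet with
 * radeon_check_and_fixup_packet3(), then copy it verbatim onto the ring.
 */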
2757 static int radeon_emit_packet3(struct drm_device * dev,
2758                                struct drm_file *file_priv,
2759                                drm_radeon_kcmd_buffer_t *cmdbuf)
2760 {
2761         drm_radeon_private_t *dev_priv = dev->dev_private;
2762         unsigned int cmdsz;
2763         int ret;
2764         RING_LOCALS;
2765
2766         DRM_DEBUG("\n");
2767
2768         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2769                                                   cmdbuf, &cmdsz))) {
2770                 DRM_ERROR("Packet verification failed\n");
2771                 return ret;
2772         }
2773
2774         BEGIN_RING(cmdsz);
2775         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2776         ADVANCE_RING();
2777
2778         cmdbuf->buf += cmdsz * 4;
2779         cmdbuf->bufsz -= cmdsz * 4;
2780         return 0;
2781 }
2782
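/* As radeon_emit_packet3(), but the packet is re-emitted once per cliprect,
 * with the scissor programmed via radeon_emit_clip_rect() before each pass.
 */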
2783 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2784                                         struct drm_file *file_priv,
2785                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2786                                         int orig_nbox)
2787 {
2788         drm_radeon_private_t *dev_priv = dev->dev_private;
2789         struct drm_clip_rect box;
2790         unsigned int cmdsz;
2791         int ret;
2792         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2793         int i = 0;
2794         RING_LOCALS;
2795
2796         DRM_DEBUG("\n");
2797
2798         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2799                                                   cmdbuf, &cmdsz))) {
2800                 DRM_ERROR("Packet verification failed\n");
2801                 return ret;
2802         }
2803
2804         if (!orig_nbox)
2805                 goto out;
2806
2807         do {
2808                 if (i < cmdbuf->nbox) {
2809                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2810                                 return -EFAULT;
2811                         /* FIXME The second and subsequent times round
2812                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2813                          * calling emit_clip_rect(). This fixes a
2814                          * lockup on fast machines when sending
2815                          * several cliprects with a cmdbuf, as when
2816                          * waving a 2D window over a 3D
2817                          * window. Something in the commands from user
2818                          * space seems to hang the card when they're
2819                          * sent several times in a row. That would be
2820                          * the correct place to fix it but this works
2821                          * around it until I can figure that out - Tim
2822                          * Smith */
2823                         if (i) {
2824                                 BEGIN_RING(2);
2825                                 RADEON_WAIT_UNTIL_3D_IDLE();
2826                                 ADVANCE_RING();
2827                         }
2828                         radeon_emit_clip_rect(dev_priv, &box);
2829                 }
2830
2831                 BEGIN_RING(cmdsz);
2832                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2833                 ADVANCE_RING();
2834
2835         } while (++i < cmdbuf->nbox);
2836         if (cmdbuf->nbox == 1)
2837                 cmdbuf->nbox = 0;
2838
2839       out:
2840         cmdbuf->buf += cmdsz * 4;
2841         cmdbuf->bufsz -= cmdsz * 4;
2842         return 0;
2843 }
2844
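/* Emit a CP wait until the requested engines (2D, 3D or both) are idle. */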
2845 static int radeon_emit_wait(struct drm_device * dev, int flags)
2846 {
2847         drm_radeon_private_t *dev_priv = dev->dev_private;
2848         RING_LOCALS;
2849
2850         DRM_DEBUG("%x\n", flags);
2851         switch (flags) {
2852         case RADEON_WAIT_2D:
2853                 BEGIN_RING(2);
2854                 RADEON_WAIT_UNTIL_2D_IDLE();
2855                 ADVANCE_RING();
2856                 break;
2857         case RADEON_WAIT_3D:
2858                 BEGIN_RING(2);
2859                 RADEON_WAIT_UNTIL_3D_IDLE();
2860                 ADVANCE_RING();
2861                 break;
2862         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2863                 BEGIN_RING(2);
2864                 RADEON_WAIT_UNTIL_IDLE();
2865                 ADVANCE_RING();
2866                 break;
2867         default:
2868                 return -EINVAL;
2869         }
2870
2871         return 0;
2872 }
2873
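/* Top-level CMDBUF ioctl: the command stream is copied into a kernel buffer
 * (so it cannot change while being checked), then decoded one command header
 * at a time.  r300 and newer chips take the r300_do_cp_cmdbuf() path instead.
 */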
2874 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2875 {
2876         drm_radeon_private_t *dev_priv = dev->dev_private;
2877         struct drm_device_dma *dma = dev->dma;
2878         struct drm_buf *buf = NULL;
2879         int idx;
2880         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2881         drm_radeon_cmd_header_t header;
2882         int orig_nbox, orig_bufsz;
2883         char *kbuf = NULL;
2884
2885         LOCK_TEST_WITH_RETURN(dev, file_priv);
2886
2887         if (!dev_priv) {
2888                 DRM_ERROR("called with no initialization\n");
2889                 return -EINVAL;
2890         }
2891
2892         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2893         VB_AGE_TEST_WITH_RETURN(dev_priv);
2894
2895         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2896                 return -EINVAL;
2897         }
2898
2899         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2900          * races between checking values and using those values in other code,
2901          * and simply to avoid a lot of function calls to copy in data.
2902          */
2903         orig_bufsz = cmdbuf->bufsz;
2904         if (orig_bufsz != 0) {
2905                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2906                 if (kbuf == NULL)
2907                         return -ENOMEM;
2908                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2909                                        cmdbuf->bufsz)) {
2910                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2911                         return -EFAULT;
2912                 }
2913                 cmdbuf->buf = kbuf;
2914         }
2915
2916         orig_nbox = cmdbuf->nbox;
2917
2918         if (dev_priv->chip_family >= CHIP_R300) {
2919                 int temp;
2920                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2921
2922                 if (orig_bufsz != 0)
2923                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2924
2925                 return temp;
2926         }
2927
2928         /* Legacy (pre-r300) command stream: decode it one header at a time. */
2929         while (cmdbuf->bufsz >= sizeof(header)) {
2930
2931                 header.i = *(int *)cmdbuf->buf;
2932                 cmdbuf->buf += sizeof(header);
2933                 cmdbuf->bufsz -= sizeof(header);
2934
2935                 switch (header.header.cmd_type) {
2936                 case RADEON_CMD_PACKET:
2937                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2938                         if (radeon_emit_packets
2939                             (dev_priv, file_priv, header, cmdbuf)) {
2940                                 DRM_ERROR("radeon_emit_packets failed\n");
2941                                 goto err;
2942                         }
2943                         break;
2944
2945                 case RADEON_CMD_SCALARS:
2946                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2947                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2948                                 DRM_ERROR("radeon_emit_scalars failed\n");
2949                                 goto err;
2950                         }
2951                         break;
2952
2953                 case RADEON_CMD_VECTORS:
2954                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2955                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2956                                 DRM_ERROR("radeon_emit_vectors failed\n");
2957                                 goto err;
2958                         }
2959                         break;
2960
2961                 case RADEON_CMD_DMA_DISCARD:
2962                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2963                         idx = header.dma.buf_idx;
2964                         if (idx < 0 || idx >= dma->buf_count) {
2965                                 DRM_ERROR("buffer index %d (of %d max)\n",
2966                                           idx, dma->buf_count - 1);
2967                                 goto err;
2968                         }
2969
2970                         buf = dma->buflist[idx];
2971                         if (buf->file_priv != file_priv || buf->pending) {
2972                                 DRM_ERROR("bad buffer %p %p %d\n",
2973                                           buf->file_priv, file_priv,
2974                                           buf->pending);
2975                                 goto err;
2976                         }
2977
2978                         radeon_cp_discard_buffer(dev, file_priv->master, buf);
2979                         break;
2980
2981                 case RADEON_CMD_PACKET3:
2982                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2983                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2984                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2985                                 goto err;
2986                         }
2987                         break;
2988
2989                 case RADEON_CMD_PACKET3_CLIP:
2990                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2991                         if (radeon_emit_packet3_cliprect
2992                             (dev, file_priv, cmdbuf, orig_nbox)) {
2993                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2994                                 goto err;
2995                         }
2996                         break;
2997
2998                 case RADEON_CMD_SCALARS2:
2999                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
3000                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
3001                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
3002                                 goto err;
3003                         }
3004                         break;
3005
3006                 case RADEON_CMD_WAIT:
3007                         DRM_DEBUG("RADEON_CMD_WAIT\n");
3008                         if (radeon_emit_wait(dev, header.wait.flags)) {
3009                                 DRM_ERROR("radeon_emit_wait failed\n");
3010                                 goto err;
3011                         }
3012                         break;
3013                 case RADEON_CMD_VECLINEAR:
3014                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3015                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
3016                                 DRM_ERROR("radeon_emit_veclinear failed\n");
3017                                 goto err;
3018                         }
3019                         break;
3020
3021                 default:
3022                         DRM_ERROR("bad cmd_type %d at %p\n",
3023                                   header.header.cmd_type,
3024                                   cmdbuf->buf - sizeof(header));
3025                         goto err;
3026                 }
3027         }
3028
3029         if (orig_bufsz != 0)
3030                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3031
3032         DRM_DEBUG("DONE\n");
3033         COMMIT_RING();
3034         return 0;
3035
3036       err:
3037         if (orig_bufsz != 0)
3038                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3039         return -EINVAL;
3040 }
3041
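/* GETPARAM ioctl: report a single driver/hardware value back to user space. */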
3042 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3043 {
3044         drm_radeon_private_t *dev_priv = dev->dev_private;
3045         drm_radeon_getparam_t *param = data;
3046         int value;
3047
3048         if (!dev_priv) {
3049                 DRM_ERROR("called with no initialization\n");
3050                 return -EINVAL;
3051         }
3052
3053         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3054
3055         switch (param->param) {
3056         case RADEON_PARAM_GART_BUFFER_OFFSET:
3057                 value = dev_priv->gart_buffers_offset;
3058                 break;
3059         case RADEON_PARAM_LAST_FRAME:
3060                 dev_priv->stats.last_frame_reads++;
3061                 value = GET_SCRATCH(0);
3062                 break;
3063         case RADEON_PARAM_LAST_DISPATCH:
3064                 value = GET_SCRATCH(1);
3065                 break;
3066         case RADEON_PARAM_LAST_CLEAR:
3067                 dev_priv->stats.last_clear_reads++;
3068                 value = GET_SCRATCH(2);
3069                 break;
3070         case RADEON_PARAM_IRQ_NR:
3071                 value = dev->irq;
3072                 break;
3073         case RADEON_PARAM_GART_BASE:
3074                 value = dev_priv->gart_vm_start;
3075                 break;
3076         case RADEON_PARAM_REGISTER_HANDLE:
3077                 value = dev_priv->mmio->offset;
3078                 break;
3079         case RADEON_PARAM_STATUS_HANDLE:
3080                 value = dev_priv->ring_rptr_offset;
3081                 break;
3082 #ifndef __LP64__
3083                 /*
3084                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3085                  * pointer which can't fit into an int-sized variable.  According to
3086                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3087                  * not supporting it shouldn't be a problem.  If the same functionality
3088                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3089                  * so backwards-compatibility for the embedded platforms can be
3090                  * maintained.  --davidm 4-Feb-2004.
3091                  */
3092         case RADEON_PARAM_SAREA_HANDLE:
3093                 /* The lock is the first dword in the sarea. */
3094                 value = (long)dev->primary->master->lock.hw_lock;
3095                 break;
3096 #endif
3097         case RADEON_PARAM_GART_TEX_HANDLE:
3098                 value = dev_priv->gart_textures_offset;
3099                 break;
3100         case RADEON_PARAM_SCRATCH_OFFSET:
3101                 if (!dev_priv->writeback_works)
3102                         return -EINVAL;
3103                 value = RADEON_SCRATCH_REG_OFFSET;
3104                 break;
3105
3106         case RADEON_PARAM_CARD_TYPE:
3107                 if (dev_priv->flags & RADEON_IS_PCIE)
3108                         value = RADEON_CARD_PCIE;
3109                 else if (dev_priv->flags & RADEON_IS_AGP)
3110                         value = RADEON_CARD_AGP;
3111                 else
3112                         value = RADEON_CARD_PCI;
3113                 break;
3114         case RADEON_PARAM_VBLANK_CRTC:
3115                 value = radeon_vblank_crtc_get(dev);
3116                 break;
3117         case RADEON_PARAM_FB_LOCATION:
3118                 value = radeon_read_fb_location(dev_priv);
3119                 break;
3120         case RADEON_PARAM_NUM_GB_PIPES:
3121                 value = dev_priv->num_gb_pipes;
3122                 break;
3123         case RADEON_PARAM_KERNEL_MM:
3124                 value = dev_priv->mm_enabled;
3125                 break;
3126         default:
3127                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3128                 return -EINVAL;
3129         }
3130
3131         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3132                 DRM_ERROR("copy_to_user\n");
3133                 return -EFAULT;
3134         }
3135
3136         return 0;
3137 }
3138
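/* SETPARAM ioctl: let user space tune driver state such as the framebuffer
 * offset delta, color tiling and the PCI GART location.
 */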
3139 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3140 {
3141         drm_radeon_private_t *dev_priv = dev->dev_private;
3142         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
3143         drm_radeon_setparam_t *sp = data;
3144         struct drm_radeon_driver_file_fields *radeon_priv;
3145
3146         if (!dev_priv) {
3147                 DRM_ERROR("called with no initialization\n");
3148                 return -EINVAL;
3149         }
3150
3151         switch (sp->param) {
3152         case RADEON_SETPARAM_FB_LOCATION:
3153                 radeon_priv = file_priv->driver_priv;
3154                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3155                     sp->value;
3156                 break;
3157         case RADEON_SETPARAM_SWITCH_TILING:
3158                 if (sp->value == 0) {
3159                         DRM_DEBUG("color tiling disabled\n");
3160                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3161                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3162                         if (master_priv->sarea_priv)
3163                                 master_priv->sarea_priv->tiling_enabled = 0;
3164                 } else if (sp->value == 1) {
3165                         DRM_DEBUG("color tiling enabled\n");
3166                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3167                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3168                         if (master_priv->sarea_priv)
3169                                 master_priv->sarea_priv->tiling_enabled = 1;
3170                 }
3171                 break;
3172         case RADEON_SETPARAM_PCIGART_LOCATION:
3173                 dev_priv->pcigart_offset = sp->value;
3174                 dev_priv->pcigart_offset_set = 1;
3175                 break;
3176         case RADEON_SETPARAM_NEW_MEMMAP:
3177                 dev_priv->new_memmap = sp->value;
3178                 break;
3179         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3180                 dev_priv->gart_info.table_size = sp->value;
3181                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3182                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3183                 break;
3184         case RADEON_SETPARAM_VBLANK_CRTC:
3185                 return radeon_vblank_crtc_set(dev, sp->value);
3187         case RADEON_SETPARAM_MM_INIT:
3188                 dev_priv->user_mm_enable = true;
3189                 dev_priv->new_memmap = true;
3190                 return radeon_gem_mm_init(dev);
3191         default:
3192                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3193                 return -EINVAL;
3194         }
3195
3196         return 0;
3197 }
3198
3199 /* When a client dies:
3200  *    - Check for and clean up flipped page state
3201  *    - Free any alloced GART memory.
3202  *    - Free any alloced radeon surfaces.
3203  *
3204  * DRM infrastructure takes care of reclaiming dma buffers.
3205  */
3206 void radeon_driver_preclose(struct drm_device *dev,
3207                             struct drm_file *file_priv)
3208 {
3209         if (dev->dev_private) {
3210                 drm_radeon_private_t *dev_priv = dev->dev_private;
3211                 dev_priv->page_flipping = 0;
3212                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3213                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3214                 radeon_surfaces_release(file_priv, dev_priv);
3215         }
3216 }
3217
3218 void radeon_driver_lastclose(struct drm_device *dev)
3219 {
3220         radeon_do_release(dev);
3221 }
3222
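/* Per-file open hook: allocate the per-file private data and record the
 * current framebuffer location so that offsets from older clients can be
 * fixed up later.
 */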
3223 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3224 {
3225         drm_radeon_private_t *dev_priv = dev->dev_private;
3226         struct drm_radeon_driver_file_fields *radeon_priv;
3227
3228         DRM_DEBUG("\n");
3229         radeon_priv =
3230             (struct drm_radeon_driver_file_fields *)
3231             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3232
3233         if (!radeon_priv)
3234                 return -ENOMEM;
3235
3236         file_priv->driver_priv = radeon_priv;
3237
3238         if (dev_priv)
3239                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3240         else
3241                 radeon_priv->radeon_fb_delta = 0;
3242         return 0;
3243 }
3244
3245 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3246 {
3247         struct drm_radeon_driver_file_fields *radeon_priv =
3248             file_priv->driver_priv;
3249
3250         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3251 }
3252
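/* Legacy radeon ioctl table.  Engine and heap setup plus the unverified
 * indirect path are restricted to the master / root; everything else only
 * needs DRM_AUTH.
 */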
3253 struct drm_ioctl_desc radeon_ioctls[] = {
3254         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3255         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3256         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3257         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3258         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3259         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3260         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3261         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3262         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3263         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3264         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3265         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3266         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3267         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3268         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3269         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3270         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3271         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3272         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3273         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3274         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3275         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3276         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3277         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3278         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3279         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3280         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3281
3282         DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH),
3283         DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH),
3284
3285         DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH),
3286         DRM_IOCTL_DEF(DRM_RADEON_GEM_PIN, radeon_gem_pin_ioctl, DRM_AUTH),
3287         DRM_IOCTL_DEF(DRM_RADEON_GEM_UNPIN, radeon_gem_unpin_ioctl, DRM_AUTH),
3288         DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH),
3289         DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH),
3290         DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH),
3291         DRM_IOCTL_DEF(DRM_RADEON_GEM_INDIRECT, radeon_gem_indirect_ioctl, DRM_AUTH),
3292         DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
3293 };
3294
3295 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);