888bd1ae2a913044caa21591b94b348ac97790d9
[dragonfly.git] / sys / dev / drm / radeon / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  *
29  * $FreeBSD: head/sys/dev/drm2/radeon/radeon_state.c 254885 2013-08-25 19:37:15Z dumbbell $
30  */
31
32 #include <drm/drmP.h>
33 #include <drm/drm_buffer.h>
34 #include <uapi_drm/radeon_drm.h>
35 #include "radeon_drv.h"
36
37 /* ================================================================
38  * Helper functions for client state checking and fixup
39  */
40
41 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
42                                                     dev_priv,
43                                                     struct drm_file * file_priv,
44                                                     u32 *offset)
45 {
46         u64 off = *offset;
47         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
48         struct drm_radeon_driver_file_fields *radeon_priv;
49
50         /* Hrm ... the story of the offset ... So this function converts
51          * the various ideas of what userland clients might have for an
52          * offset in the card address space into an offset into the card
53          * address space :) So with a sane client, it should just keep
54          * the value intact and just do some boundary checking. However,
55          * not all clients are sane. Some older clients pass us 0 based
56          * offsets relative to the start of the framebuffer and some may
57          * assume the AGP aperture it appended to the framebuffer, so we
58          * try to detect those cases and fix them up.
59          *
60          * Note: It might be a good idea here to make sure the offset lands
61          * in some "allowed" area to protect things like the PCIE GART...
62          */
63
64         /* First, the best case, the offset already lands in either the
65          * framebuffer or the GART mapped space
66          */
67         if (radeon_check_offset(dev_priv, off))
68                 return 0;
69
70         /* Ok, that didn't happen... now check if we have a zero based
71          * offset that fits in the framebuffer + gart space, apply the
72          * magic offset we get from SETPARAM or calculated from fb_location
73          */
74         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
75                 radeon_priv = file_priv->driver_priv;
76                 off += radeon_priv->radeon_fb_delta;
77         }
78
79         /* Finally, assume we aimed at a GART offset if beyond the fb */
80         if (off > fb_end)
81                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
82
83         /* Now recheck and fail if out of bounds */
84         if (radeon_check_offset(dev_priv, off)) {
85                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
86                 *offset = off;
87                 return 0;
88         }
89         return -EINVAL;
90 }
91
92 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
93                                                      dev_priv,
94                                                      struct drm_file *file_priv,
95                                                      int id, struct drm_buffer *buf)
96 {
97         u32 *data;
98         switch (id) {
99
100         case RADEON_EMIT_PP_MISC:
101                 data = drm_buffer_pointer_to_dword(buf,
102                         (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
103
104                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
105                         DRM_ERROR("Invalid depth buffer offset\n");
106                         return -EINVAL;
107                 }
108                 dev_priv->have_z_offset = 1;
109                 break;
110
111         case RADEON_EMIT_PP_CNTL:
112                 data = drm_buffer_pointer_to_dword(buf,
113                         (RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
114
115                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
116                         DRM_ERROR("Invalid colour buffer offset\n");
117                         return -EINVAL;
118                 }
119                 break;
120
121         case R200_EMIT_PP_TXOFFSET_0:
122         case R200_EMIT_PP_TXOFFSET_1:
123         case R200_EMIT_PP_TXOFFSET_2:
124         case R200_EMIT_PP_TXOFFSET_3:
125         case R200_EMIT_PP_TXOFFSET_4:
126         case R200_EMIT_PP_TXOFFSET_5:
127                 data = drm_buffer_pointer_to_dword(buf, 0);
128                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
129                         DRM_ERROR("Invalid R200 texture offset\n");
130                         return -EINVAL;
131                 }
132                 break;
133
134         case RADEON_EMIT_PP_TXFILTER_0:
135         case RADEON_EMIT_PP_TXFILTER_1:
136         case RADEON_EMIT_PP_TXFILTER_2:
137                 data = drm_buffer_pointer_to_dword(buf,
138                         (RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
139                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
140                         DRM_ERROR("Invalid R100 texture offset\n");
141                         return -EINVAL;
142                 }
143                 break;
144
145         case R200_EMIT_PP_CUBIC_OFFSETS_0:
146         case R200_EMIT_PP_CUBIC_OFFSETS_1:
147         case R200_EMIT_PP_CUBIC_OFFSETS_2:
148         case R200_EMIT_PP_CUBIC_OFFSETS_3:
149         case R200_EMIT_PP_CUBIC_OFFSETS_4:
150         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
151                         int i;
152                         for (i = 0; i < 5; i++) {
153                                 data = drm_buffer_pointer_to_dword(buf, i);
154                                 if (radeon_check_and_fixup_offset(dev_priv,
155                                                                   file_priv,
156                                                                   data)) {
157                                         DRM_ERROR
158                                             ("Invalid R200 cubic texture offset\n");
159                                         return -EINVAL;
160                                 }
161                         }
162                         break;
163                 }
164
165         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
166         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
167         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
168                         int i;
169                         for (i = 0; i < 5; i++) {
170                                 data = drm_buffer_pointer_to_dword(buf, i);
171                                 if (radeon_check_and_fixup_offset(dev_priv,
172                                                                   file_priv,
173                                                                   data)) {
174                                         DRM_ERROR
175                                             ("Invalid R100 cubic texture offset\n");
176                                         return -EINVAL;
177                                 }
178                         }
179                 }
180                 break;
181
182         case R200_EMIT_VAP_CTL:{
183                         RING_LOCALS;
184                         BEGIN_RING(2);
185                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
186                         ADVANCE_RING();
187                 }
188                 break;
189
190         case RADEON_EMIT_RB3D_COLORPITCH:
191         case RADEON_EMIT_RE_LINE_PATTERN:
192         case RADEON_EMIT_SE_LINE_WIDTH:
193         case RADEON_EMIT_PP_LUM_MATRIX:
194         case RADEON_EMIT_PP_ROT_MATRIX_0:
195         case RADEON_EMIT_RB3D_STENCILREFMASK:
196         case RADEON_EMIT_SE_VPORT_XSCALE:
197         case RADEON_EMIT_SE_CNTL:
198         case RADEON_EMIT_SE_CNTL_STATUS:
199         case RADEON_EMIT_RE_MISC:
200         case RADEON_EMIT_PP_BORDER_COLOR_0:
201         case RADEON_EMIT_PP_BORDER_COLOR_1:
202         case RADEON_EMIT_PP_BORDER_COLOR_2:
203         case RADEON_EMIT_SE_ZBIAS_FACTOR:
204         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
205         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
206         case R200_EMIT_PP_TXCBLEND_0:
207         case R200_EMIT_PP_TXCBLEND_1:
208         case R200_EMIT_PP_TXCBLEND_2:
209         case R200_EMIT_PP_TXCBLEND_3:
210         case R200_EMIT_PP_TXCBLEND_4:
211         case R200_EMIT_PP_TXCBLEND_5:
212         case R200_EMIT_PP_TXCBLEND_6:
213         case R200_EMIT_PP_TXCBLEND_7:
214         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
215         case R200_EMIT_TFACTOR_0:
216         case R200_EMIT_VTX_FMT_0:
217         case R200_EMIT_MATRIX_SELECT_0:
218         case R200_EMIT_TEX_PROC_CTL_2:
219         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
220         case R200_EMIT_PP_TXFILTER_0:
221         case R200_EMIT_PP_TXFILTER_1:
222         case R200_EMIT_PP_TXFILTER_2:
223         case R200_EMIT_PP_TXFILTER_3:
224         case R200_EMIT_PP_TXFILTER_4:
225         case R200_EMIT_PP_TXFILTER_5:
226         case R200_EMIT_VTE_CNTL:
227         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
228         case R200_EMIT_PP_TAM_DEBUG3:
229         case R200_EMIT_PP_CNTL_X:
230         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
231         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
232         case R200_EMIT_RE_SCISSOR_TL_0:
233         case R200_EMIT_RE_SCISSOR_TL_1:
234         case R200_EMIT_RE_SCISSOR_TL_2:
235         case R200_EMIT_SE_VAP_CNTL_STATUS:
236         case R200_EMIT_SE_VTX_STATE_CNTL:
237         case R200_EMIT_RE_POINTSIZE:
238         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
239         case R200_EMIT_PP_CUBIC_FACES_0:
240         case R200_EMIT_PP_CUBIC_FACES_1:
241         case R200_EMIT_PP_CUBIC_FACES_2:
242         case R200_EMIT_PP_CUBIC_FACES_3:
243         case R200_EMIT_PP_CUBIC_FACES_4:
244         case R200_EMIT_PP_CUBIC_FACES_5:
245         case RADEON_EMIT_PP_TEX_SIZE_0:
246         case RADEON_EMIT_PP_TEX_SIZE_1:
247         case RADEON_EMIT_PP_TEX_SIZE_2:
248         case R200_EMIT_RB3D_BLENDCOLOR:
249         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
250         case RADEON_EMIT_PP_CUBIC_FACES_0:
251         case RADEON_EMIT_PP_CUBIC_FACES_1:
252         case RADEON_EMIT_PP_CUBIC_FACES_2:
253         case R200_EMIT_PP_TRI_PERF_CNTL:
254         case R200_EMIT_PP_AFS_0:
255         case R200_EMIT_PP_AFS_1:
256         case R200_EMIT_ATF_TFACTOR:
257         case R200_EMIT_PP_TXCTLALL_0:
258         case R200_EMIT_PP_TXCTLALL_1:
259         case R200_EMIT_PP_TXCTLALL_2:
260         case R200_EMIT_PP_TXCTLALL_3:
261         case R200_EMIT_PP_TXCTLALL_4:
262         case R200_EMIT_PP_TXCTLALL_5:
263         case R200_EMIT_VAP_PVS_CNTL:
264                 /* These packets don't contain memory offsets */
265                 break;
266
267         default:
268                 DRM_ERROR("Unknown state packet ID %d\n", id);
269                 return -EINVAL;
270         }
271
272         return 0;
273 }
274
275 static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
276                                           dev_priv,
277                                           struct drm_file *file_priv,
278                                           drm_radeon_kcmd_buffer_t *
279                                           cmdbuf,
280                                           unsigned int *cmdsz)
281 {
282         u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
283         u32 offset, narrays;
284         int count, i, k;
285
286         count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
287         *cmdsz = 2 + count;
288
289         if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
290                 DRM_ERROR("Not a type 3 packet\n");
291                 return -EINVAL;
292         }
293
294         if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
295                 DRM_ERROR("Packet size larger than size of data provided\n");
296                 return -EINVAL;
297         }
298
299         switch (*cmd & 0xff00) {
300         /* XXX Are there old drivers needing other packets? */
301
302         case RADEON_3D_DRAW_IMMD:
303         case RADEON_3D_DRAW_VBUF:
304         case RADEON_3D_DRAW_INDX:
305         case RADEON_WAIT_FOR_IDLE:
306         case RADEON_CP_NOP:
307         case RADEON_3D_CLEAR_ZMASK:
308 /*      case RADEON_CP_NEXT_CHAR:
309         case RADEON_CP_PLY_NEXTSCAN:
310         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
311                 /* these packets are safe */
312                 break;
313
314         case RADEON_CP_3D_DRAW_IMMD_2:
315         case RADEON_CP_3D_DRAW_VBUF_2:
316         case RADEON_CP_3D_DRAW_INDX_2:
317         case RADEON_3D_CLEAR_HIZ:
318                 /* safe but r200 only */
319                 if (dev_priv->microcode_version != UCODE_R200) {
320                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
321                         return -EINVAL;
322                 }
323                 break;
324
325         case RADEON_3D_LOAD_VBPNTR:
326
327                 if (count > 18) { /* 12 arrays max */
328                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
329                                   count);
330                         return -EINVAL;
331                 }
332
333                 /* carefully check packet contents */
334                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
335
336                 narrays = *cmd & ~0xc000;
337                 k = 0;
338                 i = 2;
339                 while ((k < narrays) && (i < (count + 2))) {
340                         i++;            /* skip attribute field */
341                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
342                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
343                                                           cmd)) {
344                                 DRM_ERROR
345                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
346                                      k, i);
347                                 return -EINVAL;
348                         }
349                         k++;
350                         i++;
351                         if (k == narrays)
352                                 break;
353                         /* have one more to process, they come in pairs */
354                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
355
356                         if (radeon_check_and_fixup_offset(dev_priv,
357                                                           file_priv, cmd))
358                         {
359                                 DRM_ERROR
360                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
361                                      k, i);
362                                 return -EINVAL;
363                         }
364                         k++;
365                         i++;
366                 }
367                 /* do the counts match what we expect ? */
368                 if ((k != narrays) || (i != (count + 2))) {
369                         DRM_ERROR
370                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
371                               k, i, narrays, count + 1);
372                         return -EINVAL;
373                 }
374                 break;
375
376         case RADEON_3D_RNDR_GEN_INDX_PRIM:
377                 if (dev_priv->microcode_version != UCODE_R100) {
378                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
379                         return -EINVAL;
380                 }
381
382                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
383                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
384                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
385                                 return -EINVAL;
386                 }
387                 break;
388
389         case RADEON_CP_INDX_BUFFER:
390                 if (dev_priv->microcode_version != UCODE_R200) {
391                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
392                         return -EINVAL;
393                 }
394
395                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
396                 if ((*cmd & 0x8000ffff) != 0x80000810) {
397                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
398                         return -EINVAL;
399                 }
400                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
401                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
402                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
403                         return -EINVAL;
404                 }
405                 break;
406
407         case RADEON_CNTL_HOSTDATA_BLT:
408         case RADEON_CNTL_PAINT_MULTI:
409         case RADEON_CNTL_BITBLT_MULTI:
410                 /* MSB of opcode: next DWORD GUI_CNTL */
411                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
412                 if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
413                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
414                         u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
415                         offset = *cmd2 << 10;
416                         if (radeon_check_and_fixup_offset
417                             (dev_priv, file_priv, &offset)) {
418                                 DRM_ERROR("Invalid first packet offset\n");
419                                 return -EINVAL;
420                         }
421                         *cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
422                 }
423
424                 if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
425                     (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
426                         u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
427                         offset = *cmd3 << 10;
428                         if (radeon_check_and_fixup_offset
429                             (dev_priv, file_priv, &offset)) {
430                                 DRM_ERROR("Invalid second packet offset\n");
431                                 return -EINVAL;
432                         }
433                         *cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
434                 }
435                 break;
436
437         default:
438                 DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
439                 return -EINVAL;
440         }
441
442         return 0;
443 }
444
445 /* ================================================================
446  * CP hardware state programming functions
447  */
448
449 static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
450                                   struct drm_clip_rect * box)
451 {
452         RING_LOCALS;
453
454         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
455                   box->x1, box->y1, box->x2, box->y2);
456
457         BEGIN_RING(4);
458         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
459         OUT_RING((box->y1 << 16) | box->x1);
460         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
461         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
462         ADVANCE_RING();
463 }
464
465 /* Emit 1.1 state
466  */
467 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
468                              struct drm_file *file_priv,
469                              drm_radeon_context_regs_t * ctx,
470                              drm_radeon_texture_regs_t * tex,
471                              unsigned int dirty)
472 {
473         RING_LOCALS;
474         DRM_DEBUG("dirty=0x%08x\n", dirty);
475
476         if (dirty & RADEON_UPLOAD_CONTEXT) {
477                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
478                                                   &ctx->rb3d_depthoffset)) {
479                         DRM_ERROR("Invalid depth buffer offset\n");
480                         return -EINVAL;
481                 }
482
483                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
484                                                   &ctx->rb3d_coloroffset)) {
485                         DRM_ERROR("Invalid depth buffer offset\n");
486                         return -EINVAL;
487                 }
488
489                 BEGIN_RING(14);
490                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
491                 OUT_RING(ctx->pp_misc);
492                 OUT_RING(ctx->pp_fog_color);
493                 OUT_RING(ctx->re_solid_color);
494                 OUT_RING(ctx->rb3d_blendcntl);
495                 OUT_RING(ctx->rb3d_depthoffset);
496                 OUT_RING(ctx->rb3d_depthpitch);
497                 OUT_RING(ctx->rb3d_zstencilcntl);
498                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
499                 OUT_RING(ctx->pp_cntl);
500                 OUT_RING(ctx->rb3d_cntl);
501                 OUT_RING(ctx->rb3d_coloroffset);
502                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
503                 OUT_RING(ctx->rb3d_colorpitch);
504                 ADVANCE_RING();
505         }
506
507         if (dirty & RADEON_UPLOAD_VERTFMT) {
508                 BEGIN_RING(2);
509                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
510                 OUT_RING(ctx->se_coord_fmt);
511                 ADVANCE_RING();
512         }
513
514         if (dirty & RADEON_UPLOAD_LINE) {
515                 BEGIN_RING(5);
516                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
517                 OUT_RING(ctx->re_line_pattern);
518                 OUT_RING(ctx->re_line_state);
519                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
520                 OUT_RING(ctx->se_line_width);
521                 ADVANCE_RING();
522         }
523
524         if (dirty & RADEON_UPLOAD_BUMPMAP) {
525                 BEGIN_RING(5);
526                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
527                 OUT_RING(ctx->pp_lum_matrix);
528                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
529                 OUT_RING(ctx->pp_rot_matrix_0);
530                 OUT_RING(ctx->pp_rot_matrix_1);
531                 ADVANCE_RING();
532         }
533
534         if (dirty & RADEON_UPLOAD_MASKS) {
535                 BEGIN_RING(4);
536                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
537                 OUT_RING(ctx->rb3d_stencilrefmask);
538                 OUT_RING(ctx->rb3d_ropcntl);
539                 OUT_RING(ctx->rb3d_planemask);
540                 ADVANCE_RING();
541         }
542
543         if (dirty & RADEON_UPLOAD_VIEWPORT) {
544                 BEGIN_RING(7);
545                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
546                 OUT_RING(ctx->se_vport_xscale);
547                 OUT_RING(ctx->se_vport_xoffset);
548                 OUT_RING(ctx->se_vport_yscale);
549                 OUT_RING(ctx->se_vport_yoffset);
550                 OUT_RING(ctx->se_vport_zscale);
551                 OUT_RING(ctx->se_vport_zoffset);
552                 ADVANCE_RING();
553         }
554
555         if (dirty & RADEON_UPLOAD_SETUP) {
556                 BEGIN_RING(4);
557                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
558                 OUT_RING(ctx->se_cntl);
559                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
560                 OUT_RING(ctx->se_cntl_status);
561                 ADVANCE_RING();
562         }
563
564         if (dirty & RADEON_UPLOAD_MISC) {
565                 BEGIN_RING(2);
566                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
567                 OUT_RING(ctx->re_misc);
568                 ADVANCE_RING();
569         }
570
571         if (dirty & RADEON_UPLOAD_TEX0) {
572                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573                                                   &tex[0].pp_txoffset)) {
574                         DRM_ERROR("Invalid texture offset for unit 0\n");
575                         return -EINVAL;
576                 }
577
578                 BEGIN_RING(9);
579                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
580                 OUT_RING(tex[0].pp_txfilter);
581                 OUT_RING(tex[0].pp_txformat);
582                 OUT_RING(tex[0].pp_txoffset);
583                 OUT_RING(tex[0].pp_txcblend);
584                 OUT_RING(tex[0].pp_txablend);
585                 OUT_RING(tex[0].pp_tfactor);
586                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
587                 OUT_RING(tex[0].pp_border_color);
588                 ADVANCE_RING();
589         }
590
591         if (dirty & RADEON_UPLOAD_TEX1) {
592                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593                                                   &tex[1].pp_txoffset)) {
594                         DRM_ERROR("Invalid texture offset for unit 1\n");
595                         return -EINVAL;
596                 }
597
598                 BEGIN_RING(9);
599                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
600                 OUT_RING(tex[1].pp_txfilter);
601                 OUT_RING(tex[1].pp_txformat);
602                 OUT_RING(tex[1].pp_txoffset);
603                 OUT_RING(tex[1].pp_txcblend);
604                 OUT_RING(tex[1].pp_txablend);
605                 OUT_RING(tex[1].pp_tfactor);
606                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
607                 OUT_RING(tex[1].pp_border_color);
608                 ADVANCE_RING();
609         }
610
611         if (dirty & RADEON_UPLOAD_TEX2) {
612                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
613                                                   &tex[2].pp_txoffset)) {
614                         DRM_ERROR("Invalid texture offset for unit 2\n");
615                         return -EINVAL;
616                 }
617
618                 BEGIN_RING(9);
619                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
620                 OUT_RING(tex[2].pp_txfilter);
621                 OUT_RING(tex[2].pp_txformat);
622                 OUT_RING(tex[2].pp_txoffset);
623                 OUT_RING(tex[2].pp_txcblend);
624                 OUT_RING(tex[2].pp_txablend);
625                 OUT_RING(tex[2].pp_tfactor);
626                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
627                 OUT_RING(tex[2].pp_border_color);
628                 ADVANCE_RING();
629         }
630
631         return 0;
632 }
633
634 /* Emit 1.2 state
635  */
636 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
637                               struct drm_file *file_priv,
638                               drm_radeon_state_t * state)
639 {
640         RING_LOCALS;
641
642         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
643                 BEGIN_RING(3);
644                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
645                 OUT_RING(state->context2.se_zbias_factor);
646                 OUT_RING(state->context2.se_zbias_constant);
647                 ADVANCE_RING();
648         }
649
650         return radeon_emit_state(dev_priv, file_priv, &state->context,
651                                  state->tex, state->dirty);
652 }
653
654 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
655  * 1.3 cmdbuffers allow all previous state to be updated as well as
656  * the tcl scalar and vector areas.
657  */
658 static struct {
659         int start;
660         int len;
661         const char *name;
662 } packet[RADEON_MAX_STATE_PACKETS] = {
663         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
664         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
665         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
666         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
667         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
668         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
669         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
670         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
671         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
672         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
673         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
674         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
675         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
676         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
677         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
678         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
679         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
680         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
681         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
682         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
683         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
684                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
685         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
686         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
687         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
688         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
689         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
690         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
691         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
692         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
693         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
694         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
695         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
696         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
697         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
698         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
699         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
700         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
701         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
702         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
703         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
704         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
705         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
706         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
707         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
708         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
709         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
710         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
711         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
712         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
713         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
714          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
715         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
716         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
717         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
718         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
719         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
720         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
721         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
722         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
723         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
724         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
725         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
726                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
727         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
728         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
729         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
730         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
731         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
732         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
733         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
734         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
735         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
736         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
737         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
738         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
739         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
740         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
741         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
742         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
743         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
744         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
745         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
746         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
747         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
748         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
749         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
750         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
751         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
752         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
753         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
754         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
755         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
756         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
757         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
758         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
759         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
760         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
761 };
762
763 /* ================================================================
764  * Performance monitoring functions
765  */
766
767 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
768                              struct drm_radeon_master_private *master_priv,
769                              int x, int y, int w, int h, int r, int g, int b)
770 {
771         u32 color;
772         RING_LOCALS;
773
774         x += master_priv->sarea_priv->boxes[0].x1;
775         y += master_priv->sarea_priv->boxes[0].y1;
776
777         switch (dev_priv->color_fmt) {
778         case RADEON_COLOR_FORMAT_RGB565:
779                 color = (((r & 0xf8) << 8) |
780                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
781                 break;
782         case RADEON_COLOR_FORMAT_ARGB8888:
783         default:
784                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
785                 break;
786         }
787
788         BEGIN_RING(4);
789         RADEON_WAIT_UNTIL_3D_IDLE();
790         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
791         OUT_RING(0xffffffff);
792         ADVANCE_RING();
793
794         BEGIN_RING(6);
795
796         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
797         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
798                  RADEON_GMC_BRUSH_SOLID_COLOR |
799                  (dev_priv->color_fmt << 8) |
800                  RADEON_GMC_SRC_DATATYPE_COLOR |
801                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
802
803         if (master_priv->sarea_priv->pfCurrentPage == 1) {
804                 OUT_RING(dev_priv->front_pitch_offset);
805         } else {
806                 OUT_RING(dev_priv->back_pitch_offset);
807         }
808
809         OUT_RING(color);
810
811         OUT_RING((x << 16) | y);
812         OUT_RING((w << 16) | h);
813
814         ADVANCE_RING();
815 }
816
817 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
818 {
819         /* Collapse various things into a wait flag -- trying to
820          * guess if userspase slept -- better just to have them tell us.
821          */
822         if (dev_priv->stats.last_frame_reads > 1 ||
823             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
824                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
825         }
826
827         if (dev_priv->stats.freelist_loops) {
828                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
829         }
830
831         /* Purple box for page flipping
832          */
833         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
834                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
835
836         /* Red box if we have to wait for idle at any point
837          */
838         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
839                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
840
841         /* Blue box: lost context?
842          */
843
844         /* Yellow box for texture swaps
845          */
846         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
847                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
848
849         /* Green box if hardware never idles (as far as we can tell)
850          */
851         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
852                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
853
854         /* Draw bars indicating number of buffers allocated
855          * (not a great measure, easily confused)
856          */
857         if (dev_priv->stats.requested_bufs) {
858                 if (dev_priv->stats.requested_bufs > 100)
859                         dev_priv->stats.requested_bufs = 100;
860
861                 radeon_clear_box(dev_priv, master_priv, 4, 16,
862                                  dev_priv->stats.requested_bufs, 4,
863                                  196, 128, 128);
864         }
865
866         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
867
868 }
869
870 /* ================================================================
871  * CP command dispatch functions
872  */
873
874 static void radeon_cp_dispatch_clear(struct drm_device * dev,
875                                      struct drm_master *master,
876                                      drm_radeon_clear_t * clear,
877                                      drm_radeon_clear_rect_t * depth_boxes)
878 {
879         drm_radeon_private_t *dev_priv = dev->dev_private;
880         struct drm_radeon_master_private *master_priv = master->driver_priv;
881         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
882         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
883         int nbox = sarea_priv->nbox;
884         struct drm_clip_rect *pbox = sarea_priv->boxes;
885         unsigned int flags = clear->flags;
886         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
887         int i;
888         RING_LOCALS;
889         DRM_DEBUG("flags = 0x%x\n", flags);
890
891         dev_priv->stats.clears++;
892
893         if (sarea_priv->pfCurrentPage == 1) {
894                 unsigned int tmp = flags;
895
896                 flags &= ~(RADEON_FRONT | RADEON_BACK);
897                 if (tmp & RADEON_FRONT)
898                         flags |= RADEON_BACK;
899                 if (tmp & RADEON_BACK)
900                         flags |= RADEON_FRONT;
901         }
902         if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
903                 if (!dev_priv->have_z_offset) {
904                         DRM_ERROR("radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
905                         flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
906                 }
907         }
908
909         if (flags & (RADEON_FRONT | RADEON_BACK)) {
910
911                 BEGIN_RING(4);
912
913                 /* Ensure the 3D stream is idle before doing a
914                  * 2D fill to clear the front or back buffer.
915                  */
916                 RADEON_WAIT_UNTIL_3D_IDLE();
917
918                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
919                 OUT_RING(clear->color_mask);
920
921                 ADVANCE_RING();
922
923                 /* Make sure we restore the 3D state next time.
924                  */
925                 sarea_priv->ctx_owner = 0;
926
927                 for (i = 0; i < nbox; i++) {
928                         int x = pbox[i].x1;
929                         int y = pbox[i].y1;
930                         int w = pbox[i].x2 - x;
931                         int h = pbox[i].y2 - y;
932
933                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
934                                   x, y, w, h, flags);
935
936                         if (flags & RADEON_FRONT) {
937                                 BEGIN_RING(6);
938
939                                 OUT_RING(CP_PACKET3
940                                          (RADEON_CNTL_PAINT_MULTI, 4));
941                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
942                                          RADEON_GMC_BRUSH_SOLID_COLOR |
943                                          (dev_priv->
944                                           color_fmt << 8) |
945                                          RADEON_GMC_SRC_DATATYPE_COLOR |
946                                          RADEON_ROP3_P |
947                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
948
949                                 OUT_RING(dev_priv->front_pitch_offset);
950                                 OUT_RING(clear->clear_color);
951
952                                 OUT_RING((x << 16) | y);
953                                 OUT_RING((w << 16) | h);
954
955                                 ADVANCE_RING();
956                         }
957
958                         if (flags & RADEON_BACK) {
959                                 BEGIN_RING(6);
960
961                                 OUT_RING(CP_PACKET3
962                                          (RADEON_CNTL_PAINT_MULTI, 4));
963                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
964                                          RADEON_GMC_BRUSH_SOLID_COLOR |
965                                          (dev_priv->
966                                           color_fmt << 8) |
967                                          RADEON_GMC_SRC_DATATYPE_COLOR |
968                                          RADEON_ROP3_P |
969                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
970
971                                 OUT_RING(dev_priv->back_pitch_offset);
972                                 OUT_RING(clear->clear_color);
973
974                                 OUT_RING((x << 16) | y);
975                                 OUT_RING((w << 16) | h);
976
977                                 ADVANCE_RING();
978                         }
979                 }
980         }
981
982         /* hyper z clear */
983         /* no docs available, based on reverse engineering by Stephane Marchesin */
984         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
985             && (flags & RADEON_CLEAR_FASTZ)) {
986
987                 int i;
988                 int depthpixperline =
989                     dev_priv->depth_fmt ==
990                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
991                                                        2) : (dev_priv->
992                                                              depth_pitch / 4);
993
994                 u32 clearmask;
995
996                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
997                     ((clear->depth_mask & 0xff) << 24);
998
999                 /* Make sure we restore the 3D state next time.
1000                  * we haven't touched any "normal" state - still need this?
1001                  */
1002                 sarea_priv->ctx_owner = 0;
1003
1004                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1005                     && (flags & RADEON_USE_HIERZ)) {
1006                         /* FIXME : reverse engineer that for Rx00 cards */
1007                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1008                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1009                            value into account? */
1010                         /* pattern seems to work for r100, though get slight
1011                            rendering errors with glxgears. If hierz is not enabled for r100,
1012                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
1013                            other ones are ignored, and the same clear mask can be used. That's
1014                            very different behaviour than R200 which needs different clear mask
1015                            and different number of tiles to clear if hierz is enabled or not !?!
1016                          */
1017                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1018                 } else {
1019                         /* clear mask : chooses the clearing pattern.
1020                            rv250: could be used to clear only parts of macrotiles
1021                            (but that would get really complicated...)?
1022                            bit 0 and 1 (either or both of them ?!?!) are used to
1023                            not clear tile (or maybe one of the bits indicates if the tile is
1024                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
1025                            Pattern is as follows:
1026                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1027                            bits -------------------------------------------------
1028                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1029                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1030                            covers 256 pixels ?!?
1031                          */
1032                         clearmask = 0x0;
1033                 }
1034
1035                 BEGIN_RING(8);
1036                 RADEON_WAIT_UNTIL_2D_IDLE();
1037                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1038                              tempRB3D_DEPTHCLEARVALUE);
1039                 /* what offset is this exactly ? */
1040                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1041                 /* need ctlstat, otherwise get some strange black flickering */
1042                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1043                              RADEON_RB3D_ZC_FLUSH_ALL);
1044                 ADVANCE_RING();
1045
1046                 for (i = 0; i < nbox; i++) {
1047                         int tileoffset, nrtilesx, nrtilesy, j;
1048                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1049                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1050                             && !(dev_priv->microcode_version == UCODE_R200)) {
1051                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1052                                    maybe r200 actually doesn't need to put the low-res z value into
1053                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1054                                    Works for R100, both with hierz and without.
1055                                    R100 seems to operate on 2x1 8x8 tiles, but...
1056                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1057                                    problematic with resolutions which are not 64 pix aligned? */
1058                                 tileoffset =
1059                                     ((pbox[i].y1 >> 3) * depthpixperline +
1060                                      pbox[i].x1) >> 6;
1061                                 nrtilesx =
1062                                     ((pbox[i].x2 & ~63) -
1063                                      (pbox[i].x1 & ~63)) >> 4;
1064                                 nrtilesy =
1065                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1066                                 for (j = 0; j <= nrtilesy; j++) {
1067                                         BEGIN_RING(4);
1068                                         OUT_RING(CP_PACKET3
1069                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1070                                         /* first tile */
1071                                         OUT_RING(tileoffset * 8);
1072                                         /* the number of tiles to clear */
1073                                         OUT_RING(nrtilesx + 4);
1074                                         /* clear mask : chooses the clearing pattern. */
1075                                         OUT_RING(clearmask);
1076                                         ADVANCE_RING();
1077                                         tileoffset += depthpixperline >> 6;
1078                                 }
1079                         } else if (dev_priv->microcode_version == UCODE_R200) {
1080                                 /* works for rv250. */
1081                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1082                                 tileoffset =
1083                                     ((pbox[i].y1 >> 3) * depthpixperline +
1084                                      pbox[i].x1) >> 5;
1085                                 nrtilesx =
1086                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1087                                 nrtilesy =
1088                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1089                                 for (j = 0; j <= nrtilesy; j++) {
1090                                         BEGIN_RING(4);
1091                                         OUT_RING(CP_PACKET3
1092                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1093                                         /* first tile */
1094                                         /* judging by the first tile offset needed, could possibly
1095                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1096                                            macro tiles, though would still need clear mask for
1097                                            right/bottom if truly 4x4 granularity is desired ? */
1098                                         OUT_RING(tileoffset * 16);
1099                                         /* the number of tiles to clear */
1100                                         OUT_RING(nrtilesx + 1);
1101                                         /* clear mask : chooses the clearing pattern. */
1102                                         OUT_RING(clearmask);
1103                                         ADVANCE_RING();
1104                                         tileoffset += depthpixperline >> 5;
1105                                 }
1106                         } else {        /* rv 100 */
1107                                 /* rv100 might not need 64 pix alignment, who knows */
1108                                 /* offsets are, hmm, weird */
1109                                 tileoffset =
1110                                     ((pbox[i].y1 >> 4) * depthpixperline +
1111                                      pbox[i].x1) >> 6;
1112                                 nrtilesx =
1113                                     ((pbox[i].x2 & ~63) -
1114                                      (pbox[i].x1 & ~63)) >> 4;
1115                                 nrtilesy =
1116                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1117                                 for (j = 0; j <= nrtilesy; j++) {
1118                                         BEGIN_RING(4);
1119                                         OUT_RING(CP_PACKET3
1120                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1121                                         OUT_RING(tileoffset * 128);
1122                                         /* the number of tiles to clear */
1123                                         OUT_RING(nrtilesx + 4);
1124                                         /* clear mask : chooses the clearing pattern. */
1125                                         OUT_RING(clearmask);
1126                                         ADVANCE_RING();
1127                                         tileoffset += depthpixperline >> 6;
1128                                 }
1129                         }
1130                 }
1131
1132                 /* TODO don't always clear all hi-level z tiles */
1133                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1134                     && (dev_priv->microcode_version == UCODE_R200)
1135                     && (flags & RADEON_USE_HIERZ))
1136                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1137                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1138                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1139                            value into account? */
1140                 {
1141                         BEGIN_RING(4);
1142                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1143                         OUT_RING(0x0);  /* First tile */
1144                         OUT_RING(0x3cc0);
1145                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1146                         ADVANCE_RING();
1147                 }
1148         }
1149
1150         /* We have to clear the depth and/or stencil buffers by
1151          * rendering a quad into just those buffers.  Thus, we have to
1152          * make sure the 3D engine is configured correctly.
1153          */
1154         else if ((dev_priv->microcode_version == UCODE_R200) &&
1155                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1156
1157                 int tempPP_CNTL;
1158                 int tempRE_CNTL;
1159                 int tempRB3D_CNTL;
1160                 int tempRB3D_ZSTENCILCNTL;
1161                 int tempRB3D_STENCILREFMASK;
1162                 int tempRB3D_PLANEMASK;
1163                 int tempSE_CNTL;
1164                 int tempSE_VTE_CNTL;
1165                 int tempSE_VTX_FMT_0;
1166                 int tempSE_VTX_FMT_1;
1167                 int tempSE_VAP_CNTL;
1168                 int tempRE_AUX_SCISSOR_CNTL;
1169
1170                 tempPP_CNTL = 0;
1171                 tempRE_CNTL = 0;
1172
1173                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1174
1175                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1176                 tempRB3D_STENCILREFMASK = 0x0;
1177
1178                 tempSE_CNTL = depth_clear->se_cntl;
1179
1180                 /* Disable TCL */
1181
1182                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1183                                           (0x9 <<
1184                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1185
1186                 tempRB3D_PLANEMASK = 0x0;
1187
1188                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1189
1190                 tempSE_VTE_CNTL =
1191                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1192
1193                 /* Vertex format (X, Y, Z, W) */
1194                 tempSE_VTX_FMT_0 =
1195                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1196                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1197                 tempSE_VTX_FMT_1 = 0x0;
1198
1199                 /*
1200                  * Depth buffer specific enables
1201                  */
1202                 if (flags & RADEON_DEPTH) {
1203                         /* Enable depth buffer */
1204                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1205                 } else {
1206                         /* Disable depth buffer */
1207                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1208                 }
1209
1210                 /*
1211                  * Stencil buffer specific enables
1212                  */
1213                 if (flags & RADEON_STENCIL) {
1214                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1215                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1216                 } else {
1217                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1218                         tempRB3D_STENCILREFMASK = 0x00000000;
1219                 }
1220
1221                 if (flags & RADEON_USE_COMP_ZBUF) {
1222                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1223                             RADEON_Z_DECOMPRESSION_ENABLE;
1224                 }
1225                 if (flags & RADEON_USE_HIERZ) {
1226                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1227                 }
1228
1229                 BEGIN_RING(26);
1230                 RADEON_WAIT_UNTIL_2D_IDLE();
1231
1232                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1233                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1234                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1235                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1236                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1237                              tempRB3D_STENCILREFMASK);
1238                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1239                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1240                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1241                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1242                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1243                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1244                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1245                 ADVANCE_RING();
1246
1247                 /* Make sure we restore the 3D state next time.
1248                  */
1249                 sarea_priv->ctx_owner = 0;
1250
1251                 for (i = 0; i < nbox; i++) {
1252
1253                         /* Funny that this should be required --
1254                          *  sets top-left?
1255                          */
1256                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1257
1258                         BEGIN_RING(14);
1259                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1260                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1261                                   RADEON_PRIM_WALK_RING |
1262                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1263                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1264                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1265                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1266                         OUT_RING(0x3f800000);
1267                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1268                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1269                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1270                         OUT_RING(0x3f800000);
1271                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1272                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1273                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1274                         OUT_RING(0x3f800000);
1275                         ADVANCE_RING();
1276                 }
1277         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1278
1279                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1280
1281                 rb3d_cntl = depth_clear->rb3d_cntl;
1282
1283                 if (flags & RADEON_DEPTH) {
1284                         rb3d_cntl |= RADEON_Z_ENABLE;
1285                 } else {
1286                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1287                 }
1288
1289                 if (flags & RADEON_STENCIL) {
1290                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1291                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1292                 } else {
1293                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1294                         rb3d_stencilrefmask = 0x00000000;
1295                 }
1296
1297                 if (flags & RADEON_USE_COMP_ZBUF) {
1298                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1299                             RADEON_Z_DECOMPRESSION_ENABLE;
1300                 }
1301                 if (flags & RADEON_USE_HIERZ) {
1302                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1303                 }
1304
1305                 BEGIN_RING(13);
1306                 RADEON_WAIT_UNTIL_2D_IDLE();
1307
1308                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1309                 OUT_RING(0x00000000);
1310                 OUT_RING(rb3d_cntl);
1311
1312                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1313                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1314                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1315                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1316                 ADVANCE_RING();
1317
1318                 /* Make sure we restore the 3D state next time.
1319                  */
1320                 sarea_priv->ctx_owner = 0;
1321
1322                 for (i = 0; i < nbox; i++) {
1323
1324                         /* Funny that this should be required --
1325                          *  sets top-left?
1326                          */
1327                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1328
1329                         BEGIN_RING(15);
1330
1331                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1332                         OUT_RING(RADEON_VTX_Z_PRESENT |
1333                                  RADEON_VTX_PKCOLOR_PRESENT);
1334                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1335                                   RADEON_PRIM_WALK_RING |
1336                                   RADEON_MAOS_ENABLE |
1337                                   RADEON_VTX_FMT_RADEON_MODE |
1338                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1339
1340                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1341                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1342                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1343                         OUT_RING(0x0);
1344
1345                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1346                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1347                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1348                         OUT_RING(0x0);
1349
1350                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1351                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1352                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1353                         OUT_RING(0x0);
1354
1355                         ADVANCE_RING();
1356                 }
1357         }
1358
1359         /* Increment the clear counter.  The client-side 3D driver must
1360          * wait on this value before performing the clear ioctl.  We
1361          * need this because the card's so damned fast...
1362          */
1363         sarea_priv->last_clear++;
1364
1365         BEGIN_RING(4);
1366
1367         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1368         RADEON_WAIT_UNTIL_IDLE();
1369
1370         ADVANCE_RING();
1371 }
1372
1373 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1374 {
1375         drm_radeon_private_t *dev_priv = dev->dev_private;
1376         struct drm_radeon_master_private *master_priv = master->driver_priv;
1377         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1378         int nbox = sarea_priv->nbox;
1379         struct drm_clip_rect *pbox = sarea_priv->boxes;
1380         int i;
1381         RING_LOCALS;
1382         DRM_DEBUG("\n");
1383
1384         /* Do some trivial performance monitoring...
1385          */
1386         if (dev_priv->do_boxes)
1387                 radeon_cp_performance_boxes(dev_priv, master_priv);
1388
1389         /* Wait for the 3D stream to idle before dispatching the bitblt.
1390          * This will prevent data corruption between the two streams.
1391          */
1392         BEGIN_RING(2);
1393
1394         RADEON_WAIT_UNTIL_3D_IDLE();
1395
1396         ADVANCE_RING();
1397
1398         for (i = 0; i < nbox; i++) {
1399                 int x = pbox[i].x1;
1400                 int y = pbox[i].y1;
1401                 int w = pbox[i].x2 - x;
1402                 int h = pbox[i].y2 - y;
1403
1404                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1405
1406                 BEGIN_RING(9);
1407
1408                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1409                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1410                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1411                          RADEON_GMC_BRUSH_NONE |
1412                          (dev_priv->color_fmt << 8) |
1413                          RADEON_GMC_SRC_DATATYPE_COLOR |
1414                          RADEON_ROP3_S |
1415                          RADEON_DP_SRC_SOURCE_MEMORY |
1416                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1417
1418                 /* Make this work even if front & back are flipped:
1419                  */
1420                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1421                 if (sarea_priv->pfCurrentPage == 0) {
1422                         OUT_RING(dev_priv->back_pitch_offset);
1423                         OUT_RING(dev_priv->front_pitch_offset);
1424                 } else {
1425                         OUT_RING(dev_priv->front_pitch_offset);
1426                         OUT_RING(dev_priv->back_pitch_offset);
1427                 }
1428
1429                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1430                 OUT_RING((x << 16) | y);
1431                 OUT_RING((x << 16) | y);
1432                 OUT_RING((w << 16) | h);
1433
1434                 ADVANCE_RING();
1435         }
1436
1437         /* Increment the frame counter.  The client-side 3D driver must
1438          * throttle the framerate by waiting for this value before
1439          * performing the swapbuffer ioctl.
1440          */
1441         sarea_priv->last_frame++;
1442
1443         BEGIN_RING(4);
1444
1445         RADEON_FRAME_AGE(sarea_priv->last_frame);
1446         RADEON_WAIT_UNTIL_2D_IDLE();
1447
1448         ADVANCE_RING();
1449 }
1450
1451 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1452 {
1453         drm_radeon_private_t *dev_priv = dev->dev_private;
1454         struct drm_radeon_master_private *master_priv = master->driver_priv;
1455         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1456         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1457             ? dev_priv->front_offset : dev_priv->back_offset;
1458         RING_LOCALS;
1459         DRM_DEBUG("pfCurrentPage=%d\n",
1460                   master_priv->sarea_priv->pfCurrentPage);
1461
1462         /* Do some trivial performance monitoring...
1463          */
1464         if (dev_priv->do_boxes) {
1465                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1466                 radeon_cp_performance_boxes(dev_priv, master_priv);
1467         }
1468
1469         /* Update the frame offsets for both CRTCs
1470          */
1471         BEGIN_RING(6);
1472
1473         RADEON_WAIT_UNTIL_3D_IDLE();
1474         OUT_RING_REG(RADEON_CRTC_OFFSET,
1475                      ((sarea->frame.y * dev_priv->front_pitch +
1476                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1477                      + offset);
1478         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1479                      + offset);
1480
1481         ADVANCE_RING();
1482
1483         /* Increment the frame counter.  The client-side 3D driver must
1484          * throttle the framerate by waiting for this value before
1485          * performing the swapbuffer ioctl.
1486          */
1487         master_priv->sarea_priv->last_frame++;
1488         master_priv->sarea_priv->pfCurrentPage =
1489                 1 - master_priv->sarea_priv->pfCurrentPage;
1490
1491         BEGIN_RING(2);
1492
1493         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1494
1495         ADVANCE_RING();
1496 }
1497
1498 static int bad_prim_vertex_nr(int primitive, int nr)
1499 {
1500         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1501         case RADEON_PRIM_TYPE_NONE:
1502         case RADEON_PRIM_TYPE_POINT:
1503                 return nr < 1;
1504         case RADEON_PRIM_TYPE_LINE:
1505                 return (nr & 1) || nr == 0;
1506         case RADEON_PRIM_TYPE_LINE_STRIP:
1507                 return nr < 2;
1508         case RADEON_PRIM_TYPE_TRI_LIST:
1509         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1510         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1511         case RADEON_PRIM_TYPE_RECT_LIST:
1512                 return nr % 3 || nr == 0;
1513         case RADEON_PRIM_TYPE_TRI_FAN:
1514         case RADEON_PRIM_TYPE_TRI_STRIP:
1515                 return nr < 3;
1516         default:
1517                 return 1;
1518         }
1519 }
1520
/*
 * Description of one primitive to draw from a vertex or element buffer, as
 * consumed by radeon_cp_dispatch_vertex() and radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive's data in the buffer */
	unsigned int finish;	/* byte offset just past the primitive's data */
	unsigned int prim;	/* hardware primitive type, OR'ed into the draw command */
	unsigned int numverts;	/* vertex count */
	unsigned int offset;	/* added to gart_buffers_offset for indexed draws */
	unsigned int vc_format;	/* vertex format dword, emitted unchanged */
} drm_radeon_tcl_prim_t;
1529
1530 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1531                                       struct drm_file *file_priv,
1532                                       struct drm_buf * buf,
1533                                       drm_radeon_tcl_prim_t * prim)
1534 {
1535         drm_radeon_private_t *dev_priv = dev->dev_private;
1536         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
1537         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1538         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1539         int numverts = (int)prim->numverts;
1540         int nbox = sarea_priv->nbox;
1541         int i = 0;
1542         RING_LOCALS;
1543
1544         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1545                   prim->prim,
1546                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1547
1548         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1549                 DRM_ERROR("bad prim %x numverts %d\n",
1550                           prim->prim, prim->numverts);
1551                 return;
1552         }
1553
1554         do {
1555                 /* Emit the next cliprect */
1556                 if (i < nbox) {
1557                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1558                 }
1559
1560                 /* Emit the vertex buffer rendering commands */
1561                 BEGIN_RING(5);
1562
1563                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1564                 OUT_RING(offset);
1565                 OUT_RING(numverts);
1566                 OUT_RING(prim->vc_format);
1567                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1568                          RADEON_COLOR_ORDER_RGBA |
1569                          RADEON_VTX_FMT_RADEON_MODE |
1570                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1571
1572                 ADVANCE_RING();
1573
1574                 i++;
1575         } while (i < nbox);
1576 }
1577
1578 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1579 {
1580         drm_radeon_private_t *dev_priv = dev->dev_private;
1581         struct drm_radeon_master_private *master_priv = master->driver_priv;
1582         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1583         RING_LOCALS;
1584
1585         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1586
1587         /* Emit the vertex buffer age */
1588         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1589                 BEGIN_RING(3);
1590                 R600_DISPATCH_AGE(buf_priv->age);
1591                 ADVANCE_RING();
1592         } else {
1593                 BEGIN_RING(2);
1594                 RADEON_DISPATCH_AGE(buf_priv->age);
1595                 ADVANCE_RING();
1596         }
1597
1598         buf->pending = 1;
1599         buf->used = 0;
1600 }
1601
1602 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1603                                         struct drm_buf * buf, int start, int end)
1604 {
1605         drm_radeon_private_t *dev_priv = dev->dev_private;
1606         RING_LOCALS;
1607         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1608
1609         if (start != end) {
1610                 int offset = (dev_priv->gart_buffers_offset
1611                               + buf->offset + start);
1612                 int dwords = (end - start + 3) / sizeof(u32);
1613
1614                 /* Indirect buffer data must be an even number of
1615                  * dwords, so if we've been given an odd number we must
1616                  * pad the data with a Type-2 CP packet.
1617                  */
1618                 if (dwords & 1) {
1619                         u32 *data = (u32 *)
1620                             ((char *)dev->agp_buffer_map->handle
1621                              + buf->offset + start);
1622                         data[dwords++] = RADEON_CP_PACKET2;
1623                 }
1624
1625                 /* Fire off the indirect buffer */
1626                 BEGIN_RING(3);
1627
1628                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1629                 OUT_RING(offset);
1630                 OUT_RING(dwords);
1631
1632                 ADVANCE_RING();
1633         }
1634 }
1635
1636 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1637                                        struct drm_master *master,
1638                                        struct drm_buf * elt_buf,
1639                                        drm_radeon_tcl_prim_t * prim)
1640 {
1641         drm_radeon_private_t *dev_priv = dev->dev_private;
1642         struct drm_radeon_master_private *master_priv = master->driver_priv;
1643         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1644         int offset = dev_priv->gart_buffers_offset + prim->offset;
1645         u32 *data;
1646         int dwords;
1647         int i = 0;
1648         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1649         int count = (prim->finish - start) / sizeof(u16);
1650         int nbox = sarea_priv->nbox;
1651
1652         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1653                   prim->prim,
1654                   prim->vc_format,
1655                   prim->start, prim->finish, prim->offset, prim->numverts);
1656
1657         if (bad_prim_vertex_nr(prim->prim, count)) {
1658                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1659                 return;
1660         }
1661
1662         if (start >= prim->finish || (prim->start & 0x7)) {
1663                 DRM_ERROR("buffer prim %d\n", prim->prim);
1664                 return;
1665         }
1666
1667         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1668
1669         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1670                         elt_buf->offset + prim->start);
1671
1672         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1673         data[1] = offset;
1674         data[2] = prim->numverts;
1675         data[3] = prim->vc_format;
1676         data[4] = (prim->prim |
1677                    RADEON_PRIM_WALK_IND |
1678                    RADEON_COLOR_ORDER_RGBA |
1679                    RADEON_VTX_FMT_RADEON_MODE |
1680                    (count << RADEON_NUM_VERTICES_SHIFT));
1681
1682         do {
1683                 if (i < nbox)
1684                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1685
1686                 radeon_cp_dispatch_indirect(dev, elt_buf,
1687                                             prim->start, prim->finish);
1688
1689                 i++;
1690         } while (i < nbox);
1691
1692 }
1693
1694 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1695
1696 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1697                                       struct drm_file *file_priv,
1698                                       drm_radeon_texture_t * tex,
1699                                       drm_radeon_tex_image_t * image)
1700 {
1701         drm_radeon_private_t *dev_priv = dev->dev_private;
1702         struct drm_buf *buf;
1703         u32 format;
1704         u32 *buffer;
1705         const u8 __user *data;
1706         int size, dwords, tex_width, blit_width, spitch;
1707         u32 height;
1708         int i;
1709         u32 texpitch, microtile;
1710         u32 offset, byte_offset;
1711         RING_LOCALS;
1712
1713         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1714                 DRM_ERROR("Invalid destination offset\n");
1715                 return -EINVAL;
1716         }
1717
1718         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1719
1720         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1721          * up with the texture data from the host data blit, otherwise
1722          * part of the texture image may be corrupted.
1723          */
1724         BEGIN_RING(4);
1725         RADEON_FLUSH_CACHE();
1726         RADEON_WAIT_UNTIL_IDLE();
1727         ADVANCE_RING();
1728
1729         /* The compiler won't optimize away a division by a variable,
1730          * even if the only legal values are powers of two.  Thus, we'll
1731          * use a shift instead.
1732          */
1733         switch (tex->format) {
1734         case RADEON_TXFORMAT_ARGB8888:
1735         case RADEON_TXFORMAT_RGBA8888:
1736                 format = RADEON_COLOR_FORMAT_ARGB8888;
1737                 tex_width = tex->width * 4;
1738                 blit_width = image->width * 4;
1739                 break;
1740         case RADEON_TXFORMAT_AI88:
1741         case RADEON_TXFORMAT_ARGB1555:
1742         case RADEON_TXFORMAT_RGB565:
1743         case RADEON_TXFORMAT_ARGB4444:
1744         case RADEON_TXFORMAT_VYUY422:
1745         case RADEON_TXFORMAT_YVYU422:
1746                 format = RADEON_COLOR_FORMAT_RGB565;
1747                 tex_width = tex->width * 2;
1748                 blit_width = image->width * 2;
1749                 break;
1750         case RADEON_TXFORMAT_I8:
1751         case RADEON_TXFORMAT_RGB332:
1752                 format = RADEON_COLOR_FORMAT_CI8;
1753                 tex_width = tex->width * 1;
1754                 blit_width = image->width * 1;
1755                 break;
1756         default:
1757                 DRM_ERROR("invalid texture format %d\n", tex->format);
1758                 return -EINVAL;
1759         }
1760         spitch = blit_width >> 6;
1761         if (spitch == 0 && image->height > 1)
1762                 return -EINVAL;
1763
1764         texpitch = tex->pitch;
1765         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1766                 microtile = 1;
1767                 if (tex_width < 64) {
1768                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1769                         /* we got tiled coordinates, untile them */
1770                         image->x *= 2;
1771                 }
1772         } else
1773                 microtile = 0;
1774
1775         /* this might fail for zero-sized uploads - are those illegal? */
1776         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1777                                 blit_width - 1)) {
1778                 DRM_ERROR("Invalid final destination offset\n");
1779                 return -EINVAL;
1780         }
1781
1782         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1783
1784         do {
1785                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%u y=%u w=%u h=%u\n",
1786                           tex->offset >> 10, tex->pitch, tex->format,
1787                           image->x, image->y, image->width, image->height);
1788
1789                 /* Make a copy of some parameters in case we have to
1790                  * update them for a multi-pass texture blit.
1791                  */
1792                 height = image->height;
1793                 data = (const u8 __user *)image->data;
1794
1795                 size = height * blit_width;
1796
1797                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1798                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1799                         size = height * blit_width;
1800                 } else if (size < 4 && size > 0) {
1801                         size = 4;
1802                 } else if (size == 0) {
1803                         return 0;
1804                 }
1805
1806                 buf = radeon_freelist_get(dev);
1807                 if (0 && !buf) {
1808                         radeon_do_cp_idle(dev_priv);
1809                         buf = radeon_freelist_get(dev);
1810                 }
1811                 if (!buf) {
1812                         DRM_DEBUG("EAGAIN\n");
1813                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1814                                 return -EFAULT;
1815                         return -EAGAIN;
1816                 }
1817
1818                 /* Dispatch the indirect buffer.
1819                  */
1820                 buffer =
1821                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1822                 dwords = size / 4;
1823
1824 #define RADEON_COPY_MT(_buf, _data, _width) \
1825         do { \
1826                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1827                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1828                         return -EFAULT; \
1829                 } \
1830         } while(0)
1831
1832                 if (microtile) {
1833                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1834                            however, we cannot use blitter directly for texture width < 64 bytes,
1835                            since minimum tex pitch is 64 bytes and we need this to match
1836                            the texture width, otherwise the blitter will tile it wrong.
1837                            Thus, tiling manually in this case. Additionally, need to special
1838                            case tex height = 1, since our actual image will have height 2
1839                            and we need to ensure we don't read beyond the texture size
1840                            from user space. */
1841                         if (tex->height == 1) {
1842                                 if (tex_width >= 64 || tex_width <= 16) {
1843                                         RADEON_COPY_MT(buffer, data,
1844                                                 (int)(tex_width * sizeof(u32)));
1845                                 } else if (tex_width == 32) {
1846                                         RADEON_COPY_MT(buffer, data, 16);
1847                                         RADEON_COPY_MT(buffer + 8,
1848                                                        data + 16, 16);
1849                                 }
1850                         } else if (tex_width >= 64 || tex_width == 16) {
1851                                 RADEON_COPY_MT(buffer, data,
1852                                                (int)(dwords * sizeof(u32)));
1853                         } else if (tex_width < 16) {
1854                                 for (i = 0; i < tex->height; i++) {
1855                                         RADEON_COPY_MT(buffer, data, tex_width);
1856                                         buffer += 4;
1857                                         data += tex_width;
1858                                 }
1859                         } else if (tex_width == 32) {
1860                                 /* TODO: make sure this works when not fitting in one buffer
1861                                    (i.e. 32bytes x 2048...) */
1862                                 for (i = 0; i < tex->height; i += 2) {
1863                                         RADEON_COPY_MT(buffer, data, 16);
1864                                         data += 16;
1865                                         RADEON_COPY_MT(buffer + 8, data, 16);
1866                                         data += 16;
1867                                         RADEON_COPY_MT(buffer + 4, data, 16);
1868                                         data += 16;
1869                                         RADEON_COPY_MT(buffer + 12, data, 16);
1870                                         data += 16;
1871                                         buffer += 16;
1872                                 }
1873                         }
1874                 } else {
1875                         if (tex_width >= 32) {
1876                                 /* Texture image width is larger than the minimum, so we
1877                                  * can upload it directly.
1878                                  */
1879                                 RADEON_COPY_MT(buffer, data,
1880                                                (int)(dwords * sizeof(u32)));
1881                         } else {
1882                                 /* Texture image width is less than the minimum, so we
1883                                  * need to pad out each image scanline to the minimum
1884                                  * width.
1885                                  */
1886                                 for (i = 0; i < tex->height; i++) {
1887                                         RADEON_COPY_MT(buffer, data, tex_width);
1888                                         buffer += 8;
1889                                         data += tex_width;
1890                                 }
1891                         }
1892                 }
1893
1894 #undef RADEON_COPY_MT
1895                 byte_offset = (image->y & ~2047) * blit_width;
1896                 buf->file_priv = file_priv;
1897                 buf->used = size;
1898                 offset = dev_priv->gart_buffers_offset + buf->offset;
1899                 BEGIN_RING(9);
1900                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1901                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1902                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1903                          RADEON_GMC_BRUSH_NONE |
1904                          (format << 8) |
1905                          RADEON_GMC_SRC_DATATYPE_COLOR |
1906                          RADEON_ROP3_S |
1907                          RADEON_DP_SRC_SOURCE_MEMORY |
1908                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1909                 OUT_RING((spitch << 22) | (offset >> 10));
1910                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1911                 OUT_RING(0);
1912                 OUT_RING((image->x << 16) | (image->y % 2048));
1913                 OUT_RING((image->width << 16) | height);
1914                 RADEON_WAIT_UNTIL_2D_IDLE();
1915                 ADVANCE_RING();
1916                 COMMIT_RING();
1917
1918                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
1919
1920                 /* Update the input parameters for next time */
1921                 image->y += height;
1922                 image->height -= height;
1923                 image->data = (const u8 __user *)image->data + size;
1924         } while (image->height > 0);
1925
1926         /* Flush the pixel cache after the blit completes.  This ensures
1927          * the texture data is written out to memory before rendering
1928          * continues.
1929          */
1930         BEGIN_RING(4);
1931         RADEON_FLUSH_CACHE();
1932         RADEON_WAIT_UNTIL_2D_IDLE();
1933         ADVANCE_RING();
1934         COMMIT_RING();
1935
1936         return 0;
1937 }
1938
1939 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1940 {
1941         drm_radeon_private_t *dev_priv = dev->dev_private;
1942         int i;
1943         RING_LOCALS;
1944         DRM_DEBUG("\n");
1945
1946         BEGIN_RING(35);
1947
1948         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1949         OUT_RING(0x00000000);
1950
1951         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1952         for (i = 0; i < 32; i++) {
1953                 OUT_RING(stipple[i]);
1954         }
1955
1956         ADVANCE_RING();
1957 }
1958
1959 static void radeon_apply_surface_regs(int surf_index,
1960                                       drm_radeon_private_t *dev_priv)
1961 {
1962         if (!dev_priv->mmio)
1963                 return;
1964
1965         radeon_do_cp_idle(dev_priv);
1966
1967         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1968                      dev_priv->surfaces[surf_index].flags);
1969         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1970                      dev_priv->surfaces[surf_index].lower);
1971         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1972                      dev_priv->surfaces[surf_index].upper);
1973 }
1974
1975 /* Allocates a virtual surface
1976  * doesn't always allocate a real surface, will stretch an existing
1977  * surface when possible.
1978  *
1979  * Note that refcount can be at most 2, since during a free refcount=3
1980  * might mean we have to allocate a new surface which might not always
1981  * be available.
1982  * For example : we allocate three contiguous surfaces ABC. If B is
1983  * freed, we suddenly need two surfaces to store A and C, which might
1984  * not always be available.
1985  */
1986 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1987                          drm_radeon_private_t *dev_priv,
1988                          struct drm_file *file_priv)
1989 {
1990         struct radeon_virt_surface *s;
1991         int i;
1992         int virt_surface_index;
1993         uint32_t new_upper, new_lower;
1994
1995         new_lower = new->address;
1996         new_upper = new_lower + new->size - 1;
1997
1998         /* sanity check */
1999         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
2000             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2001              RADEON_SURF_ADDRESS_FIXED_MASK)
2002             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2003                 return -1;
2004
2005         /* make sure there is no overlap with existing surfaces */
2006         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2007                 if ((dev_priv->surfaces[i].refcount != 0) &&
2008                     (((new_lower >= dev_priv->surfaces[i].lower) &&
2009                       (new_lower < dev_priv->surfaces[i].upper)) ||
2010                      ((new_lower < dev_priv->surfaces[i].lower) &&
2011                       (new_upper > dev_priv->surfaces[i].lower)))) {
2012                         return -1;
2013                 }
2014         }
2015
2016         /* find a virtual surface */
2017         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2018                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
2019                         break;
2020         if (i == 2 * RADEON_MAX_SURFACES) {
2021                 return -1;
2022         }
2023         virt_surface_index = i;
2024
2025         /* try to reuse an existing surface */
2026         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2027                 /* extend before */
2028                 if ((dev_priv->surfaces[i].refcount == 1) &&
2029                     (new->flags == dev_priv->surfaces[i].flags) &&
2030                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2031                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2032                         s->surface_index = i;
2033                         s->lower = new_lower;
2034                         s->upper = new_upper;
2035                         s->flags = new->flags;
2036                         s->file_priv = file_priv;
2037                         dev_priv->surfaces[i].refcount++;
2038                         dev_priv->surfaces[i].lower = s->lower;
2039                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2040                         return virt_surface_index;
2041                 }
2042
2043                 /* extend after */
2044                 if ((dev_priv->surfaces[i].refcount == 1) &&
2045                     (new->flags == dev_priv->surfaces[i].flags) &&
2046                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2047                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2048                         s->surface_index = i;
2049                         s->lower = new_lower;
2050                         s->upper = new_upper;
2051                         s->flags = new->flags;
2052                         s->file_priv = file_priv;
2053                         dev_priv->surfaces[i].refcount++;
2054                         dev_priv->surfaces[i].upper = s->upper;
2055                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2056                         return virt_surface_index;
2057                 }
2058         }
2059
2060         /* okay, we need a new one */
2061         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2062                 if (dev_priv->surfaces[i].refcount == 0) {
2063                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2064                         s->surface_index = i;
2065                         s->lower = new_lower;
2066                         s->upper = new_upper;
2067                         s->flags = new->flags;
2068                         s->file_priv = file_priv;
2069                         dev_priv->surfaces[i].refcount = 1;
2070                         dev_priv->surfaces[i].lower = s->lower;
2071                         dev_priv->surfaces[i].upper = s->upper;
2072                         dev_priv->surfaces[i].flags = s->flags;
2073                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2074                         return virt_surface_index;
2075                 }
2076         }
2077
2078         /* we didn't find anything */
2079         return -1;
2080 }
2081
2082 static int free_surface(struct drm_file *file_priv,
2083                         drm_radeon_private_t * dev_priv,
2084                         int lower)
2085 {
2086         struct radeon_virt_surface *s;
2087         int i;
2088         /* find the virtual surface */
2089         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2090                 s = &(dev_priv->virt_surfaces[i]);
2091                 if (s->file_priv) {
2092                         if ((lower == s->lower) && (file_priv == s->file_priv))
2093                         {
2094                                 if (dev_priv->surfaces[s->surface_index].
2095                                     lower == s->lower)
2096                                         dev_priv->surfaces[s->surface_index].
2097                                             lower = s->upper;
2098
2099                                 if (dev_priv->surfaces[s->surface_index].
2100                                     upper == s->upper)
2101                                         dev_priv->surfaces[s->surface_index].
2102                                             upper = s->lower;
2103
2104                                 dev_priv->surfaces[s->surface_index].refcount--;
2105                                 if (dev_priv->surfaces[s->surface_index].
2106                                     refcount == 0)
2107                                         dev_priv->surfaces[s->surface_index].
2108                                             flags = 0;
2109                                 s->file_priv = NULL;
2110                                 radeon_apply_surface_regs(s->surface_index,
2111                                                           dev_priv);
2112                                 return 0;
2113                         }
2114                 }
2115         }
2116         return 1;
2117 }
2118
2119 static void radeon_surfaces_release(struct drm_file *file_priv,
2120                                     drm_radeon_private_t * dev_priv)
2121 {
2122         int i;
2123         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2124                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2125                         free_surface(file_priv, dev_priv,
2126                                      dev_priv->virt_surfaces[i].lower);
2127         }
2128 }
2129
2130 /* ================================================================
2131  * IOCTL functions
2132  */
2133 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2134 {
2135         drm_radeon_private_t *dev_priv = dev->dev_private;
2136         drm_radeon_surface_alloc_t *alloc = data;
2137
2138         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2139                 return -EINVAL;
2140         else
2141                 return 0;
2142 }
2143
2144 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2145 {
2146         drm_radeon_private_t *dev_priv = dev->dev_private;
2147         drm_radeon_surface_free_t *memfree = data;
2148
2149         if (free_surface(file_priv, dev_priv, memfree->address))
2150                 return -EINVAL;
2151         else
2152                 return 0;
2153 }
2154
2155 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2156 {
2157         drm_radeon_private_t *dev_priv = dev->dev_private;
2158         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2159         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2160         drm_radeon_clear_t *clear = data;
2161         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2162         DRM_DEBUG("\n");
2163
2164         LOCK_TEST_WITH_RETURN(dev, file_priv);
2165
2166         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2167
2168         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2169                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2170
2171         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2172                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2173                 return -EFAULT;
2174
2175         radeon_cp_dispatch_clear(dev, file_priv->masterp, clear, depth_boxes);
2176
2177         COMMIT_RING();
2178         return 0;
2179 }
2180
2181 /* Not sure why this isn't set all the time:
2182  */
2183 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2184 {
2185         drm_radeon_private_t *dev_priv = dev->dev_private;
2186         struct drm_radeon_master_private *master_priv = master->driver_priv;
2187         RING_LOCALS;
2188
2189         DRM_DEBUG("\n");
2190
2191         BEGIN_RING(6);
2192         RADEON_WAIT_UNTIL_3D_IDLE();
2193         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2194         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2195                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2196         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2197         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2198                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2199         ADVANCE_RING();
2200
2201         dev_priv->page_flipping = 1;
2202
2203         if (master_priv->sarea_priv->pfCurrentPage != 1)
2204                 master_priv->sarea_priv->pfCurrentPage = 0;
2205
2206         return 0;
2207 }
2208
2209 /* Swapping and flipping are different operations, need different ioctls.
2210  * They can & should be intermixed to support multiple 3d windows.
2211  */
2212 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2213 {
2214         drm_radeon_private_t *dev_priv = dev->dev_private;
2215         DRM_DEBUG("\n");
2216
2217         LOCK_TEST_WITH_RETURN(dev, file_priv);
2218
2219         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2220
2221         if (!dev_priv->page_flipping)
2222                 radeon_do_init_pageflip(dev, file_priv->masterp);
2223
2224         radeon_cp_dispatch_flip(dev, file_priv->masterp);
2225
2226         COMMIT_RING();
2227         return 0;
2228 }
2229
2230 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2231 {
2232         drm_radeon_private_t *dev_priv = dev->dev_private;
2233         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2234         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2235
2236         DRM_DEBUG("\n");
2237
2238         LOCK_TEST_WITH_RETURN(dev, file_priv);
2239
2240         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2241
2242         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2243                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2244
2245         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2246                 r600_cp_dispatch_swap(dev, file_priv);
2247         else
2248                 radeon_cp_dispatch_swap(dev, file_priv->masterp);
2249         sarea_priv->ctx_owner = 0;
2250
2251         COMMIT_RING();
2252         return 0;
2253 }
2254
2255 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2256 {
2257         drm_radeon_private_t *dev_priv = dev->dev_private;
2258         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2259         drm_radeon_sarea_t *sarea_priv;
2260         struct drm_device_dma *dma = dev->dma;
2261         struct drm_buf *buf;
2262         drm_radeon_vertex_t *vertex = data;
2263         drm_radeon_tcl_prim_t prim;
2264
2265         LOCK_TEST_WITH_RETURN(dev, file_priv);
2266
2267         sarea_priv = master_priv->sarea_priv;
2268
2269         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2270                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2271
2272         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2273                 DRM_ERROR("buffer index %d (of %d max)\n",
2274                           vertex->idx, dma->buf_count - 1);
2275                 return -EINVAL;
2276         }
2277         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2278                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2279                 return -EINVAL;
2280         }
2281
2282         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2283         VB_AGE_TEST_WITH_RETURN(dev_priv);
2284
2285         buf = dma->buflist[vertex->idx];
2286
2287         if (buf->file_priv != file_priv) {
2288                 DRM_ERROR("process %d using buffer owned by %p\n",
2289                           DRM_CURRENTPID, buf->file_priv);
2290                 return -EINVAL;
2291         }
2292         if (buf->pending) {
2293                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2294                 return -EINVAL;
2295         }
2296
2297         /* Build up a prim_t record:
2298          */
2299         if (vertex->count) {
2300                 buf->used = vertex->count;      /* not used? */
2301
2302                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2303                         if (radeon_emit_state(dev_priv, file_priv,
2304                                               &sarea_priv->context_state,
2305                                               sarea_priv->tex_state,
2306                                               sarea_priv->dirty)) {
2307                                 DRM_ERROR("radeon_emit_state failed\n");
2308                                 return -EINVAL;
2309                         }
2310
2311                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2312                                                RADEON_UPLOAD_TEX1IMAGES |
2313                                                RADEON_UPLOAD_TEX2IMAGES |
2314                                                RADEON_REQUIRE_QUIESCENCE);
2315                 }
2316
2317                 prim.start = 0;
2318                 prim.finish = vertex->count;    /* unused */
2319                 prim.prim = vertex->prim;
2320                 prim.numverts = vertex->count;
2321                 prim.vc_format = sarea_priv->vc_format;
2322
2323                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2324         }
2325
2326         if (vertex->discard) {
2327                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2328         }
2329
2330         COMMIT_RING();
2331         return 0;
2332 }
2333
2334 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2335 {
2336         drm_radeon_private_t *dev_priv = dev->dev_private;
2337         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2338         drm_radeon_sarea_t *sarea_priv;
2339         struct drm_device_dma *dma = dev->dma;
2340         struct drm_buf *buf;
2341         drm_radeon_indices_t *elts = data;
2342         drm_radeon_tcl_prim_t prim;
2343         int count;
2344
2345         LOCK_TEST_WITH_RETURN(dev, file_priv);
2346
2347         sarea_priv = master_priv->sarea_priv;
2348
2349         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2350                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2351                   elts->discard);
2352
2353         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2354                 DRM_ERROR("buffer index %d (of %d max)\n",
2355                           elts->idx, dma->buf_count - 1);
2356                 return -EINVAL;
2357         }
2358         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2359                 DRM_ERROR("buffer prim %d\n", elts->prim);
2360                 return -EINVAL;
2361         }
2362
2363         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2364         VB_AGE_TEST_WITH_RETURN(dev_priv);
2365
2366         buf = dma->buflist[elts->idx];
2367
2368         if (buf->file_priv != file_priv) {
2369                 DRM_ERROR("process %d using buffer owned by %p\n",
2370                           DRM_CURRENTPID, buf->file_priv);
2371                 return -EINVAL;
2372         }
2373         if (buf->pending) {
2374                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2375                 return -EINVAL;
2376         }
2377
2378         count = (elts->end - elts->start) / sizeof(u16);
2379         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2380
2381         if (elts->start & 0x7) {
2382                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2383                 return -EINVAL;
2384         }
2385         if (elts->start < buf->used) {
2386                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2387                 return -EINVAL;
2388         }
2389
2390         buf->used = elts->end;
2391
2392         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2393                 if (radeon_emit_state(dev_priv, file_priv,
2394                                       &sarea_priv->context_state,
2395                                       sarea_priv->tex_state,
2396                                       sarea_priv->dirty)) {
2397                         DRM_ERROR("radeon_emit_state failed\n");
2398                         return -EINVAL;
2399                 }
2400
2401                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2402                                        RADEON_UPLOAD_TEX1IMAGES |
2403                                        RADEON_UPLOAD_TEX2IMAGES |
2404                                        RADEON_REQUIRE_QUIESCENCE);
2405         }
2406
2407         /* Build up a prim_t record:
2408          */
2409         prim.start = elts->start;
2410         prim.finish = elts->end;
2411         prim.prim = elts->prim;
2412         prim.offset = 0;        /* offset from start of dma buffers */
2413         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2414         prim.vc_format = sarea_priv->vc_format;
2415
2416         radeon_cp_dispatch_indices(dev, file_priv->masterp, buf, &prim);
2417         if (elts->discard) {
2418                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2419         }
2420
2421         COMMIT_RING();
2422         return 0;
2423 }
2424
2425 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2426 {
2427         drm_radeon_private_t *dev_priv = dev->dev_private;
2428         drm_radeon_texture_t *tex = data;
2429         drm_radeon_tex_image_t image;
2430         int ret;
2431
2432         LOCK_TEST_WITH_RETURN(dev, file_priv);
2433
2434         if (tex->image == NULL) {
2435                 DRM_ERROR("null texture image!\n");
2436                 return -EINVAL;
2437         }
2438
2439         if (DRM_COPY_FROM_USER(&image,
2440                                (drm_radeon_tex_image_t __user *) tex->image,
2441                                sizeof(image)))
2442                 return -EFAULT;
2443
2444         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2445         VB_AGE_TEST_WITH_RETURN(dev_priv);
2446
2447         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2448                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2449         else
2450                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2451
2452         return ret;
2453 }
2454
2455 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2456 {
2457         drm_radeon_private_t *dev_priv = dev->dev_private;
2458         drm_radeon_stipple_t *stipple = data;
2459         u32 mask[32];
2460
2461         LOCK_TEST_WITH_RETURN(dev, file_priv);
2462
2463         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2464                 return -EFAULT;
2465
2466         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2467
2468         radeon_cp_dispatch_stipple(dev, mask);
2469
2470         COMMIT_RING();
2471         return 0;
2472 }
2473
2474 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2475 {
2476         drm_radeon_private_t *dev_priv = dev->dev_private;
2477         struct drm_device_dma *dma = dev->dma;
2478         struct drm_buf *buf;
2479         drm_radeon_indirect_t *indirect = data;
2480         RING_LOCALS;
2481
2482         LOCK_TEST_WITH_RETURN(dev, file_priv);
2483
2484         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2485                   indirect->idx, indirect->start, indirect->end,
2486                   indirect->discard);
2487
2488         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2489                 DRM_ERROR("buffer index %d (of %d max)\n",
2490                           indirect->idx, dma->buf_count - 1);
2491                 return -EINVAL;
2492         }
2493
2494         buf = dma->buflist[indirect->idx];
2495
2496         if (buf->file_priv != file_priv) {
2497                 DRM_ERROR("process %d using buffer owned by %p\n",
2498                           DRM_CURRENTPID, buf->file_priv);
2499                 return -EINVAL;
2500         }
2501         if (buf->pending) {
2502                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2503                 return -EINVAL;
2504         }
2505
2506         if (indirect->start < buf->used) {
2507                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2508                           indirect->start, buf->used);
2509                 return -EINVAL;
2510         }
2511
2512         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2513         VB_AGE_TEST_WITH_RETURN(dev_priv);
2514
2515         buf->used = indirect->end;
2516
2517         /* Dispatch the indirect buffer full of commands from the
2518          * X server.  This is insecure and is thus only available to
2519          * privileged clients.
2520          */
2521         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2522                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2523         else {
2524                 /* Wait for the 3D stream to idle before the indirect buffer
2525                  * containing 2D acceleration commands is processed.
2526                  */
2527                 BEGIN_RING(2);
2528                 RADEON_WAIT_UNTIL_3D_IDLE();
2529                 ADVANCE_RING();
2530                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2531         }
2532
2533         if (indirect->discard) {
2534                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2535         }
2536
2537         COMMIT_RING();
2538         return 0;
2539 }
2540
2541 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2542 {
2543         drm_radeon_private_t *dev_priv = dev->dev_private;
2544         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2545         drm_radeon_sarea_t *sarea_priv;
2546         struct drm_device_dma *dma = dev->dma;
2547         struct drm_buf *buf;
2548         drm_radeon_vertex2_t *vertex = data;
2549         int i;
2550         unsigned char laststate;
2551
2552         LOCK_TEST_WITH_RETURN(dev, file_priv);
2553
2554         sarea_priv = master_priv->sarea_priv;
2555
2556         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2557                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2558
2559         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2560                 DRM_ERROR("buffer index %d (of %d max)\n",
2561                           vertex->idx, dma->buf_count - 1);
2562                 return -EINVAL;
2563         }
2564
2565         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2566         VB_AGE_TEST_WITH_RETURN(dev_priv);
2567
2568         buf = dma->buflist[vertex->idx];
2569
2570         if (buf->file_priv != file_priv) {
2571                 DRM_ERROR("process %d using buffer owned by %p\n",
2572                           DRM_CURRENTPID, buf->file_priv);
2573                 return -EINVAL;
2574         }
2575
2576         if (buf->pending) {
2577                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2578                 return -EINVAL;
2579         }
2580
2581         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2582                 return -EINVAL;
2583
2584         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2585                 drm_radeon_prim_t prim;
2586                 drm_radeon_tcl_prim_t tclprim;
2587
2588                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2589                         return -EFAULT;
2590
2591                 if (prim.stateidx != laststate) {
2592                         drm_radeon_state_t state;
2593
2594                         if (DRM_COPY_FROM_USER(&state,
2595                                                &vertex->state[prim.stateidx],
2596                                                sizeof(state)))
2597                                 return -EFAULT;
2598
2599                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2600                                 DRM_ERROR("radeon_emit_state2 failed\n");
2601                                 return -EINVAL;
2602                         }
2603
2604                         laststate = prim.stateidx;
2605                 }
2606
2607                 tclprim.start = prim.start;
2608                 tclprim.finish = prim.finish;
2609                 tclprim.prim = prim.prim;
2610                 tclprim.vc_format = prim.vc_format;
2611
2612                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2613                         tclprim.offset = prim.numverts * 64;
2614                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2615
2616                         radeon_cp_dispatch_indices(dev, file_priv->masterp, buf, &tclprim);
2617                 } else {
2618                         tclprim.numverts = prim.numverts;
2619                         tclprim.offset = 0;     /* not used */
2620
2621                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2622                 }
2623
2624                 if (sarea_priv->nbox == 1)
2625                         sarea_priv->nbox = 0;
2626         }
2627
2628         if (vertex->discard) {
2629                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2630         }
2631
2632         COMMIT_RING();
2633         return 0;
2634 }
2635
2636 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2637                                struct drm_file *file_priv,
2638                                drm_radeon_cmd_header_t header,
2639                                drm_radeon_kcmd_buffer_t *cmdbuf)
2640 {
2641         int id = (int)header.packet.packet_id;
2642         int sz, reg;
2643         RING_LOCALS;
2644
2645         if (id >= RADEON_MAX_STATE_PACKETS)
2646                 return -EINVAL;
2647
2648         sz = packet[id].len;
2649         reg = packet[id].start;
2650
2651         if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
2652                 DRM_ERROR("Packet size provided larger than data provided\n");
2653                 return -EINVAL;
2654         }
2655
2656         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
2657                                 cmdbuf->buffer)) {
2658                 DRM_ERROR("Packet verification failed\n");
2659                 return -EINVAL;
2660         }
2661
2662         BEGIN_RING(sz + 1);
2663         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2664         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2665         ADVANCE_RING();
2666
2667         return 0;
2668 }
2669
2670 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2671                                           drm_radeon_cmd_header_t header,
2672                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2673 {
2674         int sz = header.scalars.count;
2675         int start = header.scalars.offset;
2676         int stride = header.scalars.stride;
2677         RING_LOCALS;
2678
2679         BEGIN_RING(3 + sz);
2680         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2681         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2682         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2683         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2684         ADVANCE_RING();
2685         return 0;
2686 }
2687
2688 /* God this is ugly
2689  */
2690 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2691                                            drm_radeon_cmd_header_t header,
2692                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2693 {
2694         int sz = header.scalars.count;
2695         int start = ((unsigned int)header.scalars.offset) + 0x100;
2696         int stride = header.scalars.stride;
2697         RING_LOCALS;
2698
2699         BEGIN_RING(3 + sz);
2700         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2701         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2702         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2703         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2704         ADVANCE_RING();
2705         return 0;
2706 }
2707
2708 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2709                                           drm_radeon_cmd_header_t header,
2710                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2711 {
2712         int sz = header.vectors.count;
2713         int start = header.vectors.offset;
2714         int stride = header.vectors.stride;
2715         RING_LOCALS;
2716
2717         BEGIN_RING(5 + sz);
2718         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2719         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2720         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2721         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2722         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2723         ADVANCE_RING();
2724
2725         return 0;
2726 }
2727
2728 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2729                                           drm_radeon_cmd_header_t header,
2730                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2731 {
2732         int sz = header.veclinear.count * 4;
2733         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2734         RING_LOCALS;
2735
2736         if (!sz)
2737                 return 0;
2738         if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
2739                 return -EINVAL;
2740
2741         BEGIN_RING(5 + sz);
2742         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2743         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2744         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2745         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2746         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2747         ADVANCE_RING();
2748
2749         return 0;
2750 }
2751
2752 static int radeon_emit_packet3(struct drm_device * dev,
2753                                struct drm_file *file_priv,
2754                                drm_radeon_kcmd_buffer_t *cmdbuf)
2755 {
2756         drm_radeon_private_t *dev_priv = dev->dev_private;
2757         unsigned int cmdsz;
2758         int ret;
2759         RING_LOCALS;
2760
2761         DRM_DEBUG("\n");
2762
2763         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2764                                                   cmdbuf, &cmdsz))) {
2765                 DRM_ERROR("Packet verification failed\n");
2766                 return ret;
2767         }
2768
2769         BEGIN_RING(cmdsz);
2770         OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2771         ADVANCE_RING();
2772
2773         return 0;
2774 }
2775
2776 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2777                                         struct drm_file *file_priv,
2778                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2779                                         int orig_nbox)
2780 {
2781         drm_radeon_private_t *dev_priv = dev->dev_private;
2782         struct drm_clip_rect box;
2783         unsigned int cmdsz;
2784         int ret;
2785         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2786         int i = 0;
2787         RING_LOCALS;
2788
2789         DRM_DEBUG("\n");
2790
2791         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2792                                                   cmdbuf, &cmdsz))) {
2793                 DRM_ERROR("Packet verification failed\n");
2794                 return ret;
2795         }
2796
2797         if (!orig_nbox)
2798                 goto out;
2799
2800         do {
2801                 if (i < cmdbuf->nbox) {
2802                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2803                                 return -EFAULT;
2804                         /* FIXME The second and subsequent times round
2805                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2806                          * calling emit_clip_rect(). This fixes a
2807                          * lockup on fast machines when sending
2808                          * several cliprects with a cmdbuf, as when
2809                          * waving a 2D window over a 3D
2810                          * window. Something in the commands from user
2811                          * space seems to hang the card when they're
2812                          * sent several times in a row. That would be
2813                          * the correct place to fix it but this works
2814                          * around it until I can figure that out - Tim
2815                          * Smith */
2816                         if (i) {
2817                                 BEGIN_RING(2);
2818                                 RADEON_WAIT_UNTIL_3D_IDLE();
2819                                 ADVANCE_RING();
2820                         }
2821                         radeon_emit_clip_rect(dev_priv, &box);
2822                 }
2823
2824                 BEGIN_RING(cmdsz);
2825                 OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2826                 ADVANCE_RING();
2827
2828         } while (++i < cmdbuf->nbox);
2829         if (cmdbuf->nbox == 1)
2830                 cmdbuf->nbox = 0;
2831
2832         return 0;
2833       out:
2834         drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
2835         return 0;
2836 }
2837
2838 static int radeon_emit_wait(struct drm_device * dev, int flags)
2839 {
2840         drm_radeon_private_t *dev_priv = dev->dev_private;
2841         RING_LOCALS;
2842
2843         DRM_DEBUG("%x\n", flags);
2844         switch (flags) {
2845         case RADEON_WAIT_2D:
2846                 BEGIN_RING(2);
2847                 RADEON_WAIT_UNTIL_2D_IDLE();
2848                 ADVANCE_RING();
2849                 break;
2850         case RADEON_WAIT_3D:
2851                 BEGIN_RING(2);
2852                 RADEON_WAIT_UNTIL_3D_IDLE();
2853                 ADVANCE_RING();
2854                 break;
2855         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2856                 BEGIN_RING(2);
2857                 RADEON_WAIT_UNTIL_IDLE();
2858                 ADVANCE_RING();
2859                 break;
2860         default:
2861                 return -EINVAL;
2862         }
2863
2864         return 0;
2865 }
2866
2867 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
2868                 struct drm_file *file_priv)
2869 {
2870         drm_radeon_private_t *dev_priv = dev->dev_private;
2871         struct drm_device_dma *dma = dev->dma;
2872         struct drm_buf *buf = NULL;
2873         drm_radeon_cmd_header_t stack_header;
2874         int idx;
2875         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2876         int orig_nbox;
2877
2878         LOCK_TEST_WITH_RETURN(dev, file_priv);
2879
2880         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2881         VB_AGE_TEST_WITH_RETURN(dev_priv);
2882
2883         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2884                 return -EINVAL;
2885         }
2886
2887         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2888          * races between checking values and using those values in other code,
2889          * and simply to avoid a lot of function calls to copy in data.
2890          */
2891         if (cmdbuf->bufsz != 0) {
2892                 int rv;
2893                 void __user *buffer = cmdbuf->buffer;
2894                 rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
2895                 if (rv)
2896                         return rv;
2897                 rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
2898                                                 cmdbuf->bufsz);
2899                 if (rv) {
2900                         drm_buffer_free(cmdbuf->buffer);
2901                         return rv;
2902                 }
2903         } else
2904                 goto done;
2905
2906         orig_nbox = cmdbuf->nbox;
2907
2908         if (dev_priv->microcode_version == UCODE_R300) {
2909                 int temp;
2910                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2911
2912                 drm_buffer_free(cmdbuf->buffer);
2913
2914                 return temp;
2915         }
2916
2917         /* microcode_version != r300 */
2918         while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
2919
2920                 drm_radeon_cmd_header_t *header;
2921                 header = drm_buffer_read_object(cmdbuf->buffer,
2922                                 sizeof(stack_header), &stack_header);
2923
2924                 switch (header->header.cmd_type) {
2925                 case RADEON_CMD_PACKET:
2926                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2927                         if (radeon_emit_packets
2928                             (dev_priv, file_priv, *header, cmdbuf)) {
2929                                 DRM_ERROR("radeon_emit_packets failed\n");
2930                                 goto err;
2931                         }
2932                         break;
2933
2934                 case RADEON_CMD_SCALARS:
2935                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2936                         if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
2937                                 DRM_ERROR("radeon_emit_scalars failed\n");
2938                                 goto err;
2939                         }
2940                         break;
2941
2942                 case RADEON_CMD_VECTORS:
2943                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2944                         if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
2945                                 DRM_ERROR("radeon_emit_vectors failed\n");
2946                                 goto err;
2947                         }
2948                         break;
2949
2950                 case RADEON_CMD_DMA_DISCARD:
2951                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2952                         idx = header->dma.buf_idx;
2953                         if (idx < 0 || idx >= dma->buf_count) {
2954                                 DRM_ERROR("buffer index %d (of %d max)\n",
2955                                           idx, dma->buf_count - 1);
2956                                 goto err;
2957                         }
2958
2959                         buf = dma->buflist[idx];
2960                         if (buf->file_priv != file_priv || buf->pending) {
2961                                 DRM_ERROR("bad buffer %p %p %d\n",
2962                                           buf->file_priv, file_priv,
2963                                           buf->pending);
2964                                 goto err;
2965                         }
2966
2967                         radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2968                         break;
2969
2970                 case RADEON_CMD_PACKET3:
2971                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2972                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2973                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2974                                 goto err;
2975                         }
2976                         break;
2977
2978                 case RADEON_CMD_PACKET3_CLIP:
2979                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2980                         if (radeon_emit_packet3_cliprect
2981                             (dev, file_priv, cmdbuf, orig_nbox)) {
2982                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2983                                 goto err;
2984                         }
2985                         break;
2986
2987                 case RADEON_CMD_SCALARS2:
2988                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2989                         if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
2990                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2991                                 goto err;
2992                         }
2993                         break;
2994
2995                 case RADEON_CMD_WAIT:
2996                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2997                         if (radeon_emit_wait(dev, header->wait.flags)) {
2998                                 DRM_ERROR("radeon_emit_wait failed\n");
2999                                 goto err;
3000                         }
3001                         break;
3002                 case RADEON_CMD_VECLINEAR:
3003                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3004                         if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
3005                                 DRM_ERROR("radeon_emit_veclinear failed\n");
3006                                 goto err;
3007                         }
3008                         break;
3009
3010                 default:
3011                         DRM_ERROR("bad cmd_type %d at byte %d\n",
3012                                   header->header.cmd_type,
3013                                   cmdbuf->buffer->iterator);
3014                         goto err;
3015                 }
3016         }
3017
3018         drm_buffer_free(cmdbuf->buffer);
3019
3020       done:
3021         DRM_DEBUG("DONE\n");
3022         COMMIT_RING();
3023         return 0;
3024
3025       err:
3026         drm_buffer_free(cmdbuf->buffer);
3027         return -EINVAL;
3028 }
3029
3030 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3031 {
3032         drm_radeon_private_t *dev_priv = dev->dev_private;
3033         drm_radeon_getparam_t *param = data;
3034         int value;
3035
3036         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3037
3038         switch (param->param) {
3039         case RADEON_PARAM_GART_BUFFER_OFFSET:
3040                 value = dev_priv->gart_buffers_offset;
3041                 break;
3042         case RADEON_PARAM_LAST_FRAME:
3043                 dev_priv->stats.last_frame_reads++;
3044                 value = GET_SCRATCH(dev_priv, 0);
3045                 break;
3046         case RADEON_PARAM_LAST_DISPATCH:
3047                 value = GET_SCRATCH(dev_priv, 1);
3048                 break;
3049         case RADEON_PARAM_LAST_CLEAR:
3050                 dev_priv->stats.last_clear_reads++;
3051                 value = GET_SCRATCH(dev_priv, 2);
3052                 break;
3053         case RADEON_PARAM_IRQ_NR:
3054                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3055                         value = 0;
3056                 else
3057                         value = dev->irq;
3058                 break;
3059         case RADEON_PARAM_GART_BASE:
3060                 value = dev_priv->gart_vm_start;
3061                 break;
3062         case RADEON_PARAM_REGISTER_HANDLE:
3063                 value = dev_priv->mmio->offset;
3064                 break;
3065         case RADEON_PARAM_STATUS_HANDLE:
3066                 value = dev_priv->ring_rptr_offset;
3067                 break;
3068 #ifndef __LP64__
3069                 /*
3070                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3071                  * pointer which can't fit into an int-sized variable.  According to
3072                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3073                  * not supporting it shouldn't be a problem.  If the same functionality
3074                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3075                  * so backwards-compatibility for the embedded platforms can be
3076                  * maintained.  --davidm 4-Feb-2004.
3077                  */
3078         case RADEON_PARAM_SAREA_HANDLE:
3079                 /* The lock is the first dword in the sarea. */
3080                 /* no users of this parameter */
3081                 break;
3082 #endif
3083         case RADEON_PARAM_GART_TEX_HANDLE:
3084                 value = dev_priv->gart_textures_offset;
3085                 break;
3086         case RADEON_PARAM_SCRATCH_OFFSET:
3087                 if (!dev_priv->writeback_works)
3088                         return -EINVAL;
3089                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3090                         value = R600_SCRATCH_REG_OFFSET;
3091                 else
3092                         value = RADEON_SCRATCH_REG_OFFSET;
3093                 break;
3094         case RADEON_PARAM_CARD_TYPE:
3095                 if (dev_priv->flags & RADEON_IS_PCIE)
3096                         value = RADEON_CARD_PCIE;
3097                 else if (dev_priv->flags & RADEON_IS_AGP)
3098                         value = RADEON_CARD_AGP;
3099                 else
3100                         value = RADEON_CARD_PCI;
3101                 break;
3102         case RADEON_PARAM_VBLANK_CRTC:
3103                 value = radeon_vblank_crtc_get(dev);
3104                 break;
3105         case RADEON_PARAM_FB_LOCATION:
3106                 value = radeon_read_fb_location(dev_priv);
3107                 break;
3108         case RADEON_PARAM_NUM_GB_PIPES:
3109                 value = dev_priv->num_gb_pipes;
3110                 break;
3111         case RADEON_PARAM_NUM_Z_PIPES:
3112                 value = dev_priv->num_z_pipes;
3113                 break;
3114         default:
3115                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3116                 return -EINVAL;
3117         }
3118
3119         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3120                 DRM_ERROR("copy_to_user\n");
3121                 return -EFAULT;
3122         }
3123
3124         return 0;
3125 }
3126
3127 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3128 {
3129         drm_radeon_private_t *dev_priv = dev->dev_private;
3130         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
3131         drm_radeon_setparam_t *sp = data;
3132         struct drm_radeon_driver_file_fields *radeon_priv;
3133
3134         switch (sp->param) {
3135         case RADEON_SETPARAM_FB_LOCATION:
3136                 radeon_priv = file_priv->driver_priv;
3137                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3138                     sp->value;
3139                 break;
3140         case RADEON_SETPARAM_SWITCH_TILING:
3141                 if (sp->value == 0) {
3142                         DRM_DEBUG("color tiling disabled\n");
3143                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3144                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3145                         if (master_priv->sarea_priv)
3146                                 master_priv->sarea_priv->tiling_enabled = 0;
3147                 } else if (sp->value == 1) {
3148                         DRM_DEBUG("color tiling enabled\n");
3149                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3150                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3151                         if (master_priv->sarea_priv)
3152                                 master_priv->sarea_priv->tiling_enabled = 1;
3153                 }
3154                 break;
3155         case RADEON_SETPARAM_PCIGART_LOCATION:
3156                 dev_priv->pcigart_offset = sp->value;
3157                 dev_priv->pcigart_offset_set = 1;
3158                 break;
3159         case RADEON_SETPARAM_NEW_MEMMAP:
3160                 dev_priv->new_memmap = sp->value;
3161                 break;
3162         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3163                 dev_priv->gart_info.table_size = sp->value;
3164                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3165                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3166                 break;
3167         case RADEON_SETPARAM_VBLANK_CRTC:
3168                 return radeon_vblank_crtc_set(dev, sp->value);
3169                 break;
3170         default:
3171                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3172                 return -EINVAL;
3173         }
3174
3175         return 0;
3176 }
3177
3178 /* When a client dies:
3179  *    - Check for and clean up flipped page state
3180  *    - Free any alloced GART memory.
3181  *    - Free any alloced radeon surfaces.
3182  *
3183  * DRM infrastructure takes care of reclaiming dma buffers.
3184  */
3185 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3186 {
3187         if (dev->dev_private) {
3188                 drm_radeon_private_t *dev_priv = dev->dev_private;
3189                 dev_priv->page_flipping = 0;
3190                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3191                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3192                 radeon_surfaces_release(file_priv, dev_priv);
3193         }
3194 }
3195
3196 void radeon_driver_lastclose(struct drm_device *dev)
3197 {
3198         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3199         radeon_do_release(dev);
3200 }
3201
3202 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3203 {
3204         drm_radeon_private_t *dev_priv = dev->dev_private;
3205         struct drm_radeon_driver_file_fields *radeon_priv;
3206
3207         DRM_DEBUG("\n");
3208         radeon_priv = kmalloc(sizeof(*radeon_priv), M_DRM, M_WAITOK);
3209
3210         if (!radeon_priv)
3211                 return -ENOMEM;
3212
3213         file_priv->driver_priv = radeon_priv;
3214
3215         if (dev_priv)
3216                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3217         else
3218                 radeon_priv->radeon_fb_delta = 0;
3219         return 0;
3220 }
3221
3222 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3223 {
3224         struct drm_radeon_driver_file_fields *radeon_priv =
3225             file_priv->driver_priv;
3226
3227         drm_free(radeon_priv, M_DRM);
3228 }
3229
/*
 * Driver ioctl table.  Each DRM_IOCTL_DEF_DRV() entry names a RADEON_*
 * ioctl, the function that handles it, and the DRM_* flags attached to
 * it.
 *
 * CP_INIT, CP_START, CP_STOP, CP_RESET, INDIRECT and INIT_HEAP carry
 * DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY; every other entry carries only
 * DRM_AUTH.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
        DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
        DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};
3260
/* Size of radeon_ioctls[] as reported by DRM_ARRAY_SIZE(). */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);