1 /*-
2  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Gareth Hughes <gareth@valinux.com>
26  *    Kevin E. Martin <martin@valinux.com>
27  */
28
29 #include "dev/drm/drmP.h"
30 #include "dev/drm/drm.h"
31 #include "dev/drm/drm_sarea.h"
32 #include "dev/drm/radeon_drm.h"
33 #include "dev/drm/radeon_drv.h"
34
35 /* ================================================================
36  * Helper functions for client state checking and fixup
37  */
38
39 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
40                                                     dev_priv,
41                                                     struct drm_file *file_priv,
42                                                     u32 *offset)
43 {
44         u64 off = *offset;
45         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         /* Hrm ... the story of the offset ... So this function converts
49          * the various ideas of what userland clients might have for an
50          * offset in the card address space into an offset into the card
51          * address space :) So with a sane client, it should just keep
52          * the value intact and just do some boundary checking. However,
53          * not all clients are sane. Some older clients pass us 0 based
54          * offsets relative to the start of the framebuffer and some may
55          * assume the AGP aperture is appended to the framebuffer, so we
56          * try to detect those cases and fix them up.
57          *
58          * Note: It might be a good idea here to make sure the offset lands
59          * in some "allowed" area to protect things like the PCIE GART...
60          */
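        /* Worked example (illustrative numbers only, not taken from any real
         * card): with fb_location = 0x10000000, fb_size = 0x08000000 (so
         * fb_end = 0x17ffffff) and gart_vm_start = 0x20000000, a legacy
         * client passing the zero-based offset 0x00200000 fails the initial
         * radeon_check_offset(), gets radeon_fb_delta (0x10000000 for such a
         * client) added and lands at 0x10200000 inside the framebuffer; a
         * client with radeon_fb_delta == 0 that assumed the GART aperture
         * sits right after the framebuffer and passed 0x18000040 is instead
         * rebased past fb_end to gart_vm_start + 0x40 (0x20000040).
         */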
61
62         /* First, the best case, the offset already lands in either the
63          * framebuffer or the GART mapped space
64          */
65         if (radeon_check_offset(dev_priv, off))
66                 return 0;
67
68         /* Ok, that didn't happen... now check if we have a zero based
69          * offset that fits in the framebuffer + gart space, apply the
70          * magic offset we get from SETPARAM or calculated from fb_location
71          */
72         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
73                 radeon_priv = file_priv->driver_priv;
74                 off += radeon_priv->radeon_fb_delta;
75         }
76
77         /* Finally, assume we aimed at a GART offset if beyond the fb */
78         if (off > fb_end)
79                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
80
81         /* Now recheck and fail if out of bounds */
82         if (radeon_check_offset(dev_priv, off)) {
83                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
84                 *offset = off;
85                 return 0;
86         }
87         return -EINVAL;
88 }
89
90 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
91                                                      dev_priv,
92                                                      struct drm_file *file_priv,
93                                                      int id, u32 *data)
94 {
95         switch (id) {
96
97         case RADEON_EMIT_PP_MISC:
98                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
99                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
100                         DRM_ERROR("Invalid depth buffer offset\n");
101                         return -EINVAL;
102                 }
103                 break;
104
105         case RADEON_EMIT_PP_CNTL:
106                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
107                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
108                         DRM_ERROR("Invalid colour buffer offset\n");
109                         return -EINVAL;
110                 }
111                 break;
112
113         case R200_EMIT_PP_TXOFFSET_0:
114         case R200_EMIT_PP_TXOFFSET_1:
115         case R200_EMIT_PP_TXOFFSET_2:
116         case R200_EMIT_PP_TXOFFSET_3:
117         case R200_EMIT_PP_TXOFFSET_4:
118         case R200_EMIT_PP_TXOFFSET_5:
119                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
120                                                   &data[0])) {
121                         DRM_ERROR("Invalid R200 texture offset\n");
122                         return -EINVAL;
123                 }
124                 break;
125
126         case RADEON_EMIT_PP_TXFILTER_0:
127         case RADEON_EMIT_PP_TXFILTER_1:
128         case RADEON_EMIT_PP_TXFILTER_2:
129                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
130                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
131                         DRM_ERROR("Invalid R100 texture offset\n");
132                         return -EINVAL;
133                 }
134                 break;
135
136         case R200_EMIT_PP_CUBIC_OFFSETS_0:
137         case R200_EMIT_PP_CUBIC_OFFSETS_1:
138         case R200_EMIT_PP_CUBIC_OFFSETS_2:
139         case R200_EMIT_PP_CUBIC_OFFSETS_3:
140         case R200_EMIT_PP_CUBIC_OFFSETS_4:
141         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
142                         int i;
143                         for (i = 0; i < 5; i++) {
144                                 if (radeon_check_and_fixup_offset(dev_priv,
145                                                                   file_priv,
146                                                                   &data[i])) {
147                                         DRM_ERROR
148                                             ("Invalid R200 cubic texture offset\n");
149                                         return -EINVAL;
150                                 }
151                         }
152                         break;
153                 }
154
155         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
156         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
158                         int i;
159                         for (i = 0; i < 5; i++) {
160                                 if (radeon_check_and_fixup_offset(dev_priv,
161                                                                   file_priv,
162                                                                   &data[i])) {
163                                         DRM_ERROR
164                                             ("Invalid R100 cubic texture offset\n");
165                                         return -EINVAL;
166                                 }
167                         }
168                 }
169                 break;
170
171         case R200_EMIT_VAP_CTL: {
172                         RING_LOCALS;
173                         BEGIN_RING(2);
174                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
175                         ADVANCE_RING();
176                 }
177                 break;
178
179         case RADEON_EMIT_RB3D_COLORPITCH:
180         case RADEON_EMIT_RE_LINE_PATTERN:
181         case RADEON_EMIT_SE_LINE_WIDTH:
182         case RADEON_EMIT_PP_LUM_MATRIX:
183         case RADEON_EMIT_PP_ROT_MATRIX_0:
184         case RADEON_EMIT_RB3D_STENCILREFMASK:
185         case RADEON_EMIT_SE_VPORT_XSCALE:
186         case RADEON_EMIT_SE_CNTL:
187         case RADEON_EMIT_SE_CNTL_STATUS:
188         case RADEON_EMIT_RE_MISC:
189         case RADEON_EMIT_PP_BORDER_COLOR_0:
190         case RADEON_EMIT_PP_BORDER_COLOR_1:
191         case RADEON_EMIT_PP_BORDER_COLOR_2:
192         case RADEON_EMIT_SE_ZBIAS_FACTOR:
193         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
194         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
195         case R200_EMIT_PP_TXCBLEND_0:
196         case R200_EMIT_PP_TXCBLEND_1:
197         case R200_EMIT_PP_TXCBLEND_2:
198         case R200_EMIT_PP_TXCBLEND_3:
199         case R200_EMIT_PP_TXCBLEND_4:
200         case R200_EMIT_PP_TXCBLEND_5:
201         case R200_EMIT_PP_TXCBLEND_6:
202         case R200_EMIT_PP_TXCBLEND_7:
203         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
204         case R200_EMIT_TFACTOR_0:
205         case R200_EMIT_VTX_FMT_0:
206         case R200_EMIT_MATRIX_SELECT_0:
207         case R200_EMIT_TEX_PROC_CTL_2:
208         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
209         case R200_EMIT_PP_TXFILTER_0:
210         case R200_EMIT_PP_TXFILTER_1:
211         case R200_EMIT_PP_TXFILTER_2:
212         case R200_EMIT_PP_TXFILTER_3:
213         case R200_EMIT_PP_TXFILTER_4:
214         case R200_EMIT_PP_TXFILTER_5:
215         case R200_EMIT_VTE_CNTL:
216         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
217         case R200_EMIT_PP_TAM_DEBUG3:
218         case R200_EMIT_PP_CNTL_X:
219         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
220         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
221         case R200_EMIT_RE_SCISSOR_TL_0:
222         case R200_EMIT_RE_SCISSOR_TL_1:
223         case R200_EMIT_RE_SCISSOR_TL_2:
224         case R200_EMIT_SE_VAP_CNTL_STATUS:
225         case R200_EMIT_SE_VTX_STATE_CNTL:
226         case R200_EMIT_RE_POINTSIZE:
227         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
228         case R200_EMIT_PP_CUBIC_FACES_0:
229         case R200_EMIT_PP_CUBIC_FACES_1:
230         case R200_EMIT_PP_CUBIC_FACES_2:
231         case R200_EMIT_PP_CUBIC_FACES_3:
232         case R200_EMIT_PP_CUBIC_FACES_4:
233         case R200_EMIT_PP_CUBIC_FACES_5:
234         case RADEON_EMIT_PP_TEX_SIZE_0:
235         case RADEON_EMIT_PP_TEX_SIZE_1:
236         case RADEON_EMIT_PP_TEX_SIZE_2:
237         case R200_EMIT_RB3D_BLENDCOLOR:
238         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
239         case RADEON_EMIT_PP_CUBIC_FACES_0:
240         case RADEON_EMIT_PP_CUBIC_FACES_1:
241         case RADEON_EMIT_PP_CUBIC_FACES_2:
242         case R200_EMIT_PP_TRI_PERF_CNTL:
243         case R200_EMIT_PP_AFS_0:
244         case R200_EMIT_PP_AFS_1:
245         case R200_EMIT_ATF_TFACTOR:
246         case R200_EMIT_PP_TXCTLALL_0:
247         case R200_EMIT_PP_TXCTLALL_1:
248         case R200_EMIT_PP_TXCTLALL_2:
249         case R200_EMIT_PP_TXCTLALL_3:
250         case R200_EMIT_PP_TXCTLALL_4:
251         case R200_EMIT_PP_TXCTLALL_5:
252         case R200_EMIT_VAP_PVS_CNTL:
253                 /* These packets don't contain memory offsets */
254                 break;
255
256         default:
257                 DRM_ERROR("Unknown state packet ID %d\n", id);
258                 return -EINVAL;
259         }
260
261         return 0;
262 }
263
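/* Layout of the type-3 packet header checked below: bits 31:30 carry the
 * packet type (3), bits 29:16 the count field and bits 15:8 the opcode that
 * the switch dispatches on.  The total packet size works out to count + 2
 * dwords (the header plus count + 1 payload dwords), which is what *cmdsz
 * reports and what the buffer-size check enforces.
 */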
264 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
265                                                      dev_priv,
266                                                      struct drm_file *file_priv,
267                                                      drm_radeon_kcmd_buffer_t *
268                                                      cmdbuf,
269                                                      unsigned int *cmdsz)
270 {
271         u32 *cmd = (u32 *) cmdbuf->buf;
272         u32 offset, narrays;
273         int count, i, k;
274
275         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
276
277         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
278                 DRM_ERROR("Not a type 3 packet\n");
279                 return -EINVAL;
280         }
281
282         if (4 * *cmdsz > cmdbuf->bufsz) {
283                 DRM_ERROR("Packet size larger than size of data provided\n");
284                 return -EINVAL;
285         }
286
287         switch(cmd[0] & 0xff00) {
288         /* XXX Are there old drivers needing other packets? */
289
290         case RADEON_3D_DRAW_IMMD:
291         case RADEON_3D_DRAW_VBUF:
292         case RADEON_3D_DRAW_INDX:
293         case RADEON_WAIT_FOR_IDLE:
294         case RADEON_CP_NOP:
295         case RADEON_3D_CLEAR_ZMASK:
296 /*      case RADEON_CP_NEXT_CHAR:
297         case RADEON_CP_PLY_NEXTSCAN:
298         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
299                 /* these packets are safe */
300                 break;
301
302         case RADEON_CP_3D_DRAW_IMMD_2:
303         case RADEON_CP_3D_DRAW_VBUF_2:
304         case RADEON_CP_3D_DRAW_INDX_2:
305         case RADEON_3D_CLEAR_HIZ:
306                 /* safe but r200 only */
307                 if (dev_priv->microcode_version != UCODE_R200) {
308                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
309                         return -EINVAL;
310                 }
311                 break;
312
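        /* 3D_LOAD_VBPNTR payload, as the loop below expects it: the low bits
         * of cmd[1] give the number of vertex arrays (narrays); the arrays
         * then come in pairs of one packed attribute dword followed by two
         * buffer offsets, with an odd final array taking one attribute dword
         * and a single offset.  Only the offsets are run through
         * radeon_check_and_fixup_offset().
         */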
313         case RADEON_3D_LOAD_VBPNTR:
314                 count = (cmd[0] >> 16) & 0x3fff;
315
316                 if (count > 18) { /* 12 arrays max */
317                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
318                                   count);
319                         return -EINVAL;
320                 }
321
322                 /* carefully check packet contents */
323                 narrays = cmd[1] & ~0xc000;
324                 k = 0;
325                 i = 2;
326                 while ((k < narrays) && (i < (count + 2))) {
327                         i++;            /* skip attribute field */
328                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
329                                                           &cmd[i])) {
330                                 DRM_ERROR
331                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
332                                      k, i);
333                                 return -EINVAL;
334                         }
335                         k++;
336                         i++;
337                         if (k == narrays)
338                                 break;
339                         /* have one more to process, they come in pairs */
340                         if (radeon_check_and_fixup_offset(dev_priv,
341                                                           file_priv, &cmd[i]))
342                         {
343                                 DRM_ERROR
344                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
345                                      k, i);
346                                 return -EINVAL;
347                         }
348                         k++;
349                         i++;
350                 }
351                 /* do the counts match what we expect ? */
352                 if ((k != narrays) || (i != (count + 2))) {
353                         DRM_ERROR
354                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
355                               k, i, narrays, count + 1);
356                         return -EINVAL;
357                 }
358                 break;
359
360         case RADEON_3D_RNDR_GEN_INDX_PRIM:
361                 if (dev_priv->microcode_version != UCODE_R100) {
362                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
363                         return -EINVAL;
364                 }
365                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
366                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
367                                 return -EINVAL;
368                 }
369                 break;
370
371         case RADEON_CP_INDX_BUFFER:
372                 if (dev_priv->microcode_version != UCODE_R200) {
373                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
374                         return -EINVAL;
375                 }
376                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
377                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
378                         return -EINVAL;
379                 }
380                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
381                         DRM_ERROR("Invalid indx_buffer offset %08X\n", cmd[2]);
382                         return -EINVAL;
383                 }
384                 break;
385
386         case RADEON_CNTL_HOSTDATA_BLT:
387         case RADEON_CNTL_PAINT_MULTI:
388         case RADEON_CNTL_BITBLT_MULTI:
389                 /* MSB of opcode: next DWORD GUI_CNTL */
390                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
391                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
392                         offset = cmd[2] << 10;
393                         if (radeon_check_and_fixup_offset
394                             (dev_priv, file_priv, &offset)) {
395                                 DRM_ERROR("Invalid first packet offset\n");
396                                 return -EINVAL;
397                         }
398                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
399                 }
400
401                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
402                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
403                         offset = cmd[3] << 10;
404                         if (radeon_check_and_fixup_offset
405                             (dev_priv, file_priv, &offset)) {
406                                 DRM_ERROR("Invalid second packet offset\n");
407                                 return -EINVAL;
408                         }
409                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
410                 }
411                 break;
412
413         default:
414                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
415                 return -EINVAL;
416         }
417
418         return 0;
419 }
420
421 /* ================================================================
422  * CP hardware state programming functions
423  */
424
425 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
426                                              struct drm_clip_rect * box)
427 {
428         RING_LOCALS;
429
430         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
431                   box->x1, box->y1, box->x2, box->y2);
432
433         BEGIN_RING(4);
434         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
435         OUT_RING((box->y1 << 16) | box->x1);
436         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
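        /* x2/y2 in drm_clip_rect are exclusive bounds (the clear code below
         * computes w = x2 - x1), so subtract 1 to get what is presumably the
         * inclusive bottom-right corner the hardware expects.
         */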
437         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
438         ADVANCE_RING();
439 }
440
441 /* Emit 1.1 state
442  */
443 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
444                              struct drm_file *file_priv,
445                              drm_radeon_context_regs_t * ctx,
446                              drm_radeon_texture_regs_t * tex,
447                              unsigned int dirty)
448 {
449         RING_LOCALS;
450         DRM_DEBUG("dirty=0x%08x\n", dirty);
451
452         if (dirty & RADEON_UPLOAD_CONTEXT) {
453                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
454                                                   &ctx->rb3d_depthoffset)) {
455                         DRM_ERROR("Invalid depth buffer offset\n");
456                         return -EINVAL;
457                 }
458
459                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
460                                                   &ctx->rb3d_coloroffset)) {
461                         DRM_ERROR("Invalid colour buffer offset\n");
462                         return -EINVAL;
463                 }
464
465                 BEGIN_RING(14);
466                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
467                 OUT_RING(ctx->pp_misc);
468                 OUT_RING(ctx->pp_fog_color);
469                 OUT_RING(ctx->re_solid_color);
470                 OUT_RING(ctx->rb3d_blendcntl);
471                 OUT_RING(ctx->rb3d_depthoffset);
472                 OUT_RING(ctx->rb3d_depthpitch);
473                 OUT_RING(ctx->rb3d_zstencilcntl);
474                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
475                 OUT_RING(ctx->pp_cntl);
476                 OUT_RING(ctx->rb3d_cntl);
477                 OUT_RING(ctx->rb3d_coloroffset);
478                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
479                 OUT_RING(ctx->rb3d_colorpitch);
480                 ADVANCE_RING();
481         }
482
483         if (dirty & RADEON_UPLOAD_VERTFMT) {
484                 BEGIN_RING(2);
485                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
486                 OUT_RING(ctx->se_coord_fmt);
487                 ADVANCE_RING();
488         }
489
490         if (dirty & RADEON_UPLOAD_LINE) {
491                 BEGIN_RING(5);
492                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
493                 OUT_RING(ctx->re_line_pattern);
494                 OUT_RING(ctx->re_line_state);
495                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
496                 OUT_RING(ctx->se_line_width);
497                 ADVANCE_RING();
498         }
499
500         if (dirty & RADEON_UPLOAD_BUMPMAP) {
501                 BEGIN_RING(5);
502                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
503                 OUT_RING(ctx->pp_lum_matrix);
504                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
505                 OUT_RING(ctx->pp_rot_matrix_0);
506                 OUT_RING(ctx->pp_rot_matrix_1);
507                 ADVANCE_RING();
508         }
509
510         if (dirty & RADEON_UPLOAD_MASKS) {
511                 BEGIN_RING(4);
512                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
513                 OUT_RING(ctx->rb3d_stencilrefmask);
514                 OUT_RING(ctx->rb3d_ropcntl);
515                 OUT_RING(ctx->rb3d_planemask);
516                 ADVANCE_RING();
517         }
518
519         if (dirty & RADEON_UPLOAD_VIEWPORT) {
520                 BEGIN_RING(7);
521                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
522                 OUT_RING(ctx->se_vport_xscale);
523                 OUT_RING(ctx->se_vport_xoffset);
524                 OUT_RING(ctx->se_vport_yscale);
525                 OUT_RING(ctx->se_vport_yoffset);
526                 OUT_RING(ctx->se_vport_zscale);
527                 OUT_RING(ctx->se_vport_zoffset);
528                 ADVANCE_RING();
529         }
530
531         if (dirty & RADEON_UPLOAD_SETUP) {
532                 BEGIN_RING(4);
533                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
534                 OUT_RING(ctx->se_cntl);
535                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
536                 OUT_RING(ctx->se_cntl_status);
537                 ADVANCE_RING();
538         }
539
540         if (dirty & RADEON_UPLOAD_MISC) {
541                 BEGIN_RING(2);
542                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
543                 OUT_RING(ctx->re_misc);
544                 ADVANCE_RING();
545         }
546
547         if (dirty & RADEON_UPLOAD_TEX0) {
548                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
549                                                   &tex[0].pp_txoffset)) {
550                         DRM_ERROR("Invalid texture offset for unit 0\n");
551                         return -EINVAL;
552                 }
553
554                 BEGIN_RING(9);
555                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
556                 OUT_RING(tex[0].pp_txfilter);
557                 OUT_RING(tex[0].pp_txformat);
558                 OUT_RING(tex[0].pp_txoffset);
559                 OUT_RING(tex[0].pp_txcblend);
560                 OUT_RING(tex[0].pp_txablend);
561                 OUT_RING(tex[0].pp_tfactor);
562                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
563                 OUT_RING(tex[0].pp_border_color);
564                 ADVANCE_RING();
565         }
566
567         if (dirty & RADEON_UPLOAD_TEX1) {
568                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
569                                                   &tex[1].pp_txoffset)) {
570                         DRM_ERROR("Invalid texture offset for unit 1\n");
571                         return -EINVAL;
572                 }
573
574                 BEGIN_RING(9);
575                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
576                 OUT_RING(tex[1].pp_txfilter);
577                 OUT_RING(tex[1].pp_txformat);
578                 OUT_RING(tex[1].pp_txoffset);
579                 OUT_RING(tex[1].pp_txcblend);
580                 OUT_RING(tex[1].pp_txablend);
581                 OUT_RING(tex[1].pp_tfactor);
582                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
583                 OUT_RING(tex[1].pp_border_color);
584                 ADVANCE_RING();
585         }
586
587         if (dirty & RADEON_UPLOAD_TEX2) {
588                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
589                                                   &tex[2].pp_txoffset)) {
590                         DRM_ERROR("Invalid texture offset for unit 2\n");
591                         return -EINVAL;
592                 }
593
594                 BEGIN_RING(9);
595                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
596                 OUT_RING(tex[2].pp_txfilter);
597                 OUT_RING(tex[2].pp_txformat);
598                 OUT_RING(tex[2].pp_txoffset);
599                 OUT_RING(tex[2].pp_txcblend);
600                 OUT_RING(tex[2].pp_txablend);
601                 OUT_RING(tex[2].pp_tfactor);
602                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
603                 OUT_RING(tex[2].pp_border_color);
604                 ADVANCE_RING();
605         }
606
607         return 0;
608 }
609
610 /* Emit 1.2 state
611  */
612 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
613                               struct drm_file *file_priv,
614                               drm_radeon_state_t * state)
615 {
616         RING_LOCALS;
617
618         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
619                 BEGIN_RING(3);
620                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
621                 OUT_RING(state->context2.se_zbias_factor);
622                 OUT_RING(state->context2.se_zbias_constant);
623                 ADVANCE_RING();
624         }
625
626         return radeon_emit_state(dev_priv, file_priv, &state->context,
627                                  state->tex, state->dirty);
628 }
629
630 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
631  * 1.3 cmdbuffers allow all previous state to be updated as well as
632  * the tcl scalar and vector areas.
633  */
634 static struct {
635         int start;
636         int len;
637         const char *name;
638 } packet[RADEON_MAX_STATE_PACKETS] = {
639         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
640         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
641         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
642         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
643         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
644         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
645         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
646         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
647         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
648         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
649         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
650         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
651         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
652         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
653         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
654         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
655         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
656         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
657         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
658         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
659         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
660                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
661         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
662         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
663         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
664         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
665         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
666         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
667         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
668         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
669         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
670         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
671         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
672         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
673         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
674         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
675         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
676         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
677         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
678         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
679         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
680         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
681         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
682         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
683         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
684         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
685         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
686         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
687         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
688         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
689         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
690          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
691         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
692         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
693         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
694         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
695         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
696         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
697         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
698         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
699         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
700         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
701         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
702                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
703         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
704         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
705         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
706         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
707         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
708         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
709         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
710         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
711         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
712         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
713         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
714         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
715         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
716         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
717         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
718         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
719         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
720         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
721         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
722         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
723         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
724         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
725         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
726         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
727         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
728         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
729         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
730         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
731         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
732         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
733         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
734         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
735         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
736         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
737 };
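/* Each entry of the table above is indexed by the RADEON_EMIT_* / R200_EMIT_*
 * id that 1.3 command buffers carry (the same ids validated in
 * radeon_check_and_fixup_packets()): 'start' names the first register to
 * write and 'len' the number of register dwords that follow it in the
 * client's buffer.
 */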
738
739 /* ================================================================
740  * Performance monitoring functions
741  */
742
743 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
744                              int x, int y, int w, int h, int r, int g, int b)
745 {
746         u32 color;
747         RING_LOCALS;
748
749         x += dev_priv->sarea_priv->boxes[0].x1;
750         y += dev_priv->sarea_priv->boxes[0].y1;
751
752         switch (dev_priv->color_fmt) {
753         case RADEON_COLOR_FORMAT_RGB565:
754                 color = (((r & 0xf8) << 8) |
755                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
756                 break;
757         case RADEON_COLOR_FORMAT_ARGB8888:
758         default:
759                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
760                 break;
761         }
762
763         BEGIN_RING(4);
764         RADEON_WAIT_UNTIL_3D_IDLE();
765         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
766         OUT_RING(0xffffffff);
767         ADVANCE_RING();
768
769         BEGIN_RING(6);
770
771         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
772         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
773                  RADEON_GMC_BRUSH_SOLID_COLOR |
774                  (dev_priv->color_fmt << 8) |
775                  RADEON_GMC_SRC_DATATYPE_COLOR |
776                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
777
778         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
779                 OUT_RING(dev_priv->front_pitch_offset);
780         } else {
781                 OUT_RING(dev_priv->back_pitch_offset);
782         }
783
784         OUT_RING(color);
785
786         OUT_RING((x << 16) | y);
787         OUT_RING((w << 16) | h);
788
789         ADVANCE_RING();
790 }
791
792 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
793 {
794         /* Collapse various things into a wait flag -- trying to
795          * guess if userspace slept -- better just to have them tell us.
796          */
797         if (dev_priv->stats.last_frame_reads > 1 ||
798             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
799                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
800         }
801
802         if (dev_priv->stats.freelist_loops) {
803                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
804         }
805
806         /* Purple box for page flipping
807          */
808         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
809                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
810
811         /* Red box if we have to wait for idle at any point
812          */
813         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
814                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
815
816         /* Blue box: lost context?
817          */
818
819         /* Yellow box for texture swaps
820          */
821         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
822                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
823
824         /* Green box if hardware never idles (as far as we can tell)
825          */
826         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
827                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
828
829         /* Draw bars indicating number of buffers allocated
830          * (not a great measure, easily confused)
831          */
832         if (dev_priv->stats.requested_bufs) {
833                 if (dev_priv->stats.requested_bufs > 100)
834                         dev_priv->stats.requested_bufs = 100;
835
836                 radeon_clear_box(dev_priv, 4, 16,
837                                  dev_priv->stats.requested_bufs, 4,
838                                  196, 128, 128);
839         }
840
841         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
842
843 }
844
845 /* ================================================================
846  * CP command dispatch functions
847  */
848
849 static void radeon_cp_dispatch_clear(struct drm_device * dev,
850                                      drm_radeon_clear_t * clear,
851                                      drm_radeon_clear_rect_t * depth_boxes)
852 {
853         drm_radeon_private_t *dev_priv = dev->dev_private;
854         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
855         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
856         int nbox = sarea_priv->nbox;
857         struct drm_clip_rect *pbox = sarea_priv->boxes;
858         unsigned int flags = clear->flags;
859         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
860         int i;
861         RING_LOCALS;
862         DRM_DEBUG("flags = 0x%x\n", flags);
863
864         dev_priv->stats.clears++;
865
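        /* With the pages flipped, what the client calls the front buffer is
         * currently the hardware back buffer and vice versa, so swap the two
         * flags before clearing.
         */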
866         if (sarea_priv->pfCurrentPage == 1) {
867                 unsigned int tmp = flags;
868
869                 flags &= ~(RADEON_FRONT | RADEON_BACK);
870                 if (tmp & RADEON_FRONT)
871                         flags |= RADEON_BACK;
872                 if (tmp & RADEON_BACK)
873                         flags |= RADEON_FRONT;
874         }
875
876         if (flags & (RADEON_FRONT | RADEON_BACK)) {
877
878                 BEGIN_RING(4);
879
880                 /* Ensure the 3D stream is idle before doing a
881                  * 2D fill to clear the front or back buffer.
882                  */
883                 RADEON_WAIT_UNTIL_3D_IDLE();
884
885                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
886                 OUT_RING(clear->color_mask);
887
888                 ADVANCE_RING();
889
890                 /* Make sure we restore the 3D state next time.
891                  */
892                 sarea_priv->ctx_owner = 0;
893
894                 for (i = 0; i < nbox; i++) {
895                         int x = pbox[i].x1;
896                         int y = pbox[i].y1;
897                         int w = pbox[i].x2 - x;
898                         int h = pbox[i].y2 - y;
899
900                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
901                                   x, y, w, h, flags);
902
903                         if (flags & RADEON_FRONT) {
904                                 BEGIN_RING(6);
905
906                                 OUT_RING(CP_PACKET3
907                                          (RADEON_CNTL_PAINT_MULTI, 4));
908                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
909                                          RADEON_GMC_BRUSH_SOLID_COLOR |
910                                          (dev_priv->
911                                           color_fmt << 8) |
912                                          RADEON_GMC_SRC_DATATYPE_COLOR |
913                                          RADEON_ROP3_P |
914                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
915
916                                 OUT_RING(dev_priv->front_pitch_offset);
917                                 OUT_RING(clear->clear_color);
918
919                                 OUT_RING((x << 16) | y);
920                                 OUT_RING((w << 16) | h);
921
922                                 ADVANCE_RING();
923                         }
924
925                         if (flags & RADEON_BACK) {
926                                 BEGIN_RING(6);
927
928                                 OUT_RING(CP_PACKET3
929                                          (RADEON_CNTL_PAINT_MULTI, 4));
930                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
931                                          RADEON_GMC_BRUSH_SOLID_COLOR |
932                                          (dev_priv->
933                                           color_fmt << 8) |
934                                          RADEON_GMC_SRC_DATATYPE_COLOR |
935                                          RADEON_ROP3_P |
936                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
937
938                                 OUT_RING(dev_priv->back_pitch_offset);
939                                 OUT_RING(clear->clear_color);
940
941                                 OUT_RING((x << 16) | y);
942                                 OUT_RING((w << 16) | h);
943
944                                 ADVANCE_RING();
945                         }
946                 }
947         }
948
949         /* hyper z clear */
950         /* no docs available, based on reverse engineering by Stephane Marchesin */
951         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
952             && (flags & RADEON_CLEAR_FASTZ)) {
953
954                 int i;
955                 int depthpixperline =
956                     dev_priv->depth_fmt ==
957                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
958                                                        2) : (dev_priv->
959                                                              depth_pitch / 4);
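                /* depth_pitch is in bytes; dividing by the bytes per depth
                 * pixel (2 for 16-bit Z, 4 otherwise) yields the pitch in
                 * pixels, from which the tile offsets below are computed.
                 */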
960
961                 u32 clearmask;
962
963                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
964                     ((clear->depth_mask & 0xff) << 24);
965
966                 /* Make sure we restore the 3D state next time.
967                  * we haven't touched any "normal" state - still need this?
968                  */
969                 sarea_priv->ctx_owner = 0;
970
971                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
972                     && (flags & RADEON_USE_HIERZ)) {
973                         /* FIXME : reverse engineer that for Rx00 cards */
974                         /* FIXME : the mask supposedly contains low-res z values. So can't set
975                            just to the max (0xff? or actually 0x3fff?), need to take z clear
976                            value into account? */
977                         /* pattern seems to work for r100, though we get slight
978                            rendering errors with glxgears. If hierz is not enabled for r100,
979                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
980                            other ones are ignored, and the same clear mask can be used. That's
981                            very different behaviour than R200 which needs different clear mask
982                            very different behaviour from R200, which needs a different clear mask
983                            and a different number of tiles to clear depending on whether hierz is enabled !?!
984                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
985                 } else {
986                         /* clear mask : chooses the clearing pattern.
987                            rv250: could be used to clear only parts of macrotiles
988                            (but that would get really complicated...)?
989                            bits 0 and 1 (either or both of them ?!?!) are used to
990                            not clear tile 0 (or maybe one of the bits indicates if the tile is
991                            compressed or not), bits 2 and 3 to not clear tile 1, and so on.
992                            Pattern is as follows:
993                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
994                            bits -------------------------------------------------
995                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
996                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
997                            covers 256 pixels ?!?
998                          */
999                         clearmask = 0x0;
1000                 }
1001
1002                 BEGIN_RING(8);
1003                 RADEON_WAIT_UNTIL_2D_IDLE();
1004                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1005                              tempRB3D_DEPTHCLEARVALUE);
1006                 /* what offset is this exactly ? */
1007                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1008                 /* need ctlstat, otherwise get some strange black flickering */
1009                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1010                              RADEON_RB3D_ZC_FLUSH_ALL);
1011                 ADVANCE_RING();
1012
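                /* Three per-box variants follow, differing in tile geometry
                 * and offset scaling: hierz-capable r100-class parts,
                 * r200-class parts (rv250 style), and plain rv100.
                 */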
1013                 for (i = 0; i < nbox; i++) {
1014                         int tileoffset, nrtilesx, nrtilesy, j;
1015                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1016                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1017                             && !(dev_priv->microcode_version == UCODE_R200)) {
1018                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1019                                    maybe r200 actually doesn't need to put the low-res z value into
1020                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1021                                    Works for R100, both with hierz and without.
1022                                    R100 seems to operate on 2x1 8x8 tiles, but...
1023                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1024                                    problematic with resolutions which are not 64 pix aligned? */
1025                                 tileoffset =
1026                                     ((pbox[i].y1 >> 3) * depthpixperline +
1027                                      pbox[i].x1) >> 6;
1028                                 nrtilesx =
1029                                     ((pbox[i].x2 & ~63) -
1030                                      (pbox[i].x1 & ~63)) >> 4;
1031                                 nrtilesy =
1032                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1033                                 for (j = 0; j <= nrtilesy; j++) {
1034                                         BEGIN_RING(4);
1035                                         OUT_RING(CP_PACKET3
1036                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1037                                         /* first tile */
1038                                         OUT_RING(tileoffset * 8);
1039                                         /* the number of tiles to clear */
1040                                         OUT_RING(nrtilesx + 4);
1041                                         /* clear mask : chooses the clearing pattern. */
1042                                         OUT_RING(clearmask);
1043                                         ADVANCE_RING();
1044                                         tileoffset += depthpixperline >> 6;
1045                                 }
1046                         } else if (dev_priv->microcode_version == UCODE_R200) {
1047                                 /* works for rv250. */
1048                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1049                                 tileoffset =
1050                                     ((pbox[i].y1 >> 3) * depthpixperline +
1051                                      pbox[i].x1) >> 5;
1052                                 nrtilesx =
1053                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1054                                 nrtilesy =
1055                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1056                                 for (j = 0; j <= nrtilesy; j++) {
1057                                         BEGIN_RING(4);
1058                                         OUT_RING(CP_PACKET3
1059                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1060                                         /* first tile */
1061                                         /* judging by the first tile offset needed, could possibly
1062                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1063                                            macro tiles, though would still need clear mask for
1064                                            right/bottom if truly 4x4 granularity is desired? */
1065                                         OUT_RING(tileoffset * 16);
1066                                         /* the number of tiles to clear */
1067                                         OUT_RING(nrtilesx + 1);
1068                                         /* clear mask : chooses the clearing pattern. */
1069                                         OUT_RING(clearmask);
1070                                         ADVANCE_RING();
1071                                         tileoffset += depthpixperline >> 5;
1072                                 }
1073                         } else {        /* rv 100 */
1074                                 /* rv100 might not need 64 pix alignment, who knows */
1075                                 /* offsets are, hmm, weird */
1076                                 tileoffset =
1077                                     ((pbox[i].y1 >> 4) * depthpixperline +
1078                                      pbox[i].x1) >> 6;
1079                                 nrtilesx =
1080                                     ((pbox[i].x2 & ~63) -
1081                                      (pbox[i].x1 & ~63)) >> 4;
1082                                 nrtilesy =
1083                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1084                                 for (j = 0; j <= nrtilesy; j++) {
1085                                         BEGIN_RING(4);
1086                                         OUT_RING(CP_PACKET3
1087                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1088                                         OUT_RING(tileoffset * 128);
1089                                         /* the number of tiles to clear */
1090                                         OUT_RING(nrtilesx + 4);
1091                                         /* clear mask : chooses the clearing pattern. */
1092                                         OUT_RING(clearmask);
1093                                         ADVANCE_RING();
1094                                         tileoffset += depthpixperline >> 6;
1095                                 }
1096                         }
1097                 }
1098
1099                 /* TODO don't always clear all hi-level z tiles */
1100                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1101                     && (dev_priv->microcode_version == UCODE_R200)
1102                     && (flags & RADEON_USE_HIERZ))
1103                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1104                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1105                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1106                            value into account? */
1107                 {
1108                         BEGIN_RING(4);
1109                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1110                         OUT_RING(0x0);  /* First tile */
1111                         OUT_RING(0x3cc0);
1112                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1113                         ADVANCE_RING();
1114                 }
1115         }
1116
1117         /* We have to clear the depth and/or stencil buffers by
1118          * rendering a quad into just those buffers.  Thus, we have to
1119          * make sure the 3D engine is configured correctly.
1120          */
1121         else if ((dev_priv->microcode_version == UCODE_R200) &&
1122                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1123
1124                 int tempPP_CNTL;
1125                 int tempRE_CNTL;
1126                 int tempRB3D_CNTL;
1127                 int tempRB3D_ZSTENCILCNTL;
1128                 int tempRB3D_STENCILREFMASK;
1129                 int tempRB3D_PLANEMASK;
1130                 int tempSE_CNTL;
1131                 int tempSE_VTE_CNTL;
1132                 int tempSE_VTX_FMT_0;
1133                 int tempSE_VTX_FMT_1;
1134                 int tempSE_VAP_CNTL;
1135                 int tempRE_AUX_SCISSOR_CNTL;
1136
1137                 tempPP_CNTL = 0;
1138                 tempRE_CNTL = 0;
1139
1140                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1141
1142                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1143                 tempRB3D_STENCILREFMASK = 0x0;
1144
1145                 tempSE_CNTL = depth_clear->se_cntl;
1146
1147                 /* Disable TCL */
1148
1149                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1150                                           (0x9 <<
1151                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1152
1153                 tempRB3D_PLANEMASK = 0x0;
1154
1155                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1156
1157                 tempSE_VTE_CNTL =
1158                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1159
1160                 /* Vertex format (X, Y, Z, W) */
1161                 tempSE_VTX_FMT_0 =
1162                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1163                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1164                 tempSE_VTX_FMT_1 = 0x0;
1165
1166                 /*
1167                  * Depth buffer specific enables
1168                  */
1169                 if (flags & RADEON_DEPTH) {
1170                         /* Enable depth buffer */
1171                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1172                 } else {
1173                         /* Disable depth buffer */
1174                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1175                 }
1176
1177                 /*
1178                  * Stencil buffer specific enables
1179                  */
1180                 if (flags & RADEON_STENCIL) {
1181                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1182                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1183                 } else {
1184                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1185                         tempRB3D_STENCILREFMASK = 0x00000000;
1186                 }
1187
1188                 if (flags & RADEON_USE_COMP_ZBUF) {
1189                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1190                             RADEON_Z_DECOMPRESSION_ENABLE;
1191                 }
1192                 if (flags & RADEON_USE_HIERZ) {
1193                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1194                 }
1195
1196                 BEGIN_RING(26);
1197                 RADEON_WAIT_UNTIL_2D_IDLE();
1198
1199                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1200                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1201                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1202                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1203                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1204                              tempRB3D_STENCILREFMASK);
1205                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1206                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1207                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1208                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1209                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1210                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1211                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1212                 ADVANCE_RING();
1213
1214                 /* Make sure we restore the 3D state next time.
1215                  */
1216                 sarea_priv->ctx_owner = 0;
1217
1218                 for (i = 0; i < nbox; i++) {
1219
1220                         /* Funny that this should be required --
1221                          *  sets top-left?
1222                          */
1223                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1224
1225                         BEGIN_RING(14);
1226                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1227                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1228                                   RADEON_PRIM_WALK_RING |
1229                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1230                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1231                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1232                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1233                         OUT_RING(0x3f800000);
1234                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1235                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1236                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1237                         OUT_RING(0x3f800000);
1238                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1239                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1240                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1241                         OUT_RING(0x3f800000);
1242                         ADVANCE_RING();
1243                 }
1244         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1245
1246                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1247
1248                 rb3d_cntl = depth_clear->rb3d_cntl;
1249
1250                 if (flags & RADEON_DEPTH) {
1251                         rb3d_cntl |= RADEON_Z_ENABLE;
1252                 } else {
1253                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1254                 }
1255
1256                 if (flags & RADEON_STENCIL) {
1257                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1258                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1259                 } else {
1260                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1261                         rb3d_stencilrefmask = 0x00000000;
1262                 }
1263
1264                 if (flags & RADEON_USE_COMP_ZBUF) {
1265                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1266                             RADEON_Z_DECOMPRESSION_ENABLE;
1267                 }
1268                 if (flags & RADEON_USE_HIERZ) {
1269                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1270                 }
1271
1272                 BEGIN_RING(13);
1273                 RADEON_WAIT_UNTIL_2D_IDLE();
1274
1275                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1276                 OUT_RING(0x00000000);
1277                 OUT_RING(rb3d_cntl);
1278
1279                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1280                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1281                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1282                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1283                 ADVANCE_RING();
1284
1285                 /* Make sure we restore the 3D state next time.
1286                  */
1287                 sarea_priv->ctx_owner = 0;
1288
1289                 for (i = 0; i < nbox; i++) {
1290
1291                         /* Funny that this should be required --
1292                          *  sets top-left?
1293                          */
1294                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1295
1296                         BEGIN_RING(15);
1297
1298                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1299                         OUT_RING(RADEON_VTX_Z_PRESENT |
1300                                  RADEON_VTX_PKCOLOR_PRESENT);
1301                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1302                                   RADEON_PRIM_WALK_RING |
1303                                   RADEON_MAOS_ENABLE |
1304                                   RADEON_VTX_FMT_RADEON_MODE |
1305                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1306
1307                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1308                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1309                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1310                         OUT_RING(0x0);
1311
1312                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1313                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1314                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1315                         OUT_RING(0x0);
1316
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1318                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1319                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1320                         OUT_RING(0x0);
1321
1322                         ADVANCE_RING();
1323                 }
1324         }
1325
1326         /* Increment the clear counter.  The client-side 3D driver must
1327          * wait on this value before performing the clear ioctl.  We
1328          * need this because the card's so damned fast...
1329          */
1330         sarea_priv->last_clear++;
1331
1332         BEGIN_RING(4);
1333
1334         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1335         RADEON_WAIT_UNTIL_IDLE();
1336
1337         ADVANCE_RING();
1338 }
1339
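/* Copy the back buffer to the front buffer (or vice versa when the pages
 * are flipped) with one 2D bitblt per cliprect.  The 3D stream is idled
 * first so the blit cannot overtake outstanding rendering, and last_frame
 * is bumped afterwards so clients can throttle on it.
 */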
1340 static void radeon_cp_dispatch_swap(struct drm_device *dev)
1341 {
1342         drm_radeon_private_t *dev_priv = dev->dev_private;
1343         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1344         int nbox = sarea_priv->nbox;
1345         struct drm_clip_rect *pbox = sarea_priv->boxes;
1346         int i;
1347         RING_LOCALS;
1348         DRM_DEBUG("\n");
1349
1350         /* Do some trivial performance monitoring...
1351          */
1352         if (dev_priv->do_boxes)
1353                 radeon_cp_performance_boxes(dev_priv);
1354
1355         /* Wait for the 3D stream to idle before dispatching the bitblt.
1356          * This will prevent data corruption between the two streams.
1357          */
1358         BEGIN_RING(2);
1359
1360         RADEON_WAIT_UNTIL_3D_IDLE();
1361
1362         ADVANCE_RING();
1363
1364         for (i = 0; i < nbox; i++) {
1365                 int x = pbox[i].x1;
1366                 int y = pbox[i].y1;
1367                 int w = pbox[i].x2 - x;
1368                 int h = pbox[i].y2 - y;
1369
1370                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1371
1372                 BEGIN_RING(9);
1373
1374                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1375                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1376                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1377                          RADEON_GMC_BRUSH_NONE |
1378                          (dev_priv->color_fmt << 8) |
1379                          RADEON_GMC_SRC_DATATYPE_COLOR |
1380                          RADEON_ROP3_S |
1381                          RADEON_DP_SRC_SOURCE_MEMORY |
1382                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1383
1384                 /* Make this work even if front & back are flipped:
1385                  */
1386                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1387                 if (sarea_priv->pfCurrentPage == 0) {
1388                         OUT_RING(dev_priv->back_pitch_offset);
1389                         OUT_RING(dev_priv->front_pitch_offset);
1390                 } else {
1391                         OUT_RING(dev_priv->front_pitch_offset);
1392                         OUT_RING(dev_priv->back_pitch_offset);
1393                 }
1394
1395                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1396                 OUT_RING((x << 16) | y);
1397                 OUT_RING((x << 16) | y);
1398                 OUT_RING((w << 16) | h);
1399
1400                 ADVANCE_RING();
1401         }
1402
1403         /* Increment the frame counter.  The client-side 3D driver must
1404          * throttle the framerate by waiting for this value before
1405          * performing the swapbuffer ioctl.
1406          */
1407         sarea_priv->last_frame++;
1408
1409         BEGIN_RING(4);
1410
1411         RADEON_FRAME_AGE(sarea_priv->last_frame);
1412         RADEON_WAIT_UNTIL_2D_IDLE();
1413
1414         ADVANCE_RING();
1415 }
1416
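/* Flip the scanout to the buffer that is not currently displayed, then
 * toggle pfCurrentPage.  The CRTC offset is frame.y * front_pitch plus
 * frame.x scaled by the pixel size (for the 16/32 bpp color formats in
 * use here, color_fmt - 2 presumably works out to the bytes per pixel),
 * rounded down to an 8-byte boundary, plus the buffer's base offset.
 * CRTC2 just gets its saved base plus the same buffer offset.
 */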
1417 static void radeon_cp_dispatch_flip(struct drm_device *dev)
1418 {
1419         drm_radeon_private_t *dev_priv = dev->dev_private;
1420         struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->handle;
1421         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1422             ? dev_priv->front_offset : dev_priv->back_offset;
1423         RING_LOCALS;
1424         DRM_DEBUG("pfCurrentPage=%d\n",
1425                   dev_priv->sarea_priv->pfCurrentPage);
1426
1427         /* Do some trivial performance monitoring...
1428          */
1429         if (dev_priv->do_boxes) {
1430                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1431                 radeon_cp_performance_boxes(dev_priv);
1432         }
1433
1434         /* Update the frame offsets for both CRTCs
1435          */
1436         BEGIN_RING(6);
1437
1438         RADEON_WAIT_UNTIL_3D_IDLE();
1439         OUT_RING_REG(RADEON_CRTC_OFFSET,
1440                      ((sarea->frame.y * dev_priv->front_pitch +
1441                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1442                      + offset);
1443         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1444                      + offset);
1445
1446         ADVANCE_RING();
1447
1448         /* Increment the frame counter.  The client-side 3D driver must
1449          * throttle the framerate by waiting for this value before
1450          * performing the swapbuffer ioctl.
1451          */
1452         dev_priv->sarea_priv->last_frame++;
1453         dev_priv->sarea_priv->pfCurrentPage =
1454                 1 - dev_priv->sarea_priv->pfCurrentPage;
1455
1456         BEGIN_RING(2);
1457
1458         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1459
1460         ADVANCE_RING();
1461 }
1462
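/* Returns non-zero when the vertex count cannot be valid for the given
 * primitive type: lines need an even, non-zero count, triangle and rect
 * lists need a non-zero multiple of three, strips and fans need at least
 * two or three vertices, and unknown primitive types are always rejected.
 */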
1463 static int bad_prim_vertex_nr(int primitive, int nr)
1464 {
1465         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1466         case RADEON_PRIM_TYPE_NONE:
1467         case RADEON_PRIM_TYPE_POINT:
1468                 return nr < 1;
1469         case RADEON_PRIM_TYPE_LINE:
1470                 return (nr & 1) || nr == 0;
1471         case RADEON_PRIM_TYPE_LINE_STRIP:
1472                 return nr < 2;
1473         case RADEON_PRIM_TYPE_TRI_LIST:
1474         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1475         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1476         case RADEON_PRIM_TYPE_RECT_LIST:
1477                 return nr % 3 || nr == 0;
1478         case RADEON_PRIM_TYPE_TRI_FAN:
1479         case RADEON_PRIM_TYPE_TRI_STRIP:
1480                 return nr < 3;
1481         default:
1482                 return 1;
1483         }
1484 }
1485
1486 typedef struct {
1487         unsigned int start;
1488         unsigned int finish;
1489         unsigned int prim;
1490         unsigned int numverts;
1491         unsigned int offset;
1492         unsigned int vc_format;
1493 } drm_radeon_tcl_prim_t;
1494
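/* Emit one RADEON_3D_RNDR_GEN_INDX_PRIM packet per cliprect.  With
 * RADEON_PRIM_WALK_LIST the CP fetches the vertex data itself from
 * gart_buffers_offset + buf->offset + prim->start, so only the small
 * five-dword header goes through the ring for each cliprect.
 */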
1495 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1496                                       struct drm_buf * buf,
1497                                       drm_radeon_tcl_prim_t * prim)
1498 {
1499         drm_radeon_private_t *dev_priv = dev->dev_private;
1500         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1501         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1502         int numverts = (int)prim->numverts;
1503         int nbox = sarea_priv->nbox;
1504         int i = 0;
1505         RING_LOCALS;
1506
1507         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1508                   prim->prim,
1509                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1510
1511         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1512                 DRM_ERROR("bad prim %x numverts %d\n",
1513                           prim->prim, prim->numverts);
1514                 return;
1515         }
1516
1517         do {
1518                 /* Emit the next cliprect */
1519                 if (i < nbox) {
1520                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1521                 }
1522
1523                 /* Emit the vertex buffer rendering commands */
1524                 BEGIN_RING(5);
1525
1526                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1527                 OUT_RING(offset);
1528                 OUT_RING(numverts);
1529                 OUT_RING(prim->vc_format);
1530                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1531                          RADEON_COLOR_ORDER_RGBA |
1532                          RADEON_VTX_FMT_RADEON_MODE |
1533                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1534
1535                 ADVANCE_RING();
1536
1537                 i++;
1538         } while (i < nbox);
1539 }
1540
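/* Hand a DMA buffer back by stamping it with a new dispatch age and
 * emitting that age through the ring; the freelist code can then tell
 * when the GPU has actually finished with the buffer.  R600 and newer
 * parts use a different age packet, hence the split below.
 */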
1541 static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
1542 {
1543         drm_radeon_private_t *dev_priv = dev->dev_private;
1544         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1545         RING_LOCALS;
1546
1547         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1548
1549         /* Emit the vertex buffer age */
1550         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1551                 BEGIN_RING(3);
1552                 R600_DISPATCH_AGE(buf_priv->age);
1553                 ADVANCE_RING();
1554         } else {
1555                 BEGIN_RING(2);
1556                 RADEON_DISPATCH_AGE(buf_priv->age);
1557                 ADVANCE_RING();
1558         }
1559
1560         buf->pending = 1;
1561         buf->used = 0;
1562 }
1563
1564 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1565                                         struct drm_buf * buf, int start, int end)
1566 {
1567         drm_radeon_private_t *dev_priv = dev->dev_private;
1568         RING_LOCALS;
1569         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1570
1571         if (start != end) {
1572                 int offset = (dev_priv->gart_buffers_offset
1573                               + buf->offset + start);
1574                 int dwords = (end - start + 3) / sizeof(u32);
1575
1576                 /* Indirect buffer data must be an even number of
1577                  * dwords, so if we've been given an odd number we must
1578                  * pad the data with a Type-2 CP packet.
1579                  */
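                /* For example, a 12-byte range gives (12 + 3) / 4 = 3 dwords,
                 * so one RADEON_CP_PACKET2 dword is appended to make it 4.
                 */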
1580                 if (dwords & 1) {
1581                         u32 *data = (u32 *)
1582                             ((char *)dev->agp_buffer_map->handle
1583                              + buf->offset + start);
1584                         data[dwords++] = RADEON_CP_PACKET2;
1585                 }
1586
1587                 /* Fire off the indirect buffer */
1588                 BEGIN_RING(3);
1589
1590                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1591                 OUT_RING(offset);
1592                 OUT_RING(dwords);
1593
1594                 ADVANCE_RING();
1595         }
1596 }
1597
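/* Indexed rendering: the five-dword RNDR_GEN_INDX_PRIM header is written
 * directly into the element buffer just ahead of the client's indices
 * (prim->start points at the header, start at the indices), and the
 * whole thing is then fired as an indirect buffer once per cliprect.
 */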
1598 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1599                                        struct drm_buf * elt_buf,
1600                                        drm_radeon_tcl_prim_t * prim)
1601 {
1602         drm_radeon_private_t *dev_priv = dev->dev_private;
1603         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1604         int offset = dev_priv->gart_buffers_offset + prim->offset;
1605         u32 *data;
1606         int dwords;
1607         int i = 0;
1608         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1609         int count = (prim->finish - start) / sizeof(u16);
1610         int nbox = sarea_priv->nbox;
1611
1612         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1613                   prim->prim,
1614                   prim->vc_format,
1615                   prim->start, prim->finish, prim->offset, prim->numverts);
1616
1617         if (bad_prim_vertex_nr(prim->prim, count)) {
1618                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1619                 return;
1620         }
1621
1622         if (start >= prim->finish || (prim->start & 0x7)) {
1623                 DRM_ERROR("buffer prim %d\n", prim->prim);
1624                 return;
1625         }
1626
1627         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1628
1629         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1630                         elt_buf->offset + prim->start);
1631
1632         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1633         data[1] = offset;
1634         data[2] = prim->numverts;
1635         data[3] = prim->vc_format;
1636         data[4] = (prim->prim |
1637                    RADEON_PRIM_WALK_IND |
1638                    RADEON_COLOR_ORDER_RGBA |
1639                    RADEON_VTX_FMT_RADEON_MODE |
1640                    (count << RADEON_NUM_VERTICES_SHIFT));
1641
1642         do {
1643                 if (i < nbox)
1644                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1645
1646                 radeon_cp_dispatch_indirect(dev, elt_buf,
1647                                             prim->start, prim->finish);
1648
1649                 i++;
1650         } while (i < nbox);
1651
1652 }
1653
1654 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1655
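/* Upload a texture image through DMA buffers and the 2D blitter.  The
 * image is consumed in chunks of at most RADEON_MAX_TEXTURE_SIZE bytes;
 * image->y, ->height and ->data are advanced after each pass so that,
 * when no DMA buffer is available, the updated descriptor can be copied
 * back to user space and the ioctl restarted after -EAGAIN.
 */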
1656 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1657                                       struct drm_file *file_priv,
1658                                       drm_radeon_texture_t * tex,
1659                                       drm_radeon_tex_image_t * image)
1660 {
1661         drm_radeon_private_t *dev_priv = dev->dev_private;
1662         struct drm_buf *buf;
1663         u32 format;
1664         u32 *buffer;
1665         const u8 __user *data;
1666         int size, dwords, tex_width, blit_width, spitch;
1667         u32 height;
1668         int i;
1669         u32 texpitch, microtile;
1670         u32 offset, byte_offset;
1671         RING_LOCALS;
1672
1673         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1674                 DRM_ERROR("Invalid destination offset\n");
1675                 return -EINVAL;
1676         }
1677
1678         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1679
1680         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1681          * up with the texture data from the host data blit, otherwise
1682          * part of the texture image may be corrupted.
1683          */
1684         BEGIN_RING(4);
1685         RADEON_FLUSH_CACHE();
1686         RADEON_WAIT_UNTIL_IDLE();
1687         ADVANCE_RING();
1688
1689         /* The compiler won't optimize away a division by a variable,
1690          * even if the only legal values are powers of two.  Thus, we'll
1691          * use a shift instead.
1692          */
1693         switch (tex->format) {
1694         case RADEON_TXFORMAT_ARGB8888:
1695         case RADEON_TXFORMAT_RGBA8888:
1696                 format = RADEON_COLOR_FORMAT_ARGB8888;
1697                 tex_width = tex->width * 4;
1698                 blit_width = image->width * 4;
1699                 break;
1700         case RADEON_TXFORMAT_AI88:
1701         case RADEON_TXFORMAT_ARGB1555:
1702         case RADEON_TXFORMAT_RGB565:
1703         case RADEON_TXFORMAT_ARGB4444:
1704         case RADEON_TXFORMAT_VYUY422:
1705         case RADEON_TXFORMAT_YVYU422:
1706                 format = RADEON_COLOR_FORMAT_RGB565;
1707                 tex_width = tex->width * 2;
1708                 blit_width = image->width * 2;
1709                 break;
1710         case RADEON_TXFORMAT_I8:
1711         case RADEON_TXFORMAT_RGB332:
1712                 format = RADEON_COLOR_FORMAT_CI8;
1713                 tex_width = tex->width * 1;
1714                 blit_width = image->width * 1;
1715                 break;
1716         default:
1717                 DRM_ERROR("invalid texture format %d\n", tex->format);
1718                 return -EINVAL;
1719         }
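        /* The blit source pitch is apparently expressed in 64-byte units,
         * hence blit_width >> 6 below; the pitch/offset dwords emitted later
         * pack that pitch at bit 22 with a 1KB-granular offset in the low
         * bits.  A zero pitch is only tolerated for single-row uploads.
         */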
1720         spitch = blit_width >> 6;
1721         if (spitch == 0 && image->height > 1)
1722                 return -EINVAL;
1723
1724         texpitch = tex->pitch;
1725         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1726                 microtile = 1;
1727                 if (tex_width < 64) {
1728                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1729                         /* we got tiled coordinates, untile them */
1730                         image->x *= 2;
1731                 }
1732         } else
1733                 microtile = 0;
1734
1735         /* this might fail for zero-sized uploads - are those illegal? */
1736         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1737                                 blit_width - 1)) {
1738                 DRM_ERROR("Invalid final destination offset\n");
1739                 return -EINVAL;
1740         }
1741
1742         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1743
1744         do {
1745                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1746                           tex->offset >> 10, tex->pitch, tex->format,
1747                           image->x, image->y, image->width, image->height);
1748
1749                 /* Make a copy of some parameters in case we have to
1750                  * update them for a multi-pass texture blit.
1751                  */
1752                 height = image->height;
1753                 data = (const u8 __user *)image->data;
1754
1755                 size = height * blit_width;
1756
1757                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1758                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1759                         size = height * blit_width;
1760                 } else if (size < 4 && size > 0) {
1761                         size = 4;
1762                 } else if (size == 0) {
1763                         return 0;
1764                 }
1765
1766                 buf = radeon_freelist_get(dev);
1767                 if (0 && !buf) {        /* idle-and-retry path is disabled */
1768                         radeon_do_cp_idle(dev_priv);
1769                         buf = radeon_freelist_get(dev);
1770                 }
1771                 if (!buf) {
1772                         DRM_DEBUG("EAGAIN\n");
1773                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1774                                 return -EFAULT;
1775                         return -EAGAIN;
1776                 }
1777
1778                 /* Dispatch the indirect buffer.
1779                  */
1780                 buffer =
1781                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1782                 dwords = size / 4;
1783
1784 #define RADEON_COPY_MT(_buf, _data, _width) \
1785         do { \
1786                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1787                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1788                         return -EFAULT; \
1789                 } \
1790         } while(0)
1791
1792                 if (microtile) {
1793                         /* Texture micro tiling is in use, so the minimum texture
1794                            width is 16 bytes.  However, we cannot use the blitter
1795                            directly for texture widths below 64 bytes, since the
1796                            minimum tex pitch is 64 bytes and it must match the
1797                            texture width or the blitter will tile it wrongly; in
1798                            that case we tile manually.  We also special-case a tex
1799                            height of 1, since the actual image will have height 2
1800                            and we must not read beyond the texture size from user space. */
1801                         if (tex->height == 1) {
1802                                 if (tex_width >= 64 || tex_width <= 16) {
1803                                         RADEON_COPY_MT(buffer, data,
1804                                                 (int)(tex_width * sizeof(u32)));
1805                                 } else if (tex_width == 32) {
1806                                         RADEON_COPY_MT(buffer, data, 16);
1807                                         RADEON_COPY_MT(buffer + 8,
1808                                                        data + 16, 16);
1809                                 }
1810                         } else if (tex_width >= 64 || tex_width == 16) {
1811                                 RADEON_COPY_MT(buffer, data,
1812                                                (int)(dwords * sizeof(u32)));
1813                         } else if (tex_width < 16) {
1814                                 for (i = 0; i < tex->height; i++) {
1815                                         RADEON_COPY_MT(buffer, data, tex_width);
1816                                         buffer += 4;
1817                                         data += tex_width;
1818                                 }
1819                         } else if (tex_width == 32) {
1820                                 /* TODO: make sure this works when not fitting in one buffer
1821                                    (i.e. 32bytes x 2048...) */
1822                                 for (i = 0; i < tex->height; i += 2) {
1823                                         RADEON_COPY_MT(buffer, data, 16);
1824                                         data += 16;
1825                                         RADEON_COPY_MT(buffer + 8, data, 16);
1826                                         data += 16;
1827                                         RADEON_COPY_MT(buffer + 4, data, 16);
1828                                         data += 16;
1829                                         RADEON_COPY_MT(buffer + 12, data, 16);
1830                                         data += 16;
1831                                         buffer += 16;
1832                                 }
1833                         }
1834                 } else {
1835                         if (tex_width >= 32) {
1836                                 /* Texture image width is larger than the minimum, so we
1837                                  * can upload it directly.
1838                                  */
1839                                 RADEON_COPY_MT(buffer, data,
1840                                                (int)(dwords * sizeof(u32)));
1841                         } else {
1842                                 /* Texture image width is less than the minimum, so we
1843                                  * need to pad out each image scanline to the minimum
1844                                  * width.
1845                                  */
1846                                 for (i = 0; i < tex->height; i++) {
1847                                         RADEON_COPY_MT(buffer, data, tex_width);
1848                                         buffer += 8;
1849                                         data += tex_width;
1850                                 }
1851                         }
1852                 }
1853
1854 #undef RADEON_COPY_MT
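                /* Keep the blit's destination Y coordinate below 2048 by
                 * folding whole multiples of 2048 scanlines into the byte
                 * offset: image->y % 2048 goes into the Y field and
                 * (image->y & ~2047) * blit_width into the destination offset.
                 */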
1855                 byte_offset = (image->y & ~2047) * blit_width;
1856                 buf->file_priv = file_priv;
1857                 buf->used = size;
1858                 offset = dev_priv->gart_buffers_offset + buf->offset;
1859                 BEGIN_RING(9);
1860                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1861                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1862                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1863                          RADEON_GMC_BRUSH_NONE |
1864                          (format << 8) |
1865                          RADEON_GMC_SRC_DATATYPE_COLOR |
1866                          RADEON_ROP3_S |
1867                          RADEON_DP_SRC_SOURCE_MEMORY |
1868                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1869                 OUT_RING((spitch << 22) | (offset >> 10));
1870                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1871                 OUT_RING(0);
1872                 OUT_RING((image->x << 16) | (image->y % 2048));
1873                 OUT_RING((image->width << 16) | height);
1874                 RADEON_WAIT_UNTIL_2D_IDLE();
1875                 ADVANCE_RING();
1876                 COMMIT_RING();
1877
1878                 radeon_cp_discard_buffer(dev, buf);
1879
1880                 /* Update the input parameters for next time */
1881                 image->y += height;
1882                 image->height -= height;
1883                 image->data = (const u8 __user *)image->data + size;
1884         } while (image->height > 0);
1885
1886         /* Flush the pixel cache after the blit completes.  This ensures
1887          * the texture data is written out to memory before rendering
1888          * continues.
1889          */
1890         BEGIN_RING(4);
1891         RADEON_FLUSH_CACHE();
1892         RADEON_WAIT_UNTIL_2D_IDLE();
1893         ADVANCE_RING();
1894         COMMIT_RING();
1895
1896         return 0;
1897 }
1898
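/* Upload the 32x32 polygon stipple pattern: reset RE_STIPPLE_ADDR to
 * zero, then stream all 32 pattern dwords through RE_STIPPLE_DATA, which
 * presumably auto-increments the stipple address between writes.
 */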
1899 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1900 {
1901         drm_radeon_private_t *dev_priv = dev->dev_private;
1902         int i;
1903         RING_LOCALS;
1904         DRM_DEBUG("\n");
1905
1906         BEGIN_RING(35);
1907
1908         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1909         OUT_RING(0x00000000);
1910
1911         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1912         for (i = 0; i < 32; i++) {
1913                 OUT_RING(stipple[i]);
1914         }
1915
1916         ADVANCE_RING();
1917 }
1918
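/* Write the cached parameters for one surface into its register triplet;
 * the SURFACE<n>_INFO/LOWER_BOUND/UPPER_BOUND registers of consecutive
 * surfaces are spaced 16 bytes apart, hence the 16 * surf_index stride.
 * The CP is idled first since these are straight MMIO writes.
 */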
1919 static void radeon_apply_surface_regs(int surf_index,
1920                                       drm_radeon_private_t *dev_priv)
1921 {
1922         if (!dev_priv->mmio)
1923                 return;
1924
1925         radeon_do_cp_idle(dev_priv);
1926
1927         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1928                      dev_priv->surfaces[surf_index].flags);
1929         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1930                      dev_priv->surfaces[surf_index].lower);
1931         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1932                      dev_priv->surfaces[surf_index].upper);
1933 }
1934
1935 /* Allocates a virtual surface
1936  * doesn't always allocate a real surface, will stretch an existing
1937  * surface when possible.
1938  *
1939  * Note that refcount can be at most 2: if a surface could reach a
1940  * refcount of 3, freeing the middle piece might force us to allocate a
1941  * new real surface, which is not always possible.
1942  * For example: we allocate three contiguous surfaces ABC. If B is
1943  * freed, we suddenly need two surfaces to store A and C, which might
1944  * not always be available.
1945  */
1946 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1947                          drm_radeon_private_t *dev_priv,
1948                          struct drm_file *file_priv)
1949 {
1950         struct radeon_virt_surface *s;
1951         int i;
1952         int virt_surface_index;
1953         uint32_t new_upper, new_lower;
1954
1955         new_lower = new->address;
1956         new_upper = new_lower + new->size - 1;
1957
1958         /* sanity check */
1959         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1960             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1961              RADEON_SURF_ADDRESS_FIXED_MASK)
1962             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1963                 return -1;
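        /* The check above requires a non-empty range with some flags set,
         * starting on a surface-granularity boundary (low FIXED_MASK bits
         * clear) and ending one byte short of one (low bits all set).
         */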
1964
1965         /* make sure there is no overlap with existing surfaces */
1966         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1967                 if ((dev_priv->surfaces[i].refcount != 0) &&
1968                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1969                       (new_lower < dev_priv->surfaces[i].upper)) ||
1970                      ((new_lower < dev_priv->surfaces[i].lower) &&
1971                       (new_upper > dev_priv->surfaces[i].lower)))) {
1972                         return -1;
1973                 }
1974         }
1975
1976         /* find a virtual surface */
1977         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1978                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
1979                         break;
1980         if (i == 2 * RADEON_MAX_SURFACES) {
1981                 return -1;
1982         }
1983         virt_surface_index = i;
1984
1985         /* try to reuse an existing surface */
1986         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1987                 /* extend before */
1988                 if ((dev_priv->surfaces[i].refcount == 1) &&
1989                     (new->flags == dev_priv->surfaces[i].flags) &&
1990                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1991                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1992                         s->surface_index = i;
1993                         s->lower = new_lower;
1994                         s->upper = new_upper;
1995                         s->flags = new->flags;
1996                         s->file_priv = file_priv;
1997                         dev_priv->surfaces[i].refcount++;
1998                         dev_priv->surfaces[i].lower = s->lower;
1999                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2000                         return virt_surface_index;
2001                 }
2002
2003                 /* extend after */
2004                 if ((dev_priv->surfaces[i].refcount == 1) &&
2005                     (new->flags == dev_priv->surfaces[i].flags) &&
2006                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2007                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2008                         s->surface_index = i;
2009                         s->lower = new_lower;
2010                         s->upper = new_upper;
2011                         s->flags = new->flags;
2012                         s->file_priv = file_priv;
2013                         dev_priv->surfaces[i].refcount++;
2014                         dev_priv->surfaces[i].upper = s->upper;
2015                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2016                         return virt_surface_index;
2017                 }
2018         }
2019
2020         /* okay, we need a new one */
2021         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2022                 if (dev_priv->surfaces[i].refcount == 0) {
2023                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2024                         s->surface_index = i;
2025                         s->lower = new_lower;
2026                         s->upper = new_upper;
2027                         s->flags = new->flags;
2028                         s->file_priv = file_priv;
2029                         dev_priv->surfaces[i].refcount = 1;
2030                         dev_priv->surfaces[i].lower = s->lower;
2031                         dev_priv->surfaces[i].upper = s->upper;
2032                         dev_priv->surfaces[i].flags = s->flags;
2033                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2034                         return virt_surface_index;
2035                 }
2036         }
2037
2038         /* we didn't find anything */
2039         return -1;
2040 }
2041
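/* Release one virtual surface owned by file_priv.  If the freed range
 * sits at the lower or upper edge of its real surface, the real surface
 * is shrunk accordingly; once the refcount reaches zero its flags are
 * cleared, which effectively disables that hardware surface.
 */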
2042 static int free_surface(struct drm_file *file_priv,
2043                         drm_radeon_private_t * dev_priv,
2044                         int lower)
2045 {
2046         struct radeon_virt_surface *s;
2047         int i;
2048         /* find the virtual surface */
2049         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2050                 s = &(dev_priv->virt_surfaces[i]);
2051                 if (s->file_priv) {
2052                         if ((lower == s->lower) && (file_priv == s->file_priv))
2053                         {
2054                                 if (dev_priv->surfaces[s->surface_index].
2055                                     lower == s->lower)
2056                                         dev_priv->surfaces[s->surface_index].
2057                                             lower = s->upper;
2058
2059                                 if (dev_priv->surfaces[s->surface_index].
2060                                     upper == s->upper)
2061                                         dev_priv->surfaces[s->surface_index].
2062                                             upper = s->lower;
2063
2064                                 dev_priv->surfaces[s->surface_index].refcount--;
2065                                 if (dev_priv->surfaces[s->surface_index].
2066                                     refcount == 0)
2067                                         dev_priv->surfaces[s->surface_index].
2068                                             flags = 0;
2069                                 s->file_priv = NULL;
2070                                 radeon_apply_surface_regs(s->surface_index,
2071                                                           dev_priv);
2072                                 return 0;
2073                         }
2074                 }
2075         }
2076         return 1;
2077 }
2078
2079 static void radeon_surfaces_release(struct drm_file *file_priv,
2080                                     drm_radeon_private_t * dev_priv)
2081 {
2082         int i;
2083         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2084                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2085                         free_surface(file_priv, dev_priv,
2086                                      dev_priv->virt_surfaces[i].lower);
2087         }
2088 }
2089
2090 /* ================================================================
2091  * IOCTL functions
2092  */
2093 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2094 {
2095         drm_radeon_private_t *dev_priv = dev->dev_private;
2096         drm_radeon_surface_alloc_t *alloc = data;
2097
2098         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2099                 return -EINVAL;
2100         else
2101                 return 0;
2102 }
2103
2104 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2105 {
2106         drm_radeon_private_t *dev_priv = dev->dev_private;
2107         drm_radeon_surface_free_t *memfree = data;
2108
2109         if (free_surface(file_priv, dev_priv, memfree->address))
2110                 return -EINVAL;
2111         else
2112                 return 0;
2113 }
2114
2115 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2116 {
2117         drm_radeon_private_t *dev_priv = dev->dev_private;
2118         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2119         drm_radeon_clear_t *clear = data;
2120         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2121         DRM_DEBUG("\n");
2122
2123         LOCK_TEST_WITH_RETURN(dev, file_priv);
2124
2125         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2126
2127         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2128                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2129
2130         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2131                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2132                 return -EFAULT;
2133
2134         radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2135
2136         COMMIT_RING();
2137         return 0;
2138 }
2139
2140 /* Not sure why this isn't set all the time:
2141  */
2142 static int radeon_do_init_pageflip(struct drm_device *dev)
2143 {
2144         drm_radeon_private_t *dev_priv = dev->dev_private;
2145         RING_LOCALS;
2146
2147         DRM_DEBUG("\n");
2148
2149         BEGIN_RING(6);
2150         RADEON_WAIT_UNTIL_3D_IDLE();
2151         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2152         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2153                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2154         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2155         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2156                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2157         ADVANCE_RING();
2158
2159         dev_priv->page_flipping = 1;
2160
2161         if (dev_priv->sarea_priv->pfCurrentPage != 1)
2162                 dev_priv->sarea_priv->pfCurrentPage = 0;
2163
2164         return 0;
2165 }
2166
2167 /* Swapping and flipping are different operations, need different ioctls.
2168  * They can & should be intermixed to support multiple 3d windows.
2169  */
2170 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2171 {
2172         drm_radeon_private_t *dev_priv = dev->dev_private;
2173         DRM_DEBUG("\n");
2174
2175         LOCK_TEST_WITH_RETURN(dev, file_priv);
2176
2177         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2178
2179         if (!dev_priv->page_flipping)
2180                 radeon_do_init_pageflip(dev);
2181
2182         radeon_cp_dispatch_flip(dev);
2183
2184         COMMIT_RING();
2185         return 0;
2186 }
2187
2188 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2189 {
2190         drm_radeon_private_t *dev_priv = dev->dev_private;
2191         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2192
2193         DRM_DEBUG("\n");
2194
2195         LOCK_TEST_WITH_RETURN(dev, file_priv);
2196
2197         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2198
2199         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2200                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2201
2202         radeon_cp_dispatch_swap(dev);
2203         sarea_priv->ctx_owner = 0;
2204
2205         COMMIT_RING();
2206         return 0;
2207 }
2208
2209 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2210 {
2211         drm_radeon_private_t *dev_priv = dev->dev_private;
2212         drm_radeon_sarea_t *sarea_priv;
2213         struct drm_device_dma *dma = dev->dma;
2214         struct drm_buf *buf;
2215         drm_radeon_vertex_t *vertex = data;
2216         drm_radeon_tcl_prim_t prim;
2217
2218         LOCK_TEST_WITH_RETURN(dev, file_priv);
2219
2220         sarea_priv = dev_priv->sarea_priv;
2221
2222         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2223                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2224
2225         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2226                 DRM_ERROR("buffer index %d (of %d max)\n",
2227                           vertex->idx, dma->buf_count - 1);
2228                 return -EINVAL;
2229         }
2230         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2231                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2232                 return -EINVAL;
2233         }
2234
2235         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2236         VB_AGE_TEST_WITH_RETURN(dev_priv);
2237
2238         buf = dma->buflist[vertex->idx];
2239
2240         if (buf->file_priv != file_priv) {
2241                 DRM_ERROR("process %d using buffer owned by %p\n",
2242                           DRM_CURRENTPID, buf->file_priv);
2243                 return -EINVAL;
2244         }
2245         if (buf->pending) {
2246                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2247                 return -EINVAL;
2248         }
2249
2250         /* Build up a prim_t record:
2251          */
2252         if (vertex->count) {
2253                 buf->used = vertex->count;      /* not used? */
2254
2255                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2256                         if (radeon_emit_state(dev_priv, file_priv,
2257                                               &sarea_priv->context_state,
2258                                               sarea_priv->tex_state,
2259                                               sarea_priv->dirty)) {
2260                                 DRM_ERROR("radeon_emit_state failed\n");
2261                                 return -EINVAL;
2262                         }
2263
2264                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2265                                                RADEON_UPLOAD_TEX1IMAGES |
2266                                                RADEON_UPLOAD_TEX2IMAGES |
2267                                                RADEON_REQUIRE_QUIESCENCE);
2268                 }
2269
2270                 prim.start = 0;
2271                 prim.finish = vertex->count;    /* unused */
2272                 prim.prim = vertex->prim;
2273                 prim.numverts = vertex->count;
2274                 prim.vc_format = sarea_priv->vc_format;
2275
2276                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2277         }
2278
2279         if (vertex->discard) {
2280                 radeon_cp_discard_buffer(dev, buf);
2281         }
2282
2283         COMMIT_RING();
2284         return 0;
2285 }
2286
2287 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2288 {
2289         drm_radeon_private_t *dev_priv = dev->dev_private;
2290         drm_radeon_sarea_t *sarea_priv;
2291         struct drm_device_dma *dma = dev->dma;
2292         struct drm_buf *buf;
2293         drm_radeon_indices_t *elts = data;
2294         drm_radeon_tcl_prim_t prim;
2295         int count;
2296
2297         LOCK_TEST_WITH_RETURN(dev, file_priv);
2298
2299         sarea_priv = dev_priv->sarea_priv;
2300
2301         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2302                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2303                   elts->discard);
2304
2305         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2306                 DRM_ERROR("buffer index %d (of %d max)\n",
2307                           elts->idx, dma->buf_count - 1);
2308                 return -EINVAL;
2309         }
2310         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2311                 DRM_ERROR("buffer prim %d\n", elts->prim);
2312                 return -EINVAL;
2313         }
2314
2315         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2316         VB_AGE_TEST_WITH_RETURN(dev_priv);
2317
2318         buf = dma->buflist[elts->idx];
2319
2320         if (buf->file_priv != file_priv) {
2321                 DRM_ERROR("process %d using buffer owned by %p\n",
2322                           DRM_CURRENTPID, buf->file_priv);
2323                 return -EINVAL;
2324         }
2325         if (buf->pending) {
2326                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2327                 return -EINVAL;
2328         }
2329
2330         count = (elts->end - elts->start) / sizeof(u16);
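        /* Step back over the space reserved for the five-dword
         * RNDR_GEN_INDX_PRIM header that radeon_cp_dispatch_indices()
         * writes immediately in front of the indices.
         */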
2331         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2332
2333         if (elts->start & 0x7) {
2334                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2335                 return -EINVAL;
2336         }
2337         if (elts->start < buf->used) {
2338                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2339                 return -EINVAL;
2340         }
2341
2342         buf->used = elts->end;
2343
2344         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2345                 if (radeon_emit_state(dev_priv, file_priv,
2346                                       &sarea_priv->context_state,
2347                                       sarea_priv->tex_state,
2348                                       sarea_priv->dirty)) {
2349                         DRM_ERROR("radeon_emit_state failed\n");
2350                         return -EINVAL;
2351                 }
2352
2353                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2354                                        RADEON_UPLOAD_TEX1IMAGES |
2355                                        RADEON_UPLOAD_TEX2IMAGES |
2356                                        RADEON_REQUIRE_QUIESCENCE);
2357         }
2358
2359         /* Build up a prim_t record:
2360          */
2361         prim.start = elts->start;
2362         prim.finish = elts->end;
2363         prim.prim = elts->prim;
2364         prim.offset = 0;        /* offset from start of dma buffers */
2365         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2366         prim.vc_format = sarea_priv->vc_format;
2367
2368         radeon_cp_dispatch_indices(dev, buf, &prim);
2369         if (elts->discard) {
2370                 radeon_cp_discard_buffer(dev, buf);
2371         }
2372
2373         COMMIT_RING();
2374         return 0;
2375 }
2376
2377 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2378 {
2379         drm_radeon_private_t *dev_priv = dev->dev_private;
2380         drm_radeon_texture_t *tex = data;
2381         drm_radeon_tex_image_t image;
2382         int ret;
2383
2384         LOCK_TEST_WITH_RETURN(dev, file_priv);
2385
2386         if (tex->image == NULL) {
2387                 DRM_ERROR("null texture image!\n");
2388                 return -EINVAL;
2389         }
2390
2391         if (DRM_COPY_FROM_USER(&image,
2392                                (drm_radeon_tex_image_t __user *) tex->image,
2393                                sizeof(image)))
2394                 return -EFAULT;
2395
2396         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2397         VB_AGE_TEST_WITH_RETURN(dev_priv);
2398
2399         ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2400
2401         return ret;
2402 }
2403
2404 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2405 {
2406         drm_radeon_private_t *dev_priv = dev->dev_private;
2407         drm_radeon_stipple_t *stipple = data;
2408         u32 mask[32];
2409
2410         LOCK_TEST_WITH_RETURN(dev, file_priv);
2411
2412         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2413                 return -EFAULT;
2414
2415         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2416
2417         radeon_cp_dispatch_stipple(dev, mask);
2418
2419         COMMIT_RING();
2420         return 0;
2421 }
2422
2423 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2424 {
2425         drm_radeon_private_t *dev_priv = dev->dev_private;
2426         struct drm_device_dma *dma = dev->dma;
2427         struct drm_buf *buf;
2428         drm_radeon_indirect_t *indirect = data;
2429         RING_LOCALS;
2430
2431         LOCK_TEST_WITH_RETURN(dev, file_priv);
2432
2433         if (!dev_priv) {
2434                 DRM_ERROR("called with no initialization\n");
2435                 return -EINVAL;
2436         }
2437
2438         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2439                   indirect->idx, indirect->start, indirect->end,
2440                   indirect->discard);
2441
2442         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2443                 DRM_ERROR("buffer index %d (of %d max)\n",
2444                           indirect->idx, dma->buf_count - 1);
2445                 return -EINVAL;
2446         }
2447
2448         buf = dma->buflist[indirect->idx];
2449
2450         if (buf->file_priv != file_priv) {
2451                 DRM_ERROR("process %d using buffer owned by %p\n",
2452                           DRM_CURRENTPID, buf->file_priv);
2453                 return -EINVAL;
2454         }
2455         if (buf->pending) {
2456                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2457                 return -EINVAL;
2458         }
2459
2460         if (indirect->start < buf->used) {
2461                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2462                           indirect->start, buf->used);
2463                 return -EINVAL;
2464         }
2465
2466         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2467         VB_AGE_TEST_WITH_RETURN(dev_priv);
2468
2469         buf->used = indirect->end;
2470
2471         /* Dispatch the indirect buffer full of commands from the
2472          * X server.  This is insecure and is thus only available to
2473          * privileged clients.
2474          */
2475         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2476                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2477         else {
2478                 /* Wait for the 3D stream to idle before the indirect buffer
2479                  * containing 2D acceleration commands is processed.
2480                  */
2481                 BEGIN_RING(2);
2482                 RADEON_WAIT_UNTIL_3D_IDLE();
2483                 ADVANCE_RING();
2484                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2485         }
2486
2487         if (indirect->discard)
2488                 radeon_cp_discard_buffer(dev, buf);
2489
2490         COMMIT_RING();
2491         return 0;
2492 }
2493
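/* Newer vertex ioctl: one DMA buffer can carry several primitives, each
 * referencing a state block by index.  State is re-emitted only when
 * stateidx changes between primitives, and primitives flagged with
 * RADEON_PRIM_WALK_IND are dispatched through the indexed path instead
 * of the plain vertex path.
 */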
2494 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2495 {
2496         drm_radeon_private_t *dev_priv = dev->dev_private;
2497         drm_radeon_sarea_t *sarea_priv;
2498         struct drm_device_dma *dma = dev->dma;
2499         struct drm_buf *buf;
2500         drm_radeon_vertex2_t *vertex = data;
2501         int i;
2502         unsigned char laststate;
2503
2504         LOCK_TEST_WITH_RETURN(dev, file_priv);
2505
2506         sarea_priv = dev_priv->sarea_priv;
2507
2508         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2509                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2510
2511         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2512                 DRM_ERROR("buffer index %d (of %d max)\n",
2513                           vertex->idx, dma->buf_count - 1);
2514                 return -EINVAL;
2515         }
2516
2517         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2518         VB_AGE_TEST_WITH_RETURN(dev_priv);
2519
2520         buf = dma->buflist[vertex->idx];
2521
2522         if (buf->file_priv != file_priv) {
2523                 DRM_ERROR("process %d using buffer owned by %p\n",
2524                           DRM_CURRENTPID, buf->file_priv);
2525                 return -EINVAL;
2526         }
2527
2528         if (buf->pending) {
2529                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2530                 return -EINVAL;
2531         }
2532
2533         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2534                 return -EINVAL;
2535
2536         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2537                 drm_radeon_prim_t prim;
2538                 drm_radeon_tcl_prim_t tclprim;
2539
2540                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2541                         return -EFAULT;
2542
2543                 if (prim.stateidx != laststate) {
2544                         drm_radeon_state_t state;
2545
2546                         if (DRM_COPY_FROM_USER(&state,
2547                                                &vertex->state[prim.stateidx],
2548                                                sizeof(state)))
2549                                 return -EFAULT;
2550
2551                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2552                                 DRM_ERROR("radeon_emit_state2 failed\n");
2553                                 return -EINVAL;
2554                         }
2555
2556                         laststate = prim.stateidx;
2557                 }
2558
2559                 tclprim.start = prim.start;
2560                 tclprim.finish = prim.finish;
2561                 tclprim.prim = prim.prim;
2562                 tclprim.vc_format = prim.vc_format;
2563
2564                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2565                         tclprim.offset = prim.numverts * 64;
2566                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* indexed walk may reference any vertex in the buffer */
2567
2568                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2569                 } else {
2570                         tclprim.numverts = prim.numverts;
2571                         tclprim.offset = 0;     /* not used */
2572
2573                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2574                 }
2575
2576                 if (sarea_priv->nbox == 1)
2577                         sarea_priv->nbox = 0;
2578         }
2579
2580         if (vertex->discard) {
2581                 radeon_cp_discard_buffer(dev, buf);
2582         }
2583
2584         COMMIT_RING();
2585         return 0;
2586 }
2587
2588 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2589                                struct drm_file *file_priv,
2590                                drm_radeon_cmd_header_t header,
2591                                drm_radeon_kcmd_buffer_t *cmdbuf)
2592 {
2593         int id = (int)header.packet.packet_id;
2594         int sz, reg;
2595         int *data = (int *)cmdbuf->buf;
2596         RING_LOCALS;
2597
2598         if (id >= RADEON_MAX_STATE_PACKETS)
2599                 return -EINVAL;
2600
2601         sz = packet[id].len;
2602         reg = packet[id].start;
2603
2604         if (sz * sizeof(int) > cmdbuf->bufsz) {
2605                 DRM_ERROR("Packet size larger than provided data\n");
2606                 return -EINVAL;
2607         }
2608
2609         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2610                 DRM_ERROR("Packet verification failed\n");
2611                 return -EINVAL;
2612         }
2613
2614         BEGIN_RING(sz + 1);
2615         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2616         OUT_RING_TABLE(data, sz);
2617         ADVANCE_RING();
2618
2619         cmdbuf->buf += sz * sizeof(int);
2620         cmdbuf->bufsz -= sz * sizeof(int);
2621         return 0;
2622 }
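/* For reference, the ring contents emitted above form a single type-0 CP
 * packet: one header dword built by CP_PACKET0(reg, sz - 1), which tells
 * the CP to write the following sz dwords to consecutive registers
 * starting at reg, followed by the sz dwords copied from the command
 * buffer.  Sketch of the layout (illustrative only):
 *
 *      ring[0] = CP_PACKET0(reg, sz - 1);  // header: base register, count-1
 *      ring[1] = data[0];                  // -> reg
 *      ring[2] = data[1];                  // -> reg + 4
 *      ...                                 // sz data dwords in total
 */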
2623
2624 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2625                                           drm_radeon_cmd_header_t header,
2626                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2627 {
2628         int sz = header.scalars.count;
2629         int start = header.scalars.offset;
2630         int stride = header.scalars.stride;
2631         RING_LOCALS;
2632
2633         BEGIN_RING(3 + sz);
2634         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2635         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2636         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2637         OUT_RING_TABLE(cmdbuf->buf, sz);
2638         ADVANCE_RING();
2639         cmdbuf->buf += sz * sizeof(int);
2640         cmdbuf->bufsz -= sz * sizeof(int);
2641         return 0;
2642 }
2643
2644 /* Same as radeon_emit_scalars(), but with the scalar index offset by 0x100.
2645  */
2646 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2647                                            drm_radeon_cmd_header_t header,
2648                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2649 {
2650         int sz = header.scalars.count;
2651         int start = ((unsigned int)header.scalars.offset) + 0x100;
2652         int stride = header.scalars.stride;
2653         RING_LOCALS;
2654
2655         BEGIN_RING(3 + sz);
2656         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2657         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2658         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2659         OUT_RING_TABLE(cmdbuf->buf, sz);
2660         ADVANCE_RING();
2661         cmdbuf->buf += sz * sizeof(int);
2662         cmdbuf->bufsz -= sz * sizeof(int);
2663         return 0;
2664 }
2665
2666 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2667                                           drm_radeon_cmd_header_t header,
2668                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2669 {
2670         int sz = header.vectors.count;
2671         int start = header.vectors.offset;
2672         int stride = header.vectors.stride;
2673         RING_LOCALS;
2674
2675         BEGIN_RING(5 + sz);
2676         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2677         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2678         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2679         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2680         OUT_RING_TABLE(cmdbuf->buf, sz);
2681         ADVANCE_RING();
2682
2683         cmdbuf->buf += sz * sizeof(int);
2684         cmdbuf->bufsz -= sz * sizeof(int);
2685         return 0;
2686 }
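/* The scalar and vector emitters above share one pattern: program the TCL
 * index register with a start offset and stride, then burst the payload
 * into the matching data register.  CP_PACKET0_TABLE() differs from
 * CP_PACKET0() only in setting the one-register-write bit, so every
 * payload dword goes to the same data register while the TCL index
 * advances on its own.  Rough shape of the burst (illustrative only):
 *
 *      OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, sz - 1));
 *      OUT_RING_TABLE(cmdbuf->buf, sz);    // sz dwords, one register
 */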
2687
2688 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2689                                           drm_radeon_cmd_header_t header,
2690                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2691 {
2692         int sz = header.veclinear.count * 4;
2693         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2694         RING_LOCALS;
2695
2696         if (!sz)
2697                 return 0;
2698         if (sz * 4 > cmdbuf->bufsz)
2699                 return -EINVAL;
2700
2701         BEGIN_RING(5 + sz);
2702         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2703         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2704         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2705         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2706         OUT_RING_TABLE(cmdbuf->buf, sz);
2707         ADVANCE_RING();
2708
2709         cmdbuf->buf += sz * sizeof(int);
2710         cmdbuf->bufsz -= sz * sizeof(int);
2711         return 0;
2712 }
2713
2714 static int radeon_emit_packet3(struct drm_device * dev,
2715                                struct drm_file *file_priv,
2716                                drm_radeon_kcmd_buffer_t *cmdbuf)
2717 {
2718         drm_radeon_private_t *dev_priv = dev->dev_private;
2719         unsigned int cmdsz;
2720         int ret;
2721         RING_LOCALS;
2722
2723         DRM_DEBUG("\n");
2724
2725         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2726                                                   cmdbuf, &cmdsz))) {
2727                 DRM_ERROR("Packet verification failed\n");
2728                 return ret;
2729         }
2730
2731         BEGIN_RING(cmdsz);
2732         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2733         ADVANCE_RING();
2734
2735         cmdbuf->buf += cmdsz * 4;
2736         cmdbuf->bufsz -= cmdsz * 4;
2737         return 0;
2738 }
2739
2740 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2741                                         struct drm_file *file_priv,
2742                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2743                                         int orig_nbox)
2744 {
2745         drm_radeon_private_t *dev_priv = dev->dev_private;
2746         struct drm_clip_rect box;
2747         unsigned int cmdsz;
2748         int ret;
2749         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2750         int i = 0;
2751         RING_LOCALS;
2752
2753         DRM_DEBUG("\n");
2754
2755         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2756                                                   cmdbuf, &cmdsz))) {
2757                 DRM_ERROR("Packet verification failed\n");
2758                 return ret;
2759         }
2760
2761         if (!orig_nbox)
2762                 goto out;
2763
2764         do {
2765                 if (i < cmdbuf->nbox) {
2766                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2767                                 return -EFAULT;
2768                         /* FIXME The second and subsequent times round
2769                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2770                          * calling emit_clip_rect(). This fixes a
2771                          * lockup on fast machines when sending
2772                          * several cliprects with a cmdbuf, as when
2773                          * waving a 2D window over a 3D
2774                          * window. Something in the commands from user
2775                          * space seems to hang the card when they're
2776                          * sent several times in a row. That would be
2777                          * the correct place to fix it but this works
2778                          * around it until I can figure that out - Tim
2779                          * Smith */
2780                         if (i) {
2781                                 BEGIN_RING(2);
2782                                 RADEON_WAIT_UNTIL_3D_IDLE();
2783                                 ADVANCE_RING();
2784                         }
2785                         radeon_emit_clip_rect(dev_priv, &box);
2786                 }
2787
2788                 BEGIN_RING(cmdsz);
2789                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2790                 ADVANCE_RING();
2791
2792         } while (++i < cmdbuf->nbox);
2793         if (cmdbuf->nbox == 1)
2794                 cmdbuf->nbox = 0;
2795
2796       out:
2797         cmdbuf->buf += cmdsz * 4;
2798         cmdbuf->bufsz -= cmdsz * 4;
2799         return 0;
2800 }
2801
2802 static int radeon_emit_wait(struct drm_device * dev, int flags)
2803 {
2804         drm_radeon_private_t *dev_priv = dev->dev_private;
2805         RING_LOCALS;
2806
2807         DRM_DEBUG("%x\n", flags);
2808         switch (flags) {
2809         case RADEON_WAIT_2D:
2810                 BEGIN_RING(2);
2811                 RADEON_WAIT_UNTIL_2D_IDLE();
2812                 ADVANCE_RING();
2813                 break;
2814         case RADEON_WAIT_3D:
2815                 BEGIN_RING(2);
2816                 RADEON_WAIT_UNTIL_3D_IDLE();
2817                 ADVANCE_RING();
2818                 break;
2819         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2820                 BEGIN_RING(2);
2821                 RADEON_WAIT_UNTIL_IDLE();
2822                 ADVANCE_RING();
2823                 break;
2824         default:
2825                 return -EINVAL;
2826         }
2827
2828         return 0;
2829 }
2830
2831 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2832 {
2833         drm_radeon_private_t *dev_priv = dev->dev_private;
2834         struct drm_device_dma *dma = dev->dma;
2835         struct drm_buf *buf = NULL;
2836         int idx;
2837         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2838         drm_radeon_cmd_header_t header;
2839         int orig_nbox, orig_bufsz;
2840         char *kbuf = NULL;
2841
2842         LOCK_TEST_WITH_RETURN(dev, file_priv);
2843
2844         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2845         VB_AGE_TEST_WITH_RETURN(dev_priv);
2846
2847         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2848                 return -EINVAL;
2849         }
2850
2851         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2852          * races between checking values and using those values in other code,
2853          * and simply to avoid a lot of function calls to copy in data.
2854          */
2855         orig_bufsz = cmdbuf->bufsz;
2856         if (orig_bufsz != 0) {
2857                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2858                 if (kbuf == NULL)
2859                         return -ENOMEM;
2860                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2861                                        cmdbuf->bufsz)) {
2862                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2863                         return -EFAULT;
2864                 }
2865                 cmdbuf->buf = kbuf;
2866         }
2867
2868         orig_nbox = cmdbuf->nbox;
2869
2870         if (dev_priv->microcode_version == UCODE_R300) {
2871                 int temp;
2872                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2873
2874                 if (orig_bufsz != 0)
2875                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2876
2877                 return temp;
2878         }
2879
2880         /* microcode_version != r300 */
2881         while (cmdbuf->bufsz >= sizeof(header)) {
2882
2883                 header.i = *(int *)cmdbuf->buf;
2884                 cmdbuf->buf += sizeof(header);
2885                 cmdbuf->bufsz -= sizeof(header);
2886
2887                 switch (header.header.cmd_type) {
2888                 case RADEON_CMD_PACKET:
2889                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2890                         if (radeon_emit_packets
2891                             (dev_priv, file_priv, header, cmdbuf)) {
2892                                 DRM_ERROR("radeon_emit_packets failed\n");
2893                                 goto err;
2894                         }
2895                         break;
2896
2897                 case RADEON_CMD_SCALARS:
2898                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2899                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2900                                 DRM_ERROR("radeon_emit_scalars failed\n");
2901                                 goto err;
2902                         }
2903                         break;
2904
2905                 case RADEON_CMD_VECTORS:
2906                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2907                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2908                                 DRM_ERROR("radeon_emit_vectors failed\n");
2909                                 goto err;
2910                         }
2911                         break;
2912
2913                 case RADEON_CMD_DMA_DISCARD:
2914                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2915                         idx = header.dma.buf_idx;
2916                         if (idx < 0 || idx >= dma->buf_count) {
2917                                 DRM_ERROR("buffer index %d (of %d max)\n",
2918                                           idx, dma->buf_count - 1);
2919                                 goto err;
2920                         }
2921
2922                         buf = dma->buflist[idx];
2923                         if (buf->file_priv != file_priv || buf->pending) {
2924                                 DRM_ERROR("bad buffer %p %p %d\n",
2925                                           buf->file_priv, file_priv,
2926                                           buf->pending);
2927                                 goto err;
2928                         }
2929
2930                         radeon_cp_discard_buffer(dev, buf);
2931                         break;
2932
2933                 case RADEON_CMD_PACKET3:
2934                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2935                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2936                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2937                                 goto err;
2938                         }
2939                         break;
2940
2941                 case RADEON_CMD_PACKET3_CLIP:
2942                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2943                         if (radeon_emit_packet3_cliprect
2944                             (dev, file_priv, cmdbuf, orig_nbox)) {
2945                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2946                                 goto err;
2947                         }
2948                         break;
2949
2950                 case RADEON_CMD_SCALARS2:
2951                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2952                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2953                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2954                                 goto err;
2955                         }
2956                         break;
2957
2958                 case RADEON_CMD_WAIT:
2959                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2960                         if (radeon_emit_wait(dev, header.wait.flags)) {
2961                                 DRM_ERROR("radeon_emit_wait failed\n");
2962                                 goto err;
2963                         }
2964                         break;
2965                 case RADEON_CMD_VECLINEAR:
2966                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2967                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2968                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2969                                 goto err;
2970                         }
2971                         break;
2972
2973                 default:
2974                         DRM_ERROR("bad cmd_type %d at %p\n",
2975                                   header.header.cmd_type,
2976                                   cmdbuf->buf - sizeof(header));
2977                         goto err;
2978                 }
2979         }
2980
2981         if (orig_bufsz != 0)
2982                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2983
2984         DRM_DEBUG("DONE\n");
2985         COMMIT_RING();
2986         return 0;
2987
2988       err:
2989         if (orig_bufsz != 0)
2990                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2991         return -EINVAL;
2992 }
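/* Illustrative sketch, not part of the driver: the command stream parsed
 * above is a packed sequence of drm_radeon_cmd_header_t headers, each
 * followed by its payload.  A minimal client-side buffer holding a single
 * WAIT command might be assembled like this ("drm_fd" is assumed to be an
 * open, authenticated DRM file descriptor):
 *
 *      drm_radeon_cmd_header_t h;
 *      drm_radeon_cmd_buffer_t cb;
 *      char stream[sizeof(h)];
 *
 *      h.i = 0;
 *      h.wait.cmd_type = RADEON_CMD_WAIT;
 *      h.wait.flags    = RADEON_WAIT_3D;
 *      memcpy(stream, &h, sizeof(h));      // header only; WAIT has no payload
 *
 *      memset(&cb, 0, sizeof(cb));
 *      cb.buf   = stream;
 *      cb.bufsz = sizeof(stream);
 *      ioctl(drm_fd, DRM_IOCTL_RADEON_CMDBUF, &cb);
 */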
2993
2994 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
2995 {
2996         drm_radeon_private_t *dev_priv = dev->dev_private;
2997         drm_radeon_getparam_t *param = data;
2998         int value;
2999
3000         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3001
3002         switch (param->param) {
3003         case RADEON_PARAM_GART_BUFFER_OFFSET:
3004                 value = dev_priv->gart_buffers_offset;
3005                 break;
3006         case RADEON_PARAM_LAST_FRAME:
3007                 dev_priv->stats.last_frame_reads++;
3008                 value = GET_SCRATCH(dev_priv, 0);
3009                 break;
3010         case RADEON_PARAM_LAST_DISPATCH:
3011                 value = GET_SCRATCH(dev_priv, 1);
3012                 break;
3013         case RADEON_PARAM_LAST_CLEAR:
3014                 dev_priv->stats.last_clear_reads++;
3015                 value = GET_SCRATCH(dev_priv, 2);
3016                 break;
3017         case RADEON_PARAM_IRQ_NR:
3018                 value = dev->irq;
3019                 break;
3020         case RADEON_PARAM_GART_BASE:
3021                 value = dev_priv->gart_vm_start;
3022                 break;
3023         case RADEON_PARAM_REGISTER_HANDLE:
3024                 value = dev_priv->mmio->offset;
3025                 break;
3026         case RADEON_PARAM_STATUS_HANDLE:
3027                 value = dev_priv->ring_rptr_offset;
3028                 break;
3029 #ifndef __LP64__
3030                 /*
3031                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3032                  * pointer which can't fit into an int-sized variable.  According to
3033                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3034                  * not supporting it shouldn't be a problem.  If the same functionality
3035                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3036                  * so backwards-compatibility for the embedded platforms can be
3037                  * maintained.  --davidm 4-Feb-2004.
3038                  */
3039         case RADEON_PARAM_SAREA_HANDLE:
3040                 /* The lock is the first dword in the sarea. */
3041                 value = (long)dev->lock.hw_lock;
3042                 break;
3043 #endif
3044         case RADEON_PARAM_GART_TEX_HANDLE:
3045                 value = dev_priv->gart_textures_offset;
3046                 break;
3047         case RADEON_PARAM_SCRATCH_OFFSET:
3048                 if (!dev_priv->writeback_works)
3049                         return -EINVAL;
3050                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3051                         value = R600_SCRATCH_REG_OFFSET;
3052                 else
3053                         value = RADEON_SCRATCH_REG_OFFSET;
3054                 break;
3055         case RADEON_PARAM_CARD_TYPE:
3056                 if (dev_priv->flags & RADEON_IS_PCIE)
3057                         value = RADEON_CARD_PCIE;
3058                 else if (dev_priv->flags & RADEON_IS_AGP)
3059                         value = RADEON_CARD_AGP;
3060                 else
3061                         value = RADEON_CARD_PCI;
3062                 break;
3063         case RADEON_PARAM_VBLANK_CRTC:
3064                 value = radeon_vblank_crtc_get(dev);
3065                 break;
3066         case RADEON_PARAM_FB_LOCATION:
3067                 value = radeon_read_fb_location(dev_priv);
3068                 break;
3069         case RADEON_PARAM_NUM_GB_PIPES:
3070                 value = dev_priv->num_gb_pipes;
3071                 break;
3072         default:
3073                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3074                 return -EINVAL;
3075         }
3076
3077         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3078                 DRM_ERROR("copy_to_user\n");
3079                 return -EFAULT;
3080         }
3081
3082         return 0;
3083 }
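/* Illustrative sketch, not part of the driver: querying one of the
 * parameters handled above from userland.  "drm_fd" is assumed to be an
 * open, authenticated DRM file descriptor.
 *
 *      drm_radeon_getparam_t gp;
 *      int num_pipes = 0;
 *
 *      gp.param = RADEON_PARAM_NUM_GB_PIPES;
 *      gp.value = &num_pipes;              // kernel copies an int here
 *      if (ioctl(drm_fd, DRM_IOCTL_RADEON_GETPARAM, &gp) == 0)
 *              printf("GB pipes: %d\n", num_pipes);
 */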
3084
3085 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3086 {
3087         drm_radeon_private_t *dev_priv = dev->dev_private;
3088         drm_radeon_setparam_t *sp = data;
3089         struct drm_radeon_driver_file_fields *radeon_priv;
3090
3091         switch (sp->param) {
3092         case RADEON_SETPARAM_FB_LOCATION:
3093                 radeon_priv = file_priv->driver_priv;
3094                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3095                     sp->value;
3096                 break;
3097         case RADEON_SETPARAM_SWITCH_TILING:
3098                 if (sp->value == 0) {
3099                         DRM_DEBUG("color tiling disabled\n");
3100                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3101                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3102                         if (dev_priv->sarea_priv)
3103                                 dev_priv->sarea_priv->tiling_enabled = 0;
3104                 } else if (sp->value == 1) {
3105                         DRM_DEBUG("color tiling enabled\n");
3106                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3107                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3108                         if (dev_priv->sarea_priv)
3109                                 dev_priv->sarea_priv->tiling_enabled = 1;
3110                 }
3111                 break;
3112         case RADEON_SETPARAM_PCIGART_LOCATION:
3113                 dev_priv->pcigart_offset = sp->value;
3114                 dev_priv->pcigart_offset_set = 1;
3115                 break;
3116         case RADEON_SETPARAM_NEW_MEMMAP:
3117                 dev_priv->new_memmap = sp->value;
3118                 break;
3119         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3120                 dev_priv->gart_info.table_size = sp->value;
3121                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3122                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3123                 break;
3124         case RADEON_SETPARAM_VBLANK_CRTC:
3125                 return radeon_vblank_crtc_set(dev, sp->value);
3127         default:
3128                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3129                 return -EINVAL;
3130         }
3131
3132         return 0;
3133 }
3134
3135 /* When a client dies:
3136  *    - Check for and clean up flipped page state
3137  *    - Free any allocated GART memory.
3138  *    - Free any allocated radeon surfaces.
3139  *
3140  * DRM infrastructure takes care of reclaiming dma buffers.
3141  */
3142 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3143 {
3144         if (dev->dev_private) {
3145                 drm_radeon_private_t *dev_priv = dev->dev_private;
3146                 dev_priv->page_flipping = 0;
3147                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3148                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3149                 radeon_surfaces_release(file_priv, dev_priv);
3150         }
3151 }
3152
3153 void radeon_driver_lastclose(struct drm_device *dev)
3154 {
3155         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3156         radeon_do_release(dev);
3157 }
3158
3159 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3160 {
3161         drm_radeon_private_t *dev_priv = dev->dev_private;
3162         struct drm_radeon_driver_file_fields *radeon_priv;
3163
3164         DRM_DEBUG("\n");
3165         radeon_priv =
3166             (struct drm_radeon_driver_file_fields *)
3167             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3168
3169         if (!radeon_priv)
3170                 return -ENOMEM;
3171
3172         file_priv->driver_priv = radeon_priv;
3173
3174         if (dev_priv)
3175                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3176         else
3177                 radeon_priv->radeon_fb_delta = 0;
3178         return 0;
3179 }
3180
3181 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3182 {
3183         struct drm_radeon_driver_file_fields *radeon_priv =
3184             file_priv->driver_priv;
3185
3186         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3187 }
3188
3189 struct drm_ioctl_desc radeon_ioctls[] = {
3190         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3191         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3192         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3193         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3194         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3195         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3196         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3197         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3198         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3199         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3200         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3201         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3202         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3203         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3204         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
3205         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3206         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3207         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3208         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3209         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3210         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3211         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3212         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3213         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3214         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3215         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3216         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
3217 };
3218
3219 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);