dragonfly.git: sys/dev/drm/radeon/radeon_state.c
1 /*-
2  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Gareth Hughes <gareth@valinux.com>
26  *    Kevin E. Martin <martin@valinux.com>
27  * __FBSDID("$FreeBSD: src/sys/dev/drm/radeon_state.c,v 1.27 2009/09/28 22:37:07 rnoland Exp $");
28  */
29
30 #include "dev/drm/drmP.h"
31 #include "dev/drm/drm.h"
32 #include "dev/drm/drm_sarea.h"
33 #include "dev/drm/radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     struct drm_file *file_priv,
43                                                     u32 *offset)
44 {
45         u64 off = *offset;
46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47         struct drm_radeon_driver_file_fields *radeon_priv;
48
49         /* Hrm ... the story of the offset ... So this function converts
50          * the various ideas of what userland clients might have for an
51          * offset in the card address space into an offset into the card
52          * address space :) So with a sane client, it should just keep
53          * the value intact and just do some boundary checking. However,
54          * not all clients are sane. Some older clients pass us 0 based
55          * offsets relative to the start of the framebuffer and some may
56          * assume the AGP aperture is appended to the framebuffer, so we
57          * try to detect those cases and fix them up.
58          *
59          * Note: It might be a good idea here to make sure the offset lands
60          * in some "allowed" area to protect things like the PCIE GART...
61          */
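        /* Illustrative walk-through (made-up values, not taken from real
         * hardware): with fb_location = 0xd0000000, fb_size = 0x08000000
         * (so fb_end = 0xd7ffffff), gart_size = 0x04000000 and
         * gart_vm_start = 0xe0000000:
         *  - a zero-based offset of 0x00100000 fails the first
         *    radeon_check_offset() test, is below fb_size + gart_size,
         *    has radeon_fb_delta (e.g. fb_location for such clients)
         *    added to become 0xd0100000, and passes the final recheck;
         *  - a client that assumed the GART aperture follows the
         *    framebuffer and passed 0xd8000400 is rebased by the
         *    "beyond fb" branch to gart_vm_start + 0x400 = 0xe0000400.
         */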
62
63         /* First, the best case, the offset already lands in either the
64          * framebuffer or the GART mapped space
65          */
66         if (radeon_check_offset(dev_priv, off))
67                 return 0;
68
69         /* Ok, that didn't happen... now check if we have a zero based
70          * offset that fits in the framebuffer + gart space, apply the
71          * magic offset we get from SETPARAM or calculated from fb_location
72          */
73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74                 radeon_priv = file_priv->driver_priv;
75                 off += radeon_priv->radeon_fb_delta;
76         }
77
78         /* Finally, assume we aimed at a GART offset if beyond the fb */
79         if (off > fb_end)
80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81
82         /* Now recheck and fail if out of bounds */
83         if (radeon_check_offset(dev_priv, off)) {
84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85                 *offset = off;
86                 return 0;
87         }
88         return -EINVAL;
89 }
90
91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
92                                                      dev_priv,
93                                                      struct drm_file *file_priv,
94                                                      int id, u32 *data)
95 {
96         switch (id) {
97
98         case RADEON_EMIT_PP_MISC:
99                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
100                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
101                         DRM_ERROR("Invalid depth buffer offset\n");
102                         return -EINVAL;
103                 }
104                 break;
105
106         case RADEON_EMIT_PP_CNTL:
107                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
108                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
109                         DRM_ERROR("Invalid colour buffer offset\n");
110                         return -EINVAL;
111                 }
112                 break;
113
114         case R200_EMIT_PP_TXOFFSET_0:
115         case R200_EMIT_PP_TXOFFSET_1:
116         case R200_EMIT_PP_TXOFFSET_2:
117         case R200_EMIT_PP_TXOFFSET_3:
118         case R200_EMIT_PP_TXOFFSET_4:
119         case R200_EMIT_PP_TXOFFSET_5:
120                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
121                                                   &data[0])) {
122                         DRM_ERROR("Invalid R200 texture offset\n");
123                         return -EINVAL;
124                 }
125                 break;
126
127         case RADEON_EMIT_PP_TXFILTER_0:
128         case RADEON_EMIT_PP_TXFILTER_1:
129         case RADEON_EMIT_PP_TXFILTER_2:
130                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
131                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
132                         DRM_ERROR("Invalid R100 texture offset\n");
133                         return -EINVAL;
134                 }
135                 break;
136
137         case R200_EMIT_PP_CUBIC_OFFSETS_0:
138         case R200_EMIT_PP_CUBIC_OFFSETS_1:
139         case R200_EMIT_PP_CUBIC_OFFSETS_2:
140         case R200_EMIT_PP_CUBIC_OFFSETS_3:
141         case R200_EMIT_PP_CUBIC_OFFSETS_4:
142         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
143                         int i;
144                         for (i = 0; i < 5; i++) {
145                                 if (radeon_check_and_fixup_offset(dev_priv,
146                                                                   file_priv,
147                                                                   &data[i])) {
148                                         DRM_ERROR
149                                             ("Invalid R200 cubic texture offset\n");
150                                         return -EINVAL;
151                                 }
152                         }
153                         break;
154                 }
155
156         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
159                         int i;
160                         for (i = 0; i < 5; i++) {
161                                 if (radeon_check_and_fixup_offset(dev_priv,
162                                                                   file_priv,
163                                                                   &data[i])) {
164                                         DRM_ERROR
165                                             ("Invalid R100 cubic texture offset\n");
166                                         return -EINVAL;
167                                 }
168                         }
169                 }
170                 break;
171
172         case R200_EMIT_VAP_CTL: {
173                         RING_LOCALS;
174                         BEGIN_RING(2);
175                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
176                         ADVANCE_RING();
177                 }
178                 break;
179
180         case RADEON_EMIT_RB3D_COLORPITCH:
181         case RADEON_EMIT_RE_LINE_PATTERN:
182         case RADEON_EMIT_SE_LINE_WIDTH:
183         case RADEON_EMIT_PP_LUM_MATRIX:
184         case RADEON_EMIT_PP_ROT_MATRIX_0:
185         case RADEON_EMIT_RB3D_STENCILREFMASK:
186         case RADEON_EMIT_SE_VPORT_XSCALE:
187         case RADEON_EMIT_SE_CNTL:
188         case RADEON_EMIT_SE_CNTL_STATUS:
189         case RADEON_EMIT_RE_MISC:
190         case RADEON_EMIT_PP_BORDER_COLOR_0:
191         case RADEON_EMIT_PP_BORDER_COLOR_1:
192         case RADEON_EMIT_PP_BORDER_COLOR_2:
193         case RADEON_EMIT_SE_ZBIAS_FACTOR:
194         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
195         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
196         case R200_EMIT_PP_TXCBLEND_0:
197         case R200_EMIT_PP_TXCBLEND_1:
198         case R200_EMIT_PP_TXCBLEND_2:
199         case R200_EMIT_PP_TXCBLEND_3:
200         case R200_EMIT_PP_TXCBLEND_4:
201         case R200_EMIT_PP_TXCBLEND_5:
202         case R200_EMIT_PP_TXCBLEND_6:
203         case R200_EMIT_PP_TXCBLEND_7:
204         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
205         case R200_EMIT_TFACTOR_0:
206         case R200_EMIT_VTX_FMT_0:
207         case R200_EMIT_MATRIX_SELECT_0:
208         case R200_EMIT_TEX_PROC_CTL_2:
209         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
210         case R200_EMIT_PP_TXFILTER_0:
211         case R200_EMIT_PP_TXFILTER_1:
212         case R200_EMIT_PP_TXFILTER_2:
213         case R200_EMIT_PP_TXFILTER_3:
214         case R200_EMIT_PP_TXFILTER_4:
215         case R200_EMIT_PP_TXFILTER_5:
216         case R200_EMIT_VTE_CNTL:
217         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
218         case R200_EMIT_PP_TAM_DEBUG3:
219         case R200_EMIT_PP_CNTL_X:
220         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
221         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
222         case R200_EMIT_RE_SCISSOR_TL_0:
223         case R200_EMIT_RE_SCISSOR_TL_1:
224         case R200_EMIT_RE_SCISSOR_TL_2:
225         case R200_EMIT_SE_VAP_CNTL_STATUS:
226         case R200_EMIT_SE_VTX_STATE_CNTL:
227         case R200_EMIT_RE_POINTSIZE:
228         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
229         case R200_EMIT_PP_CUBIC_FACES_0:
230         case R200_EMIT_PP_CUBIC_FACES_1:
231         case R200_EMIT_PP_CUBIC_FACES_2:
232         case R200_EMIT_PP_CUBIC_FACES_3:
233         case R200_EMIT_PP_CUBIC_FACES_4:
234         case R200_EMIT_PP_CUBIC_FACES_5:
235         case RADEON_EMIT_PP_TEX_SIZE_0:
236         case RADEON_EMIT_PP_TEX_SIZE_1:
237         case RADEON_EMIT_PP_TEX_SIZE_2:
238         case R200_EMIT_RB3D_BLENDCOLOR:
239         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
240         case RADEON_EMIT_PP_CUBIC_FACES_0:
241         case RADEON_EMIT_PP_CUBIC_FACES_1:
242         case RADEON_EMIT_PP_CUBIC_FACES_2:
243         case R200_EMIT_PP_TRI_PERF_CNTL:
244         case R200_EMIT_PP_AFS_0:
245         case R200_EMIT_PP_AFS_1:
246         case R200_EMIT_ATF_TFACTOR:
247         case R200_EMIT_PP_TXCTLALL_0:
248         case R200_EMIT_PP_TXCTLALL_1:
249         case R200_EMIT_PP_TXCTLALL_2:
250         case R200_EMIT_PP_TXCTLALL_3:
251         case R200_EMIT_PP_TXCTLALL_4:
252         case R200_EMIT_PP_TXCTLALL_5:
253         case R200_EMIT_VAP_PVS_CNTL:
254                 /* These packets don't contain memory offsets */
255                 break;
256
257         default:
258                 DRM_ERROR("Unknown state packet ID %d\n", id);
259                 return -EINVAL;
260         }
261
262         return 0;
263 }
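
/* Illustrative sketch, not part of the driver: the cmdbuf dispatch code that
 * calls the checker above lies outside this excerpt, so the invented
 * example_fixup_txfilter0() helper below only shows the calling convention
 * for a single RADEON_EMIT_PP_TXFILTER_0 packet, whose six payload dwords
 * start at PP_TXFILTER_0 and have their PP_TXOFFSET_0 entry verified and,
 * if necessary, rewritten in place.
 */
#if 0
static int example_fixup_txfilter0(drm_radeon_private_t *dev_priv,
				   struct drm_file *file_priv, u32 *payload)
{
	if (radeon_check_and_fixup_packets(dev_priv, file_priv,
					   RADEON_EMIT_PP_TXFILTER_0, payload))
		return -EINVAL;	/* offset was outside fb and GART space */
	return 0;
}
#endif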
264
265 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
266                                                      dev_priv,
267                                                      struct drm_file *file_priv,
268                                                      drm_radeon_kcmd_buffer_t *
269                                                      cmdbuf,
270                                                      unsigned int *cmdsz)
271 {
272         u32 *cmd = (u32 *) cmdbuf->buf;
273         u32 offset, narrays;
274         int count, i, k;
275
276         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
277
278         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
279                 DRM_ERROR("Not a type 3 packet\n");
280                 return -EINVAL;
281         }
282
283         if (4 * *cmdsz > cmdbuf->bufsz) {
284                 DRM_ERROR("Packet size larger than size of data provided\n");
285                 return -EINVAL;
286         }
287
288         switch(cmd[0] & 0xff00) {
289         /* XXX Are there old drivers needing other packets? */
290
291         case RADEON_3D_DRAW_IMMD:
292         case RADEON_3D_DRAW_VBUF:
293         case RADEON_3D_DRAW_INDX:
294         case RADEON_WAIT_FOR_IDLE:
295         case RADEON_CP_NOP:
296         case RADEON_3D_CLEAR_ZMASK:
297 /*      case RADEON_CP_NEXT_CHAR:
298         case RADEON_CP_PLY_NEXTSCAN:
299         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
300                 /* these packets are safe */
301                 break;
302
303         case RADEON_CP_3D_DRAW_IMMD_2:
304         case RADEON_CP_3D_DRAW_VBUF_2:
305         case RADEON_CP_3D_DRAW_INDX_2:
306         case RADEON_3D_CLEAR_HIZ:
307                 /* safe but r200 only */
308                 if (dev_priv->microcode_version != UCODE_R200) {
309                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
310                         return -EINVAL;
311                 }
312                 break;
313
314         case RADEON_3D_LOAD_VBPNTR:
315                 count = (cmd[0] >> 16) & 0x3fff;
316
317                 if (count > 18) { /* 12 arrays max */
318                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
319                                   count);
320                         return -EINVAL;
321                 }
322
323                 /* carefully check packet contents */
324                 narrays = cmd[1] & ~0xc000;
325                 k = 0;
326                 i = 2;
327                 while ((k < narrays) && (i < (count + 2))) {
328                         i++;            /* skip attribute field */
329                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
330                                                           &cmd[i])) {
331                                 DRM_ERROR
332                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
333                                      k, i);
334                                 return -EINVAL;
335                         }
336                         k++;
337                         i++;
338                         if (k == narrays)
339                                 break;
340                         /* have one more to process, they come in pairs */
341                         if (radeon_check_and_fixup_offset(dev_priv,
342                                                           file_priv, &cmd[i]))
343                         {
344                                 DRM_ERROR
345                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
346                                      k, i);
347                                 return -EINVAL;
348                         }
349                         k++;
350                         i++;
351                 }
352                 /* do the counts match what we expect ? */
353                 if ((k != narrays) || (i != (count + 2))) {
354                         DRM_ERROR
355                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
356                               k, i, narrays, count + 1);
357                         return -EINVAL;
358                 }
359                 break;
360
361         case RADEON_3D_RNDR_GEN_INDX_PRIM:
362                 if (dev_priv->microcode_version != UCODE_R100) {
363                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
364                         return -EINVAL;
365                 }
366                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
367                         DRM_ERROR("Invalid rndr_gen_indx offset\n");
368                         return -EINVAL;
369                 }
370                 break;
371
372         case RADEON_CP_INDX_BUFFER:
373                 if (dev_priv->microcode_version != UCODE_R200) {
374                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
375                         return -EINVAL;
376                 }
377                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
378                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
379                         return -EINVAL;
380                 }
381                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
382                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
383                         return -EINVAL;
384                 }
385                 break;
386
387         case RADEON_CNTL_HOSTDATA_BLT:
388         case RADEON_CNTL_PAINT_MULTI:
389         case RADEON_CNTL_BITBLT_MULTI:
390                 /* MSB of opcode: next DWORD GUI_CNTL */
391                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
392                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
393                         offset = cmd[2] << 10;
394                         if (radeon_check_and_fixup_offset
395                             (dev_priv, file_priv, &offset)) {
396                                 DRM_ERROR("Invalid first packet offset\n");
397                                 return -EINVAL;
398                         }
399                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
400                 }
401
402                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
403                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
404                         offset = cmd[3] << 10;
405                         if (radeon_check_and_fixup_offset
406                             (dev_priv, file_priv, &offset)) {
407                                 DRM_ERROR("Invalid second packet offset\n");
408                                 return -EINVAL;
409                         }
410                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
411                 }
412                 break;
413
414         default:
415                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
416                 return -EINVAL;
417         }
418
419         return 0;
420 }
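
/* Note on the header decode above (illustration only): the checker treats a
 * type-3 header as carrying the packet type in bits 31:30 (both set for
 * RADEON_CP_PACKET3), the opcode in the 0xff00 field matched by the switch,
 * and a count in RADEON_CP_PACKET_COUNT_MASK such that the whole packet is
 * count + 2 dwords (one header plus count + 1 payload dwords).  For
 * RADEON_3D_LOAD_VBPNTR the payload is the narrays dword followed by one
 * attribute dword per pair of arrays and one offset dword per array, which
 * is the pairing the loop above walks.  The example_* helper is invented.
 */
#if 0
static unsigned int example_packet3_dwords(u32 header)
{
	/* mirrors the *cmdsz computation at the top of the checker */
	return 2 + ((header & RADEON_CP_PACKET_COUNT_MASK) >> 16);
}
#endif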
421
422 /* ================================================================
423  * CP hardware state programming functions
424  */
425
426 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
427                                              struct drm_clip_rect * box)
428 {
429         RING_LOCALS;
430
431         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
432                   box->x1, box->y1, box->x2, box->y2);
433
434         BEGIN_RING(4);
435         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
436         OUT_RING((box->y1 << 16) | box->x1);
437         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
438         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
439         ADVANCE_RING();
440 }
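
/* Worked example (made-up box, illustration only): for a box with x1 = 0,
 * y1 = 0, x2 = 640, y2 = 480 the writes above program
 * RADEON_RE_TOP_LEFT = (0 << 16) | 0 = 0x00000000 and
 * RADEON_RE_WIDTH_HEIGHT = (479 << 16) | 639 = 0x01df027f, i.e. the
 * exclusive x2/y2 of the drm_clip_rect are turned into an inclusive
 * bottom-right coordinate.
 */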
441
442 /* Emit 1.1 state
443  */
444 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
445                              struct drm_file *file_priv,
446                              drm_radeon_context_regs_t * ctx,
447                              drm_radeon_texture_regs_t * tex,
448                              unsigned int dirty)
449 {
450         RING_LOCALS;
451         DRM_DEBUG("dirty=0x%08x\n", dirty);
452
453         if (dirty & RADEON_UPLOAD_CONTEXT) {
454                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
455                                                   &ctx->rb3d_depthoffset)) {
456                         DRM_ERROR("Invalid depth buffer offset\n");
457                         return -EINVAL;
458                 }
459
460                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
461                                                   &ctx->rb3d_coloroffset)) {
462                         DRM_ERROR("Invalid colour buffer offset\n");
463                         return -EINVAL;
464                 }
465
466                 BEGIN_RING(14);
467                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
468                 OUT_RING(ctx->pp_misc);
469                 OUT_RING(ctx->pp_fog_color);
470                 OUT_RING(ctx->re_solid_color);
471                 OUT_RING(ctx->rb3d_blendcntl);
472                 OUT_RING(ctx->rb3d_depthoffset);
473                 OUT_RING(ctx->rb3d_depthpitch);
474                 OUT_RING(ctx->rb3d_zstencilcntl);
475                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
476                 OUT_RING(ctx->pp_cntl);
477                 OUT_RING(ctx->rb3d_cntl);
478                 OUT_RING(ctx->rb3d_coloroffset);
479                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
480                 OUT_RING(ctx->rb3d_colorpitch);
481                 ADVANCE_RING();
482         }
483
484         if (dirty & RADEON_UPLOAD_VERTFMT) {
485                 BEGIN_RING(2);
486                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
487                 OUT_RING(ctx->se_coord_fmt);
488                 ADVANCE_RING();
489         }
490
491         if (dirty & RADEON_UPLOAD_LINE) {
492                 BEGIN_RING(5);
493                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
494                 OUT_RING(ctx->re_line_pattern);
495                 OUT_RING(ctx->re_line_state);
496                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
497                 OUT_RING(ctx->se_line_width);
498                 ADVANCE_RING();
499         }
500
501         if (dirty & RADEON_UPLOAD_BUMPMAP) {
502                 BEGIN_RING(5);
503                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
504                 OUT_RING(ctx->pp_lum_matrix);
505                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
506                 OUT_RING(ctx->pp_rot_matrix_0);
507                 OUT_RING(ctx->pp_rot_matrix_1);
508                 ADVANCE_RING();
509         }
510
511         if (dirty & RADEON_UPLOAD_MASKS) {
512                 BEGIN_RING(4);
513                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
514                 OUT_RING(ctx->rb3d_stencilrefmask);
515                 OUT_RING(ctx->rb3d_ropcntl);
516                 OUT_RING(ctx->rb3d_planemask);
517                 ADVANCE_RING();
518         }
519
520         if (dirty & RADEON_UPLOAD_VIEWPORT) {
521                 BEGIN_RING(7);
522                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
523                 OUT_RING(ctx->se_vport_xscale);
524                 OUT_RING(ctx->se_vport_xoffset);
525                 OUT_RING(ctx->se_vport_yscale);
526                 OUT_RING(ctx->se_vport_yoffset);
527                 OUT_RING(ctx->se_vport_zscale);
528                 OUT_RING(ctx->se_vport_zoffset);
529                 ADVANCE_RING();
530         }
531
532         if (dirty & RADEON_UPLOAD_SETUP) {
533                 BEGIN_RING(4);
534                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
535                 OUT_RING(ctx->se_cntl);
536                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
537                 OUT_RING(ctx->se_cntl_status);
538                 ADVANCE_RING();
539         }
540
541         if (dirty & RADEON_UPLOAD_MISC) {
542                 BEGIN_RING(2);
543                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
544                 OUT_RING(ctx->re_misc);
545                 ADVANCE_RING();
546         }
547
548         if (dirty & RADEON_UPLOAD_TEX0) {
549                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
550                                                   &tex[0].pp_txoffset)) {
551                         DRM_ERROR("Invalid texture offset for unit 0\n");
552                         return -EINVAL;
553                 }
554
555                 BEGIN_RING(9);
556                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
557                 OUT_RING(tex[0].pp_txfilter);
558                 OUT_RING(tex[0].pp_txformat);
559                 OUT_RING(tex[0].pp_txoffset);
560                 OUT_RING(tex[0].pp_txcblend);
561                 OUT_RING(tex[0].pp_txablend);
562                 OUT_RING(tex[0].pp_tfactor);
563                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
564                 OUT_RING(tex[0].pp_border_color);
565                 ADVANCE_RING();
566         }
567
568         if (dirty & RADEON_UPLOAD_TEX1) {
569                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
570                                                   &tex[1].pp_txoffset)) {
571                         DRM_ERROR("Invalid texture offset for unit 1\n");
572                         return -EINVAL;
573                 }
574
575                 BEGIN_RING(9);
576                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
577                 OUT_RING(tex[1].pp_txfilter);
578                 OUT_RING(tex[1].pp_txformat);
579                 OUT_RING(tex[1].pp_txoffset);
580                 OUT_RING(tex[1].pp_txcblend);
581                 OUT_RING(tex[1].pp_txablend);
582                 OUT_RING(tex[1].pp_tfactor);
583                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
584                 OUT_RING(tex[1].pp_border_color);
585                 ADVANCE_RING();
586         }
587
588         if (dirty & RADEON_UPLOAD_TEX2) {
589                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
590                                                   &tex[2].pp_txoffset)) {
591                         DRM_ERROR("Invalid texture offset for unit 2\n");
592                         return -EINVAL;
593                 }
594
595                 BEGIN_RING(9);
596                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
597                 OUT_RING(tex[2].pp_txfilter);
598                 OUT_RING(tex[2].pp_txformat);
599                 OUT_RING(tex[2].pp_txoffset);
600                 OUT_RING(tex[2].pp_txcblend);
601                 OUT_RING(tex[2].pp_txablend);
602                 OUT_RING(tex[2].pp_tfactor);
603                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
604                 OUT_RING(tex[2].pp_border_color);
605                 ADVANCE_RING();
606         }
607
608         return 0;
609 }
610
611 /* Emit 1.2 state
612  */
613 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
614                               struct drm_file *file_priv,
615                               drm_radeon_state_t * state)
616 {
617         RING_LOCALS;
618
619         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
620                 BEGIN_RING(3);
621                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
622                 OUT_RING(state->context2.se_zbias_factor);
623                 OUT_RING(state->context2.se_zbias_constant);
624                 ADVANCE_RING();
625         }
626
627         return radeon_emit_state(dev_priv, file_priv, &state->context,
628                                  state->tex, state->dirty);
629 }
630
631 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
632  * 1.3 cmdbuffers allow all previous state to be updated as well as
633  * the tcl scalar and vector areas.
634  */
635 static struct {
636         int start;
637         int len;
638         const char *name;
639 } packet[RADEON_MAX_STATE_PACKETS] = {
640         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
641         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
642         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
643         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
644         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
645         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
646         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
647         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
648         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
649         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
650         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
651         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
652         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
653         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
654         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
655         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
656         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
657         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
658         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
659         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
660         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
661                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
662         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
663         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
664         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
665         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
666         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
667         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
668         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
669         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
670         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
671         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
672         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
673         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
674         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
675         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
676         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
677         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
678         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
679         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
680         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
681         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
682         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
683         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
684         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
685         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
686         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
687         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
688         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
689         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
690         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
691          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
692         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
693         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
694         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
695         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
696         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
697         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
698         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
699         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
700         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
701         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
702         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
703                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
704         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
705         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
706         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
707         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
708         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
709         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
710         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
711         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
712         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
713         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
714         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
715         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
716         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
717         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
718         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
719         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
720         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
721         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
722         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
723         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
724         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
725         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
726         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
727         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
728         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
729         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
730         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
731         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
732         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
733         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
734         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
735         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
736         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
737         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
738 };
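
/* Illustrative sketch (the real consumer of the table above is the 1.3
 * cmdbuf ioctl path, which is not part of this excerpt; the example_* name
 * is invented): each entry describes packet[id].len register dwords starting
 * at packet[id].start, so once radeon_check_and_fixup_packets() has blessed
 * the payload it could be emitted as one CP_PACKET0 burst, e.g.:
 */
#if 0
static void example_emit_state_packet(drm_radeon_private_t *dev_priv,
				      int id, const u32 *data)
{
	int sz = packet[id].len;
	RING_LOCALS;

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(packet[id].start, sz - 1));
	while (sz--)
		OUT_RING(*data++);
	ADVANCE_RING();
}
#endif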
739
740 /* ================================================================
741  * Performance monitoring functions
742  */
743
744 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
745                              int x, int y, int w, int h, int r, int g, int b)
746 {
747         u32 color;
748         RING_LOCALS;
749
750         x += dev_priv->sarea_priv->boxes[0].x1;
751         y += dev_priv->sarea_priv->boxes[0].y1;
752
753         switch (dev_priv->color_fmt) {
754         case RADEON_COLOR_FORMAT_RGB565:
755                 color = (((r & 0xf8) << 8) |
756                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
757                 break;
758         case RADEON_COLOR_FORMAT_ARGB8888:
759         default:
760                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
761                 break;
762         }
763
764         BEGIN_RING(4);
765         RADEON_WAIT_UNTIL_3D_IDLE();
766         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
767         OUT_RING(0xffffffff);
768         ADVANCE_RING();
769
770         BEGIN_RING(6);
771
772         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
773         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
774                  RADEON_GMC_BRUSH_SOLID_COLOR |
775                  (dev_priv->color_fmt << 8) |
776                  RADEON_GMC_SRC_DATATYPE_COLOR |
777                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
778
779         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
780                 OUT_RING(dev_priv->front_pitch_offset);
781         } else {
782                 OUT_RING(dev_priv->back_pitch_offset);
783         }
784
785         OUT_RING(color);
786
787         OUT_RING((x << 16) | y);
788         OUT_RING((w << 16) | h);
789
790         ADVANCE_RING();
791 }
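
/* Worked example for the colour packing above: the purple flip box
 * (r = 255, g = 0, b = 255) becomes ((0xf8 << 8) | 0 | (0xf8 >> 3)) = 0xf81f
 * in RGB565, while the ARGB8888 path emits 0xffff00ff for the same colour.
 */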
792
793 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
794 {
795         /* Collapse various things into a wait flag -- trying to
796          * guess if userspace slept -- better just to have them tell us.
797          */
798         if (dev_priv->stats.last_frame_reads > 1 ||
799             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
800                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
801         }
802
803         if (dev_priv->stats.freelist_loops) {
804                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
805         }
806
807         /* Purple box for page flipping
808          */
809         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
810                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
811
812         /* Red box if we have to wait for idle at any point
813          */
814         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
815                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
816
817         /* Blue box: lost context?
818          */
819
820         /* Yellow box for texture swaps
821          */
822         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
823                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
824
825         /* Green box if hardware never idles (as far as we can tell)
826          */
827         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
828                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
829
830         /* Draw bars indicating number of buffers allocated
831          * (not a great measure, easily confused)
832          */
833         if (dev_priv->stats.requested_bufs) {
834                 if (dev_priv->stats.requested_bufs > 100)
835                         dev_priv->stats.requested_bufs = 100;
836
837                 radeon_clear_box(dev_priv, 4, 16,
838                                  dev_priv->stats.requested_bufs, 4,
839                                  196, 128, 128);
840         }
841
842         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
843
844 }
845
846 /* ================================================================
847  * CP command dispatch functions
848  */
849
850 static void radeon_cp_dispatch_clear(struct drm_device * dev,
851                                      drm_radeon_clear_t * clear,
852                                      drm_radeon_clear_rect_t * depth_boxes)
853 {
854         drm_radeon_private_t *dev_priv = dev->dev_private;
855         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
856         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
857         int nbox = sarea_priv->nbox;
858         struct drm_clip_rect *pbox = sarea_priv->boxes;
859         unsigned int flags = clear->flags;
860         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
861         int i;
862         RING_LOCALS;
863         DRM_DEBUG("flags = 0x%x\n", flags);
864
865         dev_priv->stats.clears++;
866
867         if (sarea_priv->pfCurrentPage == 1) {
868                 unsigned int tmp = flags;
869
870                 flags &= ~(RADEON_FRONT | RADEON_BACK);
871                 if (tmp & RADEON_FRONT)
872                         flags |= RADEON_BACK;
873                 if (tmp & RADEON_BACK)
874                         flags |= RADEON_FRONT;
875         }
876
877         if (flags & (RADEON_FRONT | RADEON_BACK)) {
878
879                 BEGIN_RING(4);
880
881                 /* Ensure the 3D stream is idle before doing a
882                  * 2D fill to clear the front or back buffer.
883                  */
884                 RADEON_WAIT_UNTIL_3D_IDLE();
885
886                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
887                 OUT_RING(clear->color_mask);
888
889                 ADVANCE_RING();
890
891                 /* Make sure we restore the 3D state next time.
892                  */
893                 sarea_priv->ctx_owner = 0;
894
895                 for (i = 0; i < nbox; i++) {
896                         int x = pbox[i].x1;
897                         int y = pbox[i].y1;
898                         int w = pbox[i].x2 - x;
899                         int h = pbox[i].y2 - y;
900
901                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
902                                   x, y, w, h, flags);
903
904                         if (flags & RADEON_FRONT) {
905                                 BEGIN_RING(6);
906
907                                 OUT_RING(CP_PACKET3
908                                          (RADEON_CNTL_PAINT_MULTI, 4));
909                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
910                                          RADEON_GMC_BRUSH_SOLID_COLOR |
911                                          (dev_priv->
912                                           color_fmt << 8) |
913                                          RADEON_GMC_SRC_DATATYPE_COLOR |
914                                          RADEON_ROP3_P |
915                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
916
917                                 OUT_RING(dev_priv->front_pitch_offset);
918                                 OUT_RING(clear->clear_color);
919
920                                 OUT_RING((x << 16) | y);
921                                 OUT_RING((w << 16) | h);
922
923                                 ADVANCE_RING();
924                         }
925
926                         if (flags & RADEON_BACK) {
927                                 BEGIN_RING(6);
928
929                                 OUT_RING(CP_PACKET3
930                                          (RADEON_CNTL_PAINT_MULTI, 4));
931                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
932                                          RADEON_GMC_BRUSH_SOLID_COLOR |
933                                          (dev_priv->
934                                           color_fmt << 8) |
935                                          RADEON_GMC_SRC_DATATYPE_COLOR |
936                                          RADEON_ROP3_P |
937                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
938
939                                 OUT_RING(dev_priv->back_pitch_offset);
940                                 OUT_RING(clear->clear_color);
941
942                                 OUT_RING((x << 16) | y);
943                                 OUT_RING((w << 16) | h);
944
945                                 ADVANCE_RING();
946                         }
947                 }
948         }
949
950         /* hyper z clear */
951         /* no docs available, based on reverse engineering by Stephane Marchesin */
952         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
953             && (flags & RADEON_CLEAR_FASTZ)) {
954
955                 int i;
956                 int depthpixperline =
957                     dev_priv->depth_fmt ==
958                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
959                                                        2) : (dev_priv->
960                                                              depth_pitch / 4);
961
962                 u32 clearmask;
963
964                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
965                     ((clear->depth_mask & 0xff) << 24);
966
967                 /* Make sure we restore the 3D state next time.
968                  * we haven't touched any "normal" state - still need this?
969                  */
970                 sarea_priv->ctx_owner = 0;
971
972                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
973                     && (flags & RADEON_USE_HIERZ)) {
974                         /* FIXME : reverse engineer that for Rx00 cards */
975                         /* FIXME : the mask supposedly contains low-res z values. So can't set
976                            just to the max (0xff? or actually 0x3fff?), need to take z clear
977                            value into account? */
978                         /* pattern seems to work for r100, though get slight
979                            rendering errors with glxgears. If hierz is not enabled for r100,
980                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
981                            other ones are ignored, and the same clear mask can be used. That's
982                            very different behaviour than R200 which needs different clear mask
983                            and different number of tiles to clear if hierz is enabled or not !?!
984                          */
985                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
986                 } else {
987                         /* clear mask : chooses the clearing pattern.
988                            rv250: could be used to clear only parts of macrotiles
989                            (but that would get really complicated...)?
990                            bit 0 and 1 (either or both of them ?!?!) are used to
991                            not clear tile (or maybe one of the bits indicates if the tile is
992                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
993                            Pattern is as follows:
994                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
995                            bits -------------------------------------------------
996                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
997                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
998                            covers 256 pixels ?!?
999                          */
1000                         clearmask = 0x0;
1001                 }
1002
1003                 BEGIN_RING(8);
1004                 RADEON_WAIT_UNTIL_2D_IDLE();
1005                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1006                              tempRB3D_DEPTHCLEARVALUE);
1007                 /* what offset is this exactly ? */
1008                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1009                 /* need ctlstat, otherwise get some strange black flickering */
1010                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1011                              RADEON_RB3D_ZC_FLUSH_ALL);
1012                 ADVANCE_RING();
1013
1014                 for (i = 0; i < nbox; i++) {
1015                         int tileoffset, nrtilesx, nrtilesy, j;
1016                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1017                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1018                             && !(dev_priv->microcode_version == UCODE_R200)) {
1019                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1020                                    maybe r200 actually doesn't need to put the low-res z value into
1021                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1022                                    Works for R100, both with hierz and without.
1023                                    R100 seems to operate on 2x1 8x8 tiles, but...
1024                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1025                                    problematic with resolutions which are not 64 pix aligned? */
1026                                 tileoffset =
1027                                     ((pbox[i].y1 >> 3) * depthpixperline +
1028                                      pbox[i].x1) >> 6;
1029                                 nrtilesx =
1030                                     ((pbox[i].x2 & ~63) -
1031                                      (pbox[i].x1 & ~63)) >> 4;
1032                                 nrtilesy =
1033                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1034                                 for (j = 0; j <= nrtilesy; j++) {
1035                                         BEGIN_RING(4);
1036                                         OUT_RING(CP_PACKET3
1037                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1038                                         /* first tile */
1039                                         OUT_RING(tileoffset * 8);
1040                                         /* the number of tiles to clear */
1041                                         OUT_RING(nrtilesx + 4);
1042                                         /* clear mask : chooses the clearing pattern. */
1043                                         OUT_RING(clearmask);
1044                                         ADVANCE_RING();
1045                                         tileoffset += depthpixperline >> 6;
1046                                 }
1047                         } else if (dev_priv->microcode_version == UCODE_R200) {
1048                                 /* works for rv250. */
1049                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1050                                 tileoffset =
1051                                     ((pbox[i].y1 >> 3) * depthpixperline +
1052                                      pbox[i].x1) >> 5;
1053                                 nrtilesx =
1054                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1055                                 nrtilesy =
1056                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1057                                 for (j = 0; j <= nrtilesy; j++) {
1058                                         BEGIN_RING(4);
1059                                         OUT_RING(CP_PACKET3
1060                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1061                                         /* first tile */
1062                                         /* judging by the first tile offset needed, could possibly
1063                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1064                                            macro tiles, though would still need clear mask for
1065                                            right/bottom if truly 4x4 granularity is desired ? */
1066                                         OUT_RING(tileoffset * 16);
1067                                         /* the number of tiles to clear */
1068                                         OUT_RING(nrtilesx + 1);
1069                                         /* clear mask : chooses the clearing pattern. */
1070                                         OUT_RING(clearmask);
1071                                         ADVANCE_RING();
1072                                         tileoffset += depthpixperline >> 5;
1073                                 }
1074                         } else {        /* rv 100 */
1075                                 /* rv100 might not need 64 pix alignment, who knows */
1076                                 /* offsets are, hmm, weird */
1077                                 tileoffset =
1078                                     ((pbox[i].y1 >> 4) * depthpixperline +
1079                                      pbox[i].x1) >> 6;
1080                                 nrtilesx =
1081                                     ((pbox[i].x2 & ~63) -
1082                                      (pbox[i].x1 & ~63)) >> 4;
1083                                 nrtilesy =
1084                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1085                                 for (j = 0; j <= nrtilesy; j++) {
1086                                         BEGIN_RING(4);
1087                                         OUT_RING(CP_PACKET3
1088                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1089                                         OUT_RING(tileoffset * 128);
1090                                         /* the number of tiles to clear */
1091                                         OUT_RING(nrtilesx + 4);
1092                                         /* clear mask : chooses the clearing pattern. */
1093                                         OUT_RING(clearmask);
1094                                         ADVANCE_RING();
1095                                         tileoffset += depthpixperline >> 6;
1096                                 }
1097                         }
1098                 }
1099
1100                 /* TODO don't always clear all hi-level z tiles */
1101                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1102                     && (dev_priv->microcode_version == UCODE_R200)
1103                     && (flags & RADEON_USE_HIERZ))
1104                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1105                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1106                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1107                            value into account? */
1108                 {
1109                         BEGIN_RING(4);
1110                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1111                         OUT_RING(0x0);  /* First tile */
1112                         OUT_RING(0x3cc0);
1113                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1114                         ADVANCE_RING();
1115                 }
1116         }
1117
1118         /* We have to clear the depth and/or stencil buffers by
1119          * rendering a quad into just those buffers.  Thus, we have to
1120          * make sure the 3D engine is configured correctly.
1121          */
1122         else if ((dev_priv->microcode_version == UCODE_R200) &&
1123                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1124
1125                 int tempPP_CNTL;
1126                 int tempRE_CNTL;
1127                 int tempRB3D_CNTL;
1128                 int tempRB3D_ZSTENCILCNTL;
1129                 int tempRB3D_STENCILREFMASK;
1130                 int tempRB3D_PLANEMASK;
1131                 int tempSE_CNTL;
1132                 int tempSE_VTE_CNTL;
1133                 int tempSE_VTX_FMT_0;
1134                 int tempSE_VTX_FMT_1;
1135                 int tempSE_VAP_CNTL;
1136                 int tempRE_AUX_SCISSOR_CNTL;
1137
1138                 tempPP_CNTL = 0;
1139                 tempRE_CNTL = 0;
1140
1141                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1142
1143                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1144                 tempRB3D_STENCILREFMASK = 0x0;
1145
1146                 tempSE_CNTL = depth_clear->se_cntl;
1147
1148                 /* Disable TCL */
1149
1150                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1151                                           (0x9 <<
1152                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1153
1154                 tempRB3D_PLANEMASK = 0x0;
1155
1156                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1157
1158                 tempSE_VTE_CNTL =
1159                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1160
1161                 /* Vertex format (X, Y, Z, W) */
1162                 tempSE_VTX_FMT_0 =
1163                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1164                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1165                 tempSE_VTX_FMT_1 = 0x0;
1166
1167                 /*
1168                  * Depth buffer specific enables
1169                  */
1170                 if (flags & RADEON_DEPTH) {
1171                         /* Enable depth buffer */
1172                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1173                 } else {
1174                         /* Disable depth buffer */
1175                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1176                 }
1177
1178                 /*
1179                  * Stencil buffer specific enables
1180                  */
1181                 if (flags & RADEON_STENCIL) {
1182                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1183                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1184                 } else {
1185                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1186                         tempRB3D_STENCILREFMASK = 0x00000000;
1187                 }
1188
1189                 if (flags & RADEON_USE_COMP_ZBUF) {
1190                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1191                             RADEON_Z_DECOMPRESSION_ENABLE;
1192                 }
1193                 if (flags & RADEON_USE_HIERZ) {
1194                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1195                 }
1196
1197                 BEGIN_RING(26);
1198                 RADEON_WAIT_UNTIL_2D_IDLE();
1199
1200                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1201                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1202                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1203                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1204                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1205                              tempRB3D_STENCILREFMASK);
1206                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1207                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1208                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1209                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1210                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1211                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1212                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1213                 ADVANCE_RING();
1214
1215                 /* Make sure we restore the 3D state next time.
1216                  */
1217                 sarea_priv->ctx_owner = 0;
1218
1219                 for (i = 0; i < nbox; i++) {
1220
1221                         /* Funny that this should be required --
1222                          *  sets top-left?
1223                          */
1224                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1225
1226                         BEGIN_RING(14);
1227                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1228                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1229                                   RADEON_PRIM_WALK_RING |
1230                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
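			/* Three (X, Y, Z, W) vertices for the clear
			 * rectangle follow; 0x3f800000 is the IEEE-754
			 * encoding of 1.0f used for the W coordinate.
			 */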
1231                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1232                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1233                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1234                         OUT_RING(0x3f800000);
1235                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1236                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1237                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1238                         OUT_RING(0x3f800000);
1239                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1240                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1241                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1242                         OUT_RING(0x3f800000);
1243                         ADVANCE_RING();
1244                 }
1245         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1246
1247                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1248
1249                 rb3d_cntl = depth_clear->rb3d_cntl;
1250
1251                 if (flags & RADEON_DEPTH) {
1252                         rb3d_cntl |= RADEON_Z_ENABLE;
1253                 } else {
1254                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1255                 }
1256
1257                 if (flags & RADEON_STENCIL) {
1258                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1259                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1260                 } else {
1261                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1262                         rb3d_stencilrefmask = 0x00000000;
1263                 }
1264
1265                 if (flags & RADEON_USE_COMP_ZBUF) {
1266                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1267                             RADEON_Z_DECOMPRESSION_ENABLE;
1268                 }
1269                 if (flags & RADEON_USE_HIERZ) {
1270                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1271                 }
1272
1273                 BEGIN_RING(13);
1274                 RADEON_WAIT_UNTIL_2D_IDLE();
1275
1276                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1277                 OUT_RING(0x00000000);
1278                 OUT_RING(rb3d_cntl);
1279
1280                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1281                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1282                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1283                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1284                 ADVANCE_RING();
1285
1286                 /* Make sure we restore the 3D state next time.
1287                  */
1288                 sarea_priv->ctx_owner = 0;
1289
1290                 for (i = 0; i < nbox; i++) {
1291
1292                         /* Funny that this should be required --
1293                          *  sets top-left?
1294                          */
1295                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1296
1297                         BEGIN_RING(15);
1298
1299                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1300                         OUT_RING(RADEON_VTX_Z_PRESENT |
1301                                  RADEON_VTX_PKCOLOR_PRESENT);
1302                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1303                                   RADEON_PRIM_WALK_RING |
1304                                   RADEON_MAOS_ENABLE |
1305                                   RADEON_VTX_FMT_RADEON_MODE |
1306                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1307
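			/* Three vertices follow, each X, Y, Z plus a fourth
			 * dword for the packed color selected by
			 * RADEON_VTX_PKCOLOR_PRESENT (zero here).
			 */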
1308                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1309                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1310                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1311                         OUT_RING(0x0);
1312
1313                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1314                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1315                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1316                         OUT_RING(0x0);
1317
1318                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1319                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1320                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1321                         OUT_RING(0x0);
1322
1323                         ADVANCE_RING();
1324                 }
1325         }
1326
1327         /* Increment the clear counter.  The client-side 3D driver must
1328          * wait on this value before performing the clear ioctl.  We
1329          * need this because the card's so damned fast...
1330          */
1331         sarea_priv->last_clear++;
1332
1333         BEGIN_RING(4);
1334
1335         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1336         RADEON_WAIT_UNTIL_IDLE();
1337
1338         ADVANCE_RING();
1339 }
1340
1341 static void radeon_cp_dispatch_swap(struct drm_device *dev)
1342 {
1343         drm_radeon_private_t *dev_priv = dev->dev_private;
1344         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1345         int nbox = sarea_priv->nbox;
1346         struct drm_clip_rect *pbox = sarea_priv->boxes;
1347         int i;
1348         RING_LOCALS;
1349         DRM_DEBUG("\n");
1350
1351         /* Do some trivial performance monitoring...
1352          */
1353         if (dev_priv->do_boxes)
1354                 radeon_cp_performance_boxes(dev_priv);
1355
1356         /* Wait for the 3D stream to idle before dispatching the bitblt.
1357          * This will prevent data corruption between the two streams.
1358          */
1359         BEGIN_RING(2);
1360
1361         RADEON_WAIT_UNTIL_3D_IDLE();
1362
1363         ADVANCE_RING();
1364
1365         for (i = 0; i < nbox; i++) {
1366                 int x = pbox[i].x1;
1367                 int y = pbox[i].y1;
1368                 int w = pbox[i].x2 - x;
1369                 int h = pbox[i].y2 - y;
1370
1371                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1372
1373                 BEGIN_RING(9);
1374
1375                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1376                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1377                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1378                          RADEON_GMC_BRUSH_NONE |
1379                          (dev_priv->color_fmt << 8) |
1380                          RADEON_GMC_SRC_DATATYPE_COLOR |
1381                          RADEON_ROP3_S |
1382                          RADEON_DP_SRC_SOURCE_MEMORY |
1383                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1384
1385                 /* Make this work even if front & back are flipped:
1386                  */
1387                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1388                 if (sarea_priv->pfCurrentPage == 0) {
1389                         OUT_RING(dev_priv->back_pitch_offset);
1390                         OUT_RING(dev_priv->front_pitch_offset);
1391                 } else {
1392                         OUT_RING(dev_priv->front_pitch_offset);
1393                         OUT_RING(dev_priv->back_pitch_offset);
1394                 }
1395
1396                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1397                 OUT_RING((x << 16) | y);
1398                 OUT_RING((x << 16) | y);
1399                 OUT_RING((w << 16) | h);
1400
1401                 ADVANCE_RING();
1402         }
1403
1404         /* Increment the frame counter.  The client-side 3D driver must
1405          * throttle the framerate by waiting for this value before
1406          * performing the swapbuffer ioctl.
1407          */
1408         sarea_priv->last_frame++;
1409
1410         BEGIN_RING(4);
1411
1412         RADEON_FRAME_AGE(sarea_priv->last_frame);
1413         RADEON_WAIT_UNTIL_2D_IDLE();
1414
1415         ADVANCE_RING();
1416 }
1417
1418 static void radeon_cp_dispatch_flip(struct drm_device *dev)
1419 {
1420         drm_radeon_private_t *dev_priv = dev->dev_private;
1421         struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->handle;
1422         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1423             ? dev_priv->front_offset : dev_priv->back_offset;
1424         RING_LOCALS;
1425         DRM_DEBUG("pfCurrentPage=%d\n",
1426                   dev_priv->sarea_priv->pfCurrentPage);
1427
1428         /* Do some trivial performance monitoring...
1429          */
1430         if (dev_priv->do_boxes) {
1431                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1432                 radeon_cp_performance_boxes(dev_priv);
1433         }
1434
1435         /* Update the frame offsets for both CRTCs
1436          */
1437         BEGIN_RING(6);
1438
1439         RADEON_WAIT_UNTIL_3D_IDLE();
1440         OUT_RING_REG(RADEON_CRTC_OFFSET,
1441                      ((sarea->frame.y * dev_priv->front_pitch +
1442                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1443                      + offset);
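	/* Note: for the framebuffer color formats used here,
	 * (color_fmt - 2) appears to evaluate to the bytes per pixel
	 * (2 or 4), turning sarea->frame.x into a byte offset before
	 * the 8-byte alignment.
	 */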
1444         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1445                      + offset);
1446
1447         ADVANCE_RING();
1448
1449         /* Increment the frame counter.  The client-side 3D driver must
1450          * throttle the framerate by waiting for this value before
1451          * performing the swapbuffer ioctl.
1452          */
1453         dev_priv->sarea_priv->last_frame++;
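	/* Toggle the sarea's notion of the current page (0 <-> 1) so the
	 * next flip targets the other buffer.
	 */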
1454         dev_priv->sarea_priv->pfCurrentPage =
1455                 1 - dev_priv->sarea_priv->pfCurrentPage;
1456
1457         BEGIN_RING(2);
1458
1459         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1460
1461         ADVANCE_RING();
1462 }
1463
1464 static int bad_prim_vertex_nr(int primitive, int nr)
1465 {
1466         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1467         case RADEON_PRIM_TYPE_NONE:
1468         case RADEON_PRIM_TYPE_POINT:
1469                 return nr < 1;
1470         case RADEON_PRIM_TYPE_LINE:
1471                 return (nr & 1) || nr == 0;
1472         case RADEON_PRIM_TYPE_LINE_STRIP:
1473                 return nr < 2;
1474         case RADEON_PRIM_TYPE_TRI_LIST:
1475         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1476         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1477         case RADEON_PRIM_TYPE_RECT_LIST:
1478                 return nr % 3 || nr == 0;
1479         case RADEON_PRIM_TYPE_TRI_FAN:
1480         case RADEON_PRIM_TYPE_TRI_STRIP:
1481                 return nr < 3;
1482         default:
1483                 return 1;
1484         }
1485 }
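
/*
 * Informal examples of the check above (illustrative only): a LINE list
 * needs a non-zero even vertex count, and a TRI_LIST or RECT_LIST needs
 * a non-zero multiple of 3, so the three-vertex rectangles emitted by
 * the clear paths above pass, while e.g. (RADEON_PRIM_TYPE_TRI_LIST, 5)
 * would be rejected.
 */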
1486
1487 typedef struct {
1488         unsigned int start;
1489         unsigned int finish;
1490         unsigned int prim;
1491         unsigned int numverts;
1492         unsigned int offset;
1493         unsigned int vc_format;
1494 } drm_radeon_tcl_prim_t;
1495
1496 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1497                                       struct drm_buf * buf,
1498                                       drm_radeon_tcl_prim_t * prim)
1499 {
1500         drm_radeon_private_t *dev_priv = dev->dev_private;
1501         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1502         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1503         int numverts = (int)prim->numverts;
1504         int nbox = sarea_priv->nbox;
1505         int i = 0;
1506         RING_LOCALS;
1507
1508         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1509                   prim->prim,
1510                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1511
1512         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1513                 DRM_ERROR("bad prim %x numverts %d\n",
1514                           prim->prim, prim->numverts);
1515                 return;
1516         }
1517
1518         do {
1519                 /* Emit the next cliprect */
1520                 if (i < nbox) {
1521                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1522                 }
1523
1524                 /* Emit the vertex buffer rendering commands */
1525                 BEGIN_RING(5);
1526
1527                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1528                 OUT_RING(offset);
1529                 OUT_RING(numverts);
1530                 OUT_RING(prim->vc_format);
1531                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1532                          RADEON_COLOR_ORDER_RGBA |
1533                          RADEON_VTX_FMT_RADEON_MODE |
1534                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1535
1536                 ADVANCE_RING();
1537
1538                 i++;
1539         } while (i < nbox);
1540 }
1541
1542 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
1543 {
1544         drm_radeon_private_t *dev_priv = dev->dev_private;
1545         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1546         RING_LOCALS;
1547
1548         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1549
1550         /* Emit the vertex buffer age */
1551         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1552                 BEGIN_RING(3);
1553                 R600_DISPATCH_AGE(buf_priv->age);
1554                 ADVANCE_RING();
1555         } else {
1556                 BEGIN_RING(2);
1557                 RADEON_DISPATCH_AGE(buf_priv->age);
1558                 ADVANCE_RING();
1559         }
1560
1561         buf->pending = 1;
1562         buf->used = 0;
1563 }
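
/*
 * Informal summary of the age mechanism used above: the DISPATCH_AGE
 * macros have the CP write last_dispatch back to a scratch register once
 * the buffer has been consumed, and radeon_freelist_get() compares
 * buf_priv->age against that value before handing the buffer out again.
 */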
1564
1565 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1566                                         struct drm_buf * buf, int start, int end)
1567 {
1568         drm_radeon_private_t *dev_priv = dev->dev_private;
1569         RING_LOCALS;
1570         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1571
1572         if (start != end) {
1573                 int offset = (dev_priv->gart_buffers_offset
1574                               + buf->offset + start);
1575                 int dwords = (end - start + 3) / sizeof(u32);
1576
1577                 /* Indirect buffer data must be an even number of
1578                  * dwords, so if we've been given an odd number we must
1579                  * pad the data with a Type-2 CP packet.
1580                  */
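		/* RADEON_CP_PACKET2 is the type-2 filler packet; the CP
		 * treats it as a no-op, so one extra dword of padding is
		 * harmless.
		 */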
1581                 if (dwords & 1) {
1582                         u32 *data = (u32 *)
1583                             ((char *)dev->agp_buffer_map->handle
1584                              + buf->offset + start);
1585                         data[dwords++] = RADEON_CP_PACKET2;
1586                 }
1587
1588                 /* Fire off the indirect buffer */
1589                 BEGIN_RING(3);
1590
1591                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1592                 OUT_RING(offset);
1593                 OUT_RING(dwords);
1594
1595                 ADVANCE_RING();
1596         }
1597 }
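
/*
 * A minimal sketch of the dword accounting done above (illustrative only,
 * guarded out of the build, helper name made up): byte ranges are rounded
 * up to whole 32-bit dwords, and an odd count is padded to an even one
 * with a type-2 (RADEON_CP_PACKET2) filler before the indirect buffer is
 * fired off.
 */
#if 0
static int radeon_example_ib_dwords(int start, int end)
{
	int dwords = (end - start + 3) / sizeof(u32);	/* round up */

	if (dwords & 1)		/* the CP wants an even dword count */
		dwords++;	/* one RADEON_CP_PACKET2 of padding */

	return dwords;
}
#endif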
1598
1599 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1600                                        struct drm_buf * elt_buf,
1601                                        drm_radeon_tcl_prim_t * prim)
1602 {
1603         drm_radeon_private_t *dev_priv = dev->dev_private;
1604         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1605         int offset = dev_priv->gart_buffers_offset + prim->offset;
1606         u32 *data;
1607         int dwords;
1608         int i = 0;
1609         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1610         int count = (prim->finish - start) / sizeof(u16);
1611         int nbox = sarea_priv->nbox;
1612
1613         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1614                   prim->prim,
1615                   prim->vc_format,
1616                   prim->start, prim->finish, prim->offset, prim->numverts);
1617
1618         if (bad_prim_vertex_nr(prim->prim, count)) {
1619                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1620                 return;
1621         }
1622
1623         if (start >= prim->finish || (prim->start & 0x7)) {
1624                 DRM_ERROR("buffer prim %d\n", prim->prim);
1625                 return;
1626         }
1627
1628         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1629
1630         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1631                         elt_buf->offset + prim->start);
1632
1633         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1634         data[1] = offset;
1635         data[2] = prim->numverts;
1636         data[3] = prim->vc_format;
1637         data[4] = (prim->prim |
1638                    RADEON_PRIM_WALK_IND |
1639                    RADEON_COLOR_ORDER_RGBA |
1640                    RADEON_VTX_FMT_RADEON_MODE |
1641                    (count << RADEON_NUM_VERTICES_SHIFT));
1642
1643         do {
1644                 if (i < nbox)
1645                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1646
1647                 radeon_cp_dispatch_indirect(dev, elt_buf,
1648                                             prim->start, prim->finish);
1649
1650                 i++;
1651         } while (i < nbox);
1652
1653 }
1654
1655 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1656
1657 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1658                                       struct drm_file *file_priv,
1659                                       drm_radeon_texture_t * tex,
1660                                       drm_radeon_tex_image_t * image)
1661 {
1662         drm_radeon_private_t *dev_priv = dev->dev_private;
1663         struct drm_buf *buf;
1664         u32 format;
1665         u32 *buffer;
1666         const u8 __user *data;
1667         int size, dwords, tex_width, blit_width, spitch;
1668         u32 height;
1669         int i;
1670         u32 texpitch, microtile;
1671         u32 offset, byte_offset;
1672         RING_LOCALS;
1673
1674         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1675                 DRM_ERROR("Invalid destination offset\n");
1676                 return -EINVAL;
1677         }
1678
1679         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1680
1681         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1682          * up with the texture data from the host data blit, otherwise
1683          * part of the texture image may be corrupted.
1684          */
1685         BEGIN_RING(4);
1686         RADEON_FLUSH_CACHE();
1687         RADEON_WAIT_UNTIL_IDLE();
1688         ADVANCE_RING();
1689
1690         /* The compiler won't optimize away a division by a variable,
1691          * even if the only legal values are powers of two.  Thus, we'll
1692          * use a shift instead.
1693          */
1694         switch (tex->format) {
1695         case RADEON_TXFORMAT_ARGB8888:
1696         case RADEON_TXFORMAT_RGBA8888:
1697                 format = RADEON_COLOR_FORMAT_ARGB8888;
1698                 tex_width = tex->width * 4;
1699                 blit_width = image->width * 4;
1700                 break;
1701         case RADEON_TXFORMAT_AI88:
1702         case RADEON_TXFORMAT_ARGB1555:
1703         case RADEON_TXFORMAT_RGB565:
1704         case RADEON_TXFORMAT_ARGB4444:
1705         case RADEON_TXFORMAT_VYUY422:
1706         case RADEON_TXFORMAT_YVYU422:
1707                 format = RADEON_COLOR_FORMAT_RGB565;
1708                 tex_width = tex->width * 2;
1709                 blit_width = image->width * 2;
1710                 break;
1711         case RADEON_TXFORMAT_I8:
1712         case RADEON_TXFORMAT_RGB332:
1713                 format = RADEON_COLOR_FORMAT_CI8;
1714                 tex_width = tex->width * 1;
1715                 blit_width = image->width * 1;
1716                 break;
1717         default:
1718                 DRM_ERROR("invalid texture format %d\n", tex->format);
1719                 return -EINVAL;
1720         }
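	/* The blit source pitch below is programmed in 64-byte units, so
	 * divide the byte width by 64 with a shift (see the note above
	 * about avoiding a division by a variable).
	 */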
1721         spitch = blit_width >> 6;
1722         if (spitch == 0 && image->height > 1)
1723                 return -EINVAL;
1724
1725         texpitch = tex->pitch;
1726         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1727                 microtile = 1;
1728                 if (tex_width < 64) {
1729                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1730                         /* we got tiled coordinates, untile them */
1731                         image->x *= 2;
1732                 }
1733         } else
1734                 microtile = 0;
1735
1736         /* this might fail for zero-sized uploads - are those illegal? */
1737         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1738                                 blit_width - 1)) {
1739                 DRM_ERROR("Invalid final destination offset\n");
1740                 return -EINVAL;
1741         }
1742
1743         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1744
1745         do {
1746                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1747                           tex->offset >> 10, tex->pitch, tex->format,
1748                           image->x, image->y, image->width, image->height);
1749
1750                 /* Make a copy of some parameters in case we have to
1751                  * update them for a multi-pass texture blit.
1752                  */
1753                 height = image->height;
1754                 data = (const u8 __user *)image->data;
1755
1756                 size = height * blit_width;
1757
1758                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1759                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1760                         size = height * blit_width;
1761                 } else if (size < 4 && size > 0) {
1762                         size = 4;
1763                 } else if (size == 0) {
1764                         return 0;
1765                 }
1766
1767                 buf = radeon_freelist_get(dev);
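		/* Note: the retry below is disabled by the "0 &&", so a
		 * full freelist falls straight through to the EAGAIN path.
		 */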
1768                 if (0 && !buf) {
1769                         radeon_do_cp_idle(dev_priv);
1770                         buf = radeon_freelist_get(dev);
1771                 }
1772                 if (!buf) {
1773                         DRM_DEBUG("EAGAIN\n");
1774                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1775                                 return -EFAULT;
1776                         return -EAGAIN;
1777                 }
1778
1779                 /* Dispatch the indirect buffer.
1780                  */
1781                 buffer =
1782                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1783                 dwords = size / 4;
1784
1785 #define RADEON_COPY_MT(_buf, _data, _width) \
1786         do { \
1787                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1788                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1789                         return -EFAULT; \
1790                 } \
1791         } while(0)
1792
1793                 if (microtile) {
1794                         /* Texture micro tiling is in use, so the minimum texture width is 16 bytes.
1795                            However, we cannot use the blitter directly for texture widths < 64 bytes,
1796                            since the minimum texture pitch is 64 bytes and it has to match the
1797                            texture width or the blitter will tile the data incorrectly.
1798                            Thus we tile manually in that case.  We also special-case
1799                            tex height = 1, since the actual image will have height 2 and
1800                            we must not read beyond the texture size supplied from
1801                            user space. */
1802                         if (tex->height == 1) {
1803                                 if (tex_width >= 64 || tex_width <= 16) {
1804                                         RADEON_COPY_MT(buffer, data,
1805                                                 (int)(tex_width * sizeof(u32)));
1806                                 } else if (tex_width == 32) {
1807                                         RADEON_COPY_MT(buffer, data, 16);
1808                                         RADEON_COPY_MT(buffer + 8,
1809                                                        data + 16, 16);
1810                                 }
1811                         } else if (tex_width >= 64 || tex_width == 16) {
1812                                 RADEON_COPY_MT(buffer, data,
1813                                                (int)(dwords * sizeof(u32)));
1814                         } else if (tex_width < 16) {
1815                                 for (i = 0; i < tex->height; i++) {
1816                                         RADEON_COPY_MT(buffer, data, tex_width);
1817                                         buffer += 4;
1818                                         data += tex_width;
1819                                 }
1820                         } else if (tex_width == 32) {
1821                                 /* TODO: make sure this works when not fitting in one buffer
1822                                    (i.e. 32bytes x 2048...) */
1823                                 for (i = 0; i < tex->height; i += 2) {
1824                                         RADEON_COPY_MT(buffer, data, 16);
1825                                         data += 16;
1826                                         RADEON_COPY_MT(buffer + 8, data, 16);
1827                                         data += 16;
1828                                         RADEON_COPY_MT(buffer + 4, data, 16);
1829                                         data += 16;
1830                                         RADEON_COPY_MT(buffer + 12, data, 16);
1831                                         data += 16;
1832                                         buffer += 16;
1833                                 }
1834                         }
1835                 } else {
1836                         if (tex_width >= 32) {
1837                                 /* Texture image width is larger than the minimum, so we
1838                                  * can upload it directly.
1839                                  */
1840                                 RADEON_COPY_MT(buffer, data,
1841                                                (int)(dwords * sizeof(u32)));
1842                         } else {
1843                                 /* Texture image width is less than the minimum, so we
1844                                  * need to pad out each image scanline to the minimum
1845                                  * width.
1846                                  */
1847                                 for (i = 0; i < tex->height; i++) {
1848                                         RADEON_COPY_MT(buffer, data, tex_width);
1849                                         buffer += 8;
1850                                         data += tex_width;
1851                                 }
1852                         }
1853                 }
1854
1855 #undef RADEON_COPY_MT
1856                 byte_offset = (image->y & ~2047) * blit_width;
1857                 buf->file_priv = file_priv;
1858                 buf->used = size;
1859                 offset = dev_priv->gart_buffers_offset + buf->offset;
1860                 BEGIN_RING(9);
1861                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1862                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1863                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1864                          RADEON_GMC_BRUSH_NONE |
1865                          (format << 8) |
1866                          RADEON_GMC_SRC_DATATYPE_COLOR |
1867                          RADEON_ROP3_S |
1868                          RADEON_DP_SRC_SOURCE_MEMORY |
1869                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
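		/* Each pitch/offset word packs the pitch in 64-byte units
		 * into bits 22+ and the offset in 1 KB units into the low
		 * bits, which is what the >> 6 and >> 10 shifts produce.
		 */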
1870                 OUT_RING((spitch << 22) | (offset >> 10));
1871                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1872                 OUT_RING(0);
1873                 OUT_RING((image->x << 16) | (image->y % 2048));
1874                 OUT_RING((image->width << 16) | height);
1875                 RADEON_WAIT_UNTIL_2D_IDLE();
1876                 ADVANCE_RING();
1877                 COMMIT_RING();
1878
1879                 radeon_cp_discard_buffer(dev, buf);
1880
1881                 /* Update the input parameters for next time */
1882                 image->y += height;
1883                 image->height -= height;
1884                 image->data = (const u8 __user *)image->data + size;
1885         } while (image->height > 0);
1886
1887         /* Flush the pixel cache after the blit completes.  This ensures
1888          * the texture data is written out to memory before rendering
1889          * continues.
1890          */
1891         BEGIN_RING(4);
1892         RADEON_FLUSH_CACHE();
1893         RADEON_WAIT_UNTIL_2D_IDLE();
1894         ADVANCE_RING();
1895         COMMIT_RING();
1896
1897         return 0;
1898 }
1899
1900 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1901 {
1902         drm_radeon_private_t *dev_priv = dev->dev_private;
1903         int i;
1904         RING_LOCALS;
1905         DRM_DEBUG("\n");
1906
1907         BEGIN_RING(35);
1908
1909         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1910         OUT_RING(0x00000000);
1911
1912         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1913         for (i = 0; i < 32; i++) {
1914                 OUT_RING(stipple[i]);
1915         }
1916
1917         ADVANCE_RING();
1918 }
1919
1920 static void radeon_apply_surface_regs(int surf_index,
1921                                       drm_radeon_private_t *dev_priv)
1922 {
1923         if (!dev_priv->mmio)
1924                 return;
1925
1926         radeon_do_cp_idle(dev_priv);
1927
1928         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1929                      dev_priv->surfaces[surf_index].flags);
1930         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1931                      dev_priv->surfaces[surf_index].lower);
1932         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1933                      dev_priv->surfaces[surf_index].upper);
1934 }
1935
1936 /* Allocates a virtual surface.
1937  * This doesn't always allocate a real surface; when possible it
1938  * stretches an existing one instead.
1939  *
1940  * Note that refcount can be at most 2: if it could reach 3, freeing
1941  * a surface might force us to allocate a new real surface, which
1942  * might not always be available.
1943  * For example: we allocate three contiguous surfaces ABC. If B is
1944  * freed, we suddenly need two surfaces to store A and C, which might
1945  * not always be available.
1946  */
1947 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1948                          drm_radeon_private_t *dev_priv,
1949                          struct drm_file *file_priv)
1950 {
1951         struct radeon_virt_surface *s;
1952         int i;
1953         int virt_surface_index;
1954         uint32_t new_upper, new_lower;
1955
1956         new_lower = new->address;
1957         new_upper = new_lower + new->size - 1;
1958
1959         /* sanity check */
1960         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1961             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1962              RADEON_SURF_ADDRESS_FIXED_MASK)
1963             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1964                 return -1;
1965
1966         /* make sure there is no overlap with existing surfaces */
1967         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1968                 if ((dev_priv->surfaces[i].refcount != 0) &&
1969                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1970                       (new_lower < dev_priv->surfaces[i].upper)) ||
1971                      ((new_lower < dev_priv->surfaces[i].lower) &&
1972                       (new_upper > dev_priv->surfaces[i].lower)))) {
1973                         return -1;
1974                 }
1975         }
1976
1977         /* find a virtual surface */
1978         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1979                 if (dev_priv->virt_surfaces[i].file_priv == 0)
1980                         break;
1981         if (i == 2 * RADEON_MAX_SURFACES) {
1982                 return -1;
1983         }
1984         virt_surface_index = i;
1985
1986         /* try to reuse an existing surface */
1987         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1988                 /* extend before */
1989                 if ((dev_priv->surfaces[i].refcount == 1) &&
1990                     (new->flags == dev_priv->surfaces[i].flags) &&
1991                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1992                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1993                         s->surface_index = i;
1994                         s->lower = new_lower;
1995                         s->upper = new_upper;
1996                         s->flags = new->flags;
1997                         s->file_priv = file_priv;
1998                         dev_priv->surfaces[i].refcount++;
1999                         dev_priv->surfaces[i].lower = s->lower;
2000                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2001                         return virt_surface_index;
2002                 }
2003
2004                 /* extend after */
2005                 if ((dev_priv->surfaces[i].refcount == 1) &&
2006                     (new->flags == dev_priv->surfaces[i].flags) &&
2007                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2008                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2009                         s->surface_index = i;
2010                         s->lower = new_lower;
2011                         s->upper = new_upper;
2012                         s->flags = new->flags;
2013                         s->file_priv = file_priv;
2014                         dev_priv->surfaces[i].refcount++;
2015                         dev_priv->surfaces[i].upper = s->upper;
2016                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2017                         return virt_surface_index;
2018                 }
2019         }
2020
2021         /* okay, we need a new one */
2022         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2023                 if (dev_priv->surfaces[i].refcount == 0) {
2024                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2025                         s->surface_index = i;
2026                         s->lower = new_lower;
2027                         s->upper = new_upper;
2028                         s->flags = new->flags;
2029                         s->file_priv = file_priv;
2030                         dev_priv->surfaces[i].refcount = 1;
2031                         dev_priv->surfaces[i].lower = s->lower;
2032                         dev_priv->surfaces[i].upper = s->upper;
2033                         dev_priv->surfaces[i].flags = s->flags;
2034                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2035                         return virt_surface_index;
2036                 }
2037         }
2038
2039         /* we didn't find anything */
2040         return -1;
2041 }
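
/*
 * Worked example of the reuse logic above (hypothetical addresses): with
 * an existing surface covering [0x00100000, 0x001fffff] at refcount 1 and
 * matching flags, a request for [0x00200000, 0x002fffff] satisfies the
 * "extend after" test (new_lower == upper + 1), so the real surface is
 * simply stretched and only a new virtual-surface slot is consumed.
 */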
2042
2043 static int free_surface(struct drm_file *file_priv,
2044                         drm_radeon_private_t * dev_priv,
2045                         int lower)
2046 {
2047         struct radeon_virt_surface *s;
2048         int i;
2049         /* find the virtual surface */
2050         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2051                 s = &(dev_priv->virt_surfaces[i]);
2052                 if (s->file_priv) {
2053                         if ((lower == s->lower) && (file_priv == s->file_priv))
2054                         {
2055                                 if (dev_priv->surfaces[s->surface_index].
2056                                     lower == s->lower)
2057                                         dev_priv->surfaces[s->surface_index].
2058                                             lower = s->upper;
2059
2060                                 if (dev_priv->surfaces[s->surface_index].
2061                                     upper == s->upper)
2062                                         dev_priv->surfaces[s->surface_index].
2063                                             upper = s->lower;
2064
2065                                 dev_priv->surfaces[s->surface_index].refcount--;
2066                                 if (dev_priv->surfaces[s->surface_index].
2067                                     refcount == 0)
2068                                         dev_priv->surfaces[s->surface_index].
2069                                             flags = 0;
2070                                 s->file_priv = NULL;
2071                                 radeon_apply_surface_regs(s->surface_index,
2072                                                           dev_priv);
2073                                 return 0;
2074                         }
2075                 }
2076         }
2077         return 1;
2078 }
2079
2080 static void radeon_surfaces_release(struct drm_file *file_priv,
2081                                     drm_radeon_private_t * dev_priv)
2082 {
2083         int i;
2084         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2085                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2086                         free_surface(file_priv, dev_priv,
2087                                      dev_priv->virt_surfaces[i].lower);
2088         }
2089 }
2090
2091 /* ================================================================
2092  * IOCTL functions
2093  */
2094 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2095 {
2096         drm_radeon_private_t *dev_priv = dev->dev_private;
2097         drm_radeon_surface_alloc_t *alloc = data;
2098
2099         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2100                 return -EINVAL;
2101         else
2102                 return 0;
2103 }
2104
2105 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2106 {
2107         drm_radeon_private_t *dev_priv = dev->dev_private;
2108         drm_radeon_surface_free_t *memfree = data;
2109
2110         if (free_surface(file_priv, dev_priv, memfree->address))
2111                 return -EINVAL;
2112         else
2113                 return 0;
2114 }
2115
2116 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2117 {
2118         drm_radeon_private_t *dev_priv = dev->dev_private;
2119         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2120         drm_radeon_clear_t *clear = data;
2121         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2122         DRM_DEBUG("\n");
2123
2124         LOCK_TEST_WITH_RETURN(dev, file_priv);
2125
2126         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2127
2128         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2129                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2130
2131         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2132                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2133                 return -EFAULT;
2134
2135         radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2136
2137         COMMIT_RING();
2138         return 0;
2139 }
2140
2141 /* Not sure why this isn't set all the time:
2142  */
2143 static int radeon_do_init_pageflip(struct drm_device *dev)
2144 {
2145         drm_radeon_private_t *dev_priv = dev->dev_private;
2146         RING_LOCALS;
2147
2148         DRM_DEBUG("\n");
2149
2150         BEGIN_RING(6);
2151         RADEON_WAIT_UNTIL_3D_IDLE();
2152         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2153         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2154                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2155         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2156         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2157                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2158         ADVANCE_RING();
2159
2160         dev_priv->page_flipping = 1;
2161
2162         if (dev_priv->sarea_priv->pfCurrentPage != 1)
2163                 dev_priv->sarea_priv->pfCurrentPage = 0;
2164
2165         return 0;
2166 }
2167
2168 /* Swapping and flipping are different operations, need different ioctls.
2169  * They can & should be intermixed to support multiple 3d windows.
2170  */
2171 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2172 {
2173         drm_radeon_private_t *dev_priv = dev->dev_private;
2174         DRM_DEBUG("\n");
2175
2176         LOCK_TEST_WITH_RETURN(dev, file_priv);
2177
2178         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2179
2180         if (!dev_priv->page_flipping)
2181                 radeon_do_init_pageflip(dev);
2182
2183         radeon_cp_dispatch_flip(dev);
2184
2185         COMMIT_RING();
2186         return 0;
2187 }
2188
2189 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2190 {
2191         drm_radeon_private_t *dev_priv = dev->dev_private;
2192         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2193
2194         DRM_DEBUG("\n");
2195
2196         LOCK_TEST_WITH_RETURN(dev, file_priv);
2197
2198         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2199
2200         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2201                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2202
2203         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2204                 r600_cp_dispatch_swap(dev);
2205         else
2206                 radeon_cp_dispatch_swap(dev);
2207         sarea_priv->ctx_owner = 0;
2208
2209         COMMIT_RING();
2210         return 0;
2211 }
2212
2213 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2214 {
2215         drm_radeon_private_t *dev_priv = dev->dev_private;
2216         drm_radeon_sarea_t *sarea_priv;
2217         struct drm_device_dma *dma = dev->dma;
2218         struct drm_buf *buf;
2219         drm_radeon_vertex_t *vertex = data;
2220         drm_radeon_tcl_prim_t prim;
2221
2222         LOCK_TEST_WITH_RETURN(dev, file_priv);
2223
2224         sarea_priv = dev_priv->sarea_priv;
2225
2226         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2227                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2228
2229         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2230                 DRM_ERROR("buffer index %d (of %d max)\n",
2231                           vertex->idx, dma->buf_count - 1);
2232                 return -EINVAL;
2233         }
2234         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2235                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2236                 return -EINVAL;
2237         }
2238
2239         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2240         VB_AGE_TEST_WITH_RETURN(dev_priv);
2241
2242         buf = dma->buflist[vertex->idx];
2243
2244         if (buf->file_priv != file_priv) {
2245                 DRM_ERROR("process %d using buffer owned by %p\n",
2246                           DRM_CURRENTPID, buf->file_priv);
2247                 return -EINVAL;
2248         }
2249         if (buf->pending) {
2250                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2251                 return -EINVAL;
2252         }
2253
2254         /* Build up a prim_t record:
2255          */
2256         if (vertex->count) {
2257                 buf->used = vertex->count;      /* not used? */
2258
2259                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2260                         if (radeon_emit_state(dev_priv, file_priv,
2261                                               &sarea_priv->context_state,
2262                                               sarea_priv->tex_state,
2263                                               sarea_priv->dirty)) {
2264                                 DRM_ERROR("radeon_emit_state failed\n");
2265                                 return -EINVAL;
2266                         }
2267
2268                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2269                                                RADEON_UPLOAD_TEX1IMAGES |
2270                                                RADEON_UPLOAD_TEX2IMAGES |
2271                                                RADEON_REQUIRE_QUIESCENCE);
2272                 }
2273
2274                 prim.start = 0;
2275                 prim.finish = vertex->count;    /* unused */
2276                 prim.prim = vertex->prim;
2277                 prim.numverts = vertex->count;
2278                 prim.vc_format = sarea_priv->vc_format;
2279
2280                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2281         }
2282
2283         if (vertex->discard) {
2284                 radeon_cp_discard_buffer(dev, buf);
2285         }
2286
2287         COMMIT_RING();
2288         return 0;
2289 }
2290
2291 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2292 {
2293         drm_radeon_private_t *dev_priv = dev->dev_private;
2294         drm_radeon_sarea_t *sarea_priv;
2295         struct drm_device_dma *dma = dev->dma;
2296         struct drm_buf *buf;
2297         drm_radeon_indices_t *elts = data;
2298         drm_radeon_tcl_prim_t prim;
2299         int count;
2300
2301         LOCK_TEST_WITH_RETURN(dev, file_priv);
2302
2303         sarea_priv = dev_priv->sarea_priv;
2304
2305         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2306                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2307                   elts->discard);
2308
2309         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2310                 DRM_ERROR("buffer index %d (of %d max)\n",
2311                           elts->idx, dma->buf_count - 1);
2312                 return -EINVAL;
2313         }
2314         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2315                 DRM_ERROR("buffer prim %d\n", elts->prim);
2316                 return -EINVAL;
2317         }
2318
2319         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2320         VB_AGE_TEST_WITH_RETURN(dev_priv);
2321
2322         buf = dma->buflist[elts->idx];
2323
2324         if (buf->file_priv != file_priv) {
2325                 DRM_ERROR("process %d using buffer owned by %p\n",
2326                           DRM_CURRENTPID, buf->file_priv);
2327                 return -EINVAL;
2328         }
2329         if (buf->pending) {
2330                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2331                 return -EINVAL;
2332         }
2333
2334         count = (elts->end - elts->start) / sizeof(u16);
2335         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2336
2337         if (elts->start & 0x7) {
2338                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2339                 return -EINVAL;
2340         }
2341         if (elts->start < buf->used) {
2342                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2343                 return -EINVAL;
2344         }
2345
2346         buf->used = elts->end;
2347
2348         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2349                 if (radeon_emit_state(dev_priv, file_priv,
2350                                       &sarea_priv->context_state,
2351                                       sarea_priv->tex_state,
2352                                       sarea_priv->dirty)) {
2353                         DRM_ERROR("radeon_emit_state failed\n");
2354                         return -EINVAL;
2355                 }
2356
2357                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2358                                        RADEON_UPLOAD_TEX1IMAGES |
2359                                        RADEON_UPLOAD_TEX2IMAGES |
2360                                        RADEON_REQUIRE_QUIESCENCE);
2361         }
2362
2363         /* Build up a prim_t record:
2364          */
2365         prim.start = elts->start;
2366         prim.finish = elts->end;
2367         prim.prim = elts->prim;
2368         prim.offset = 0;        /* offset from start of dma buffers */
2369         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2370         prim.vc_format = sarea_priv->vc_format;
2371
2372         radeon_cp_dispatch_indices(dev, buf, &prim);
2373         if (elts->discard) {
2374                 radeon_cp_discard_buffer(dev, buf);
2375         }
2376
2377         COMMIT_RING();
2378         return 0;
2379 }
2380
2381 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2382 {
2383         drm_radeon_private_t *dev_priv = dev->dev_private;
2384         drm_radeon_texture_t *tex = data;
2385         drm_radeon_tex_image_t image;
2386         int ret;
2387
2388         LOCK_TEST_WITH_RETURN(dev, file_priv);
2389
2390         if (tex->image == NULL) {
2391                 DRM_ERROR("null texture image!\n");
2392                 return -EINVAL;
2393         }
2394
2395         if (DRM_COPY_FROM_USER(&image,
2396                                (drm_radeon_tex_image_t __user *) tex->image,
2397                                sizeof(image)))
2398                 return -EFAULT;
2399
2400         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2401         VB_AGE_TEST_WITH_RETURN(dev_priv);
2402
2403         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2404                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2405         else
2406                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2407
2408         return ret;
2409 }
2410
2411 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2412 {
2413         drm_radeon_private_t *dev_priv = dev->dev_private;
2414         drm_radeon_stipple_t *stipple = data;
2415         u32 mask[32];
2416
2417         LOCK_TEST_WITH_RETURN(dev, file_priv);
2418
2419         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2420                 return -EFAULT;
2421
2422         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2423
2424         radeon_cp_dispatch_stipple(dev, mask);
2425
2426         COMMIT_RING();
2427         return 0;
2428 }
2429
2430 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2431 {
2432         drm_radeon_private_t *dev_priv = dev->dev_private;
2433         struct drm_device_dma *dma = dev->dma;
2434         struct drm_buf *buf;
2435         drm_radeon_indirect_t *indirect = data;
2436         RING_LOCALS;
2437
2438         LOCK_TEST_WITH_RETURN(dev, file_priv);
2439
2440         if (!dev_priv) {
2441                 DRM_ERROR("called with no initialization\n");
2442                 return -EINVAL;
2443         }
2444
2445         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2446                   indirect->idx, indirect->start, indirect->end,
2447                   indirect->discard);
2448
2449         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2450                 DRM_ERROR("buffer index %d (of %d max)\n",
2451                           indirect->idx, dma->buf_count - 1);
2452                 return -EINVAL;
2453         }
2454
2455         buf = dma->buflist[indirect->idx];
2456
2457         if (buf->file_priv != file_priv) {
2458                 DRM_ERROR("process %d using buffer owned by %p\n",
2459                           DRM_CURRENTPID, buf->file_priv);
2460                 return -EINVAL;
2461         }
2462         if (buf->pending) {
2463                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2464                 return -EINVAL;
2465         }
2466
2467         if (indirect->start < buf->used) {
2468                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2469                           indirect->start, buf->used);
2470                 return -EINVAL;
2471         }
2472
2473         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2474         VB_AGE_TEST_WITH_RETURN(dev_priv);
2475
2476         buf->used = indirect->end;
2477
2478         /* Dispatch the indirect buffer full of commands from the
2479          * X server.  This is insecure and is thus only available to
2480          * privileged clients.
2481          */
2482         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2483                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2484         else {
2485                 /* Wait for the 3D stream to idle before the indirect buffer
2486                  * containing 2D acceleration commands is processed.
2487                  */
2488                 BEGIN_RING(2);
2489                 RADEON_WAIT_UNTIL_3D_IDLE();
2490                 ADVANCE_RING();
2491                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2492         }
2493
2494         if (indirect->discard)
2495                 radeon_cp_discard_buffer(dev, buf);
2496
2497         COMMIT_RING();
2498         return 0;
2499 }
2500
2501 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2502 {
2503         drm_radeon_private_t *dev_priv = dev->dev_private;
2504         drm_radeon_sarea_t *sarea_priv;
2505         struct drm_device_dma *dma = dev->dma;
2506         struct drm_buf *buf;
2507         drm_radeon_vertex2_t *vertex = data;
2508         int i;
2509         unsigned char laststate;
2510
2511         LOCK_TEST_WITH_RETURN(dev, file_priv);
2512
2513         sarea_priv = dev_priv->sarea_priv;
2514
2515         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2516                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2517
2518         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2519                 DRM_ERROR("buffer index %d (of %d max)\n",
2520                           vertex->idx, dma->buf_count - 1);
2521                 return -EINVAL;
2522         }
2523
2524         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2525         VB_AGE_TEST_WITH_RETURN(dev_priv);
2526
2527         buf = dma->buflist[vertex->idx];
2528
2529         if (buf->file_priv != file_priv) {
2530                 DRM_ERROR("process %d using buffer owned by %p\n",
2531                           DRM_CURRENTPID, buf->file_priv);
2532                 return -EINVAL;
2533         }
2534
2535         if (buf->pending) {
2536                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2537                 return -EINVAL;
2538         }
2539
2540         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2541                 return -EINVAL;
2542
2543         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2544                 drm_radeon_prim_t prim;
2545                 drm_radeon_tcl_prim_t tclprim;
2546
2547                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2548                         return -EFAULT;
2549
2550                 if (prim.stateidx != laststate) {
2551                         drm_radeon_state_t state;
2552
2553                         if (DRM_COPY_FROM_USER(&state,
2554                                                &vertex->state[prim.stateidx],
2555                                                sizeof(state)))
2556                                 return -EFAULT;
2557
2558                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2559                                 DRM_ERROR("radeon_emit_state2 failed\n");
2560                                 return -EINVAL;
2561                         }
2562
2563                         laststate = prim.stateidx;
2564                 }
2565
2566                 tclprim.start = prim.start;
2567                 tclprim.finish = prim.finish;
2568                 tclprim.prim = prim.prim;
2569                 tclprim.vc_format = prim.vc_format;
2570
2571                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2572                         tclprim.offset = prim.numverts * 64;
2573                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2574
2575                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2576                 } else {
2577                         tclprim.numverts = prim.numverts;
2578                         tclprim.offset = 0;     /* not used */
2579
2580                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2581                 }
2582
2583                 if (sarea_priv->nbox == 1)
2584                         sarea_priv->nbox = 0;
2585         }
2586
2587         if (vertex->discard) {
2588                 radeon_cp_discard_buffer(dev, buf);
2589         }
2590
2591         COMMIT_RING();
2592         return 0;
2593 }
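
/*
 * Illustrative userspace sketch (not kernel code) of the VERTEX2 ioctl
 * handled above, touching only the drm_radeon_vertex2_t and
 * drm_radeon_prim_t fields the handler reads.  num_verts, vc_format,
 * buf_idx, fd and state_blocks (an array of drm_radeon_state_t indexed
 * by prim.stateidx) are placeholders; the primitive type constant is
 * assumed to come from radeon_drm.h.
 *
 *     drm_radeon_prim_t prim;
 *     drm_radeon_vertex2_t v;
 *
 *     prim.start     = 0;
 *     prim.finish    = num_verts;
 *     prim.prim      = RADEON_PRIM_TYPE_TRI_LIST;
 *     prim.stateidx  = 0;
 *     prim.numverts  = num_verts;
 *     prim.vc_format = vc_format;
 *
 *     v.idx      = buf_idx;
 *     v.discard  = 1;
 *     v.nr_prims = 1;
 *     v.prim     = &prim;
 *     v.state    = state_blocks;
 *     drmCommandWrite(fd, DRM_RADEON_VERTEX2, &v, sizeof(v));
 */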
2594
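/* RADEON_CMD_PACKET: emit one entry of the packet[] register table.
 * header.packet.packet_id selects the entry, which supplies the start
 * register and dword count; the payload is range-checked by
 * radeon_check_and_fixup_packets() and then written as a single
 * CP_PACKET0 burst.
 */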
2595 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2596                                struct drm_file *file_priv,
2597                                drm_radeon_cmd_header_t header,
2598                                drm_radeon_kcmd_buffer_t *cmdbuf)
2599 {
2600         int id = (int)header.packet.packet_id;
2601         int sz, reg;
2602         int *data = (int *)cmdbuf->buf;
2603         RING_LOCALS;
2604
2605         if (id >= RADEON_MAX_STATE_PACKETS)
2606                 return -EINVAL;
2607
2608         sz = packet[id].len;
2609         reg = packet[id].start;
2610
2611         if (sz * sizeof(int) > cmdbuf->bufsz) {
2612                 DRM_ERROR("Packet size larger than data provided\n");
2613                 return -EINVAL;
2614         }
2615
2616         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2617                 DRM_ERROR("Packet verification failed\n");
2618                 return -EINVAL;
2619         }
2620
2621         BEGIN_RING(sz + 1);
2622         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2623         OUT_RING_TABLE(data, sz);
2624         ADVANCE_RING();
2625
2626         cmdbuf->buf += sz * sizeof(int);
2627         cmdbuf->bufsz -= sz * sizeof(int);
2628         return 0;
2629 }
2630
2631 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2632                                           drm_radeon_cmd_header_t header,
2633                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2634 {
2635         int sz = header.scalars.count;
2636         int start = header.scalars.offset;
2637         int stride = header.scalars.stride;
2638         RING_LOCALS;
2639
2640         BEGIN_RING(3 + sz);
2641         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2642         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2643         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2644         OUT_RING_TABLE(cmdbuf->buf, sz);
2645         ADVANCE_RING();
2646         cmdbuf->buf += sz * sizeof(int);
2647         cmdbuf->bufsz -= sz * sizeof(int);
2648         return 0;
2649 }
2650
2651 /* God this is ugly
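 *
 * The scalars header packs the start offset into a single byte, so plain
 * SCALARS cannot reach scalar slots at or above 0x100; SCALARS2 is the
 * same operation with the start index biased by 0x100 to cover that
 * upper range.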
2652  */
2653 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2654                                            drm_radeon_cmd_header_t header,
2655                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2656 {
2657         int sz = header.scalars.count;
2658         int start = ((unsigned int)header.scalars.offset) + 0x100;
2659         int stride = header.scalars.stride;
2660         RING_LOCALS;
2661
2662         BEGIN_RING(3 + sz);
2663         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2664         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2665         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2666         OUT_RING_TABLE(cmdbuf->buf, sz);
2667         ADVANCE_RING();
2668         cmdbuf->buf += sz * sizeof(int);
2669         cmdbuf->bufsz -= sz * sizeof(int);
2670         return 0;
2671 }
2672
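/* RADEON_CMD_VECTORS: load TCL vector state.  header.vectors.offset is
 * the starting vector (octword) index, header.vectors.stride the octword
 * stride, and header.vectors.count the number of payload dwords streamed
 * into RADEON_SE_TCL_VECTOR_DATA_REG after a TCL state flush.
 */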
2673 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2674                                           drm_radeon_cmd_header_t header,
2675                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2676 {
2677         int sz = header.vectors.count;
2678         int start = header.vectors.offset;
2679         int stride = header.vectors.stride;
2680         RING_LOCALS;
2681
2682         BEGIN_RING(5 + sz);
2683         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2684         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2685         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2686         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2687         OUT_RING_TABLE(cmdbuf->buf, sz);
2688         ADVANCE_RING();
2689
2690         cmdbuf->buf += sz * sizeof(int);
2691         cmdbuf->bufsz -= sz * sizeof(int);
2692         return 0;
2693 }
2694
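/* RADEON_CMD_VECLINEAR: like RADEON_CMD_VECTORS, but the start index is
 * assembled from the addr_lo/addr_hi header bytes, the octword stride is
 * fixed at one, and header.veclinear.count is given in vectors of four
 * dwords each.
 */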
2695 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2696                                           drm_radeon_cmd_header_t header,
2697                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2698 {
2699         int sz = header.veclinear.count * 4;
2700         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2701         RING_LOCALS;
2702
2703         if (!sz)
2704                 return 0;
2705         if (sz * 4 > cmdbuf->bufsz)
2706                 return -EINVAL;
2707
2708         BEGIN_RING(5 + sz);
2709         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2710         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2711         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2712         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2713         OUT_RING_TABLE(cmdbuf->buf, sz);
2714         ADVANCE_RING();
2715
2716         cmdbuf->buf += sz * sizeof(int);
2717         cmdbuf->bufsz -= sz * sizeof(int);
2718         return 0;
2719 }
2720
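/* RADEON_CMD_PACKET3: a raw PACKET3 supplied by the client.  The packet
 * is verified (and its offsets fixed up) by
 * radeon_check_and_fixup_packet3(), which also returns its length in
 * dwords, and is then copied to the ring unchanged.
 */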
2721 static int radeon_emit_packet3(struct drm_device * dev,
2722                                struct drm_file *file_priv,
2723                                drm_radeon_kcmd_buffer_t *cmdbuf)
2724 {
2725         drm_radeon_private_t *dev_priv = dev->dev_private;
2726         unsigned int cmdsz;
2727         int ret;
2728         RING_LOCALS;
2729
2730         DRM_DEBUG("\n");
2731
2732         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2733                                                   cmdbuf, &cmdsz))) {
2734                 DRM_ERROR("Packet verification failed\n");
2735                 return ret;
2736         }
2737
2738         BEGIN_RING(cmdsz);
2739         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2740         ADVANCE_RING();
2741
2742         cmdbuf->buf += cmdsz * 4;
2743         cmdbuf->bufsz -= cmdsz * 4;
2744         return 0;
2745 }
2746
2747 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2748                                         struct drm_file *file_priv,
2749                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2750                                         int orig_nbox)
2751 {
2752         drm_radeon_private_t *dev_priv = dev->dev_private;
2753         struct drm_clip_rect box;
2754         unsigned int cmdsz;
2755         int ret;
2756         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2757         int i = 0;
2758         RING_LOCALS;
2759
2760         DRM_DEBUG("\n");
2761
2762         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2763                                                   cmdbuf, &cmdsz))) {
2764                 DRM_ERROR("Packet verification failed\n");
2765                 return ret;
2766         }
2767
2768         if (!orig_nbox)
2769                 goto out;
2770
2771         do {
2772                 if (i < cmdbuf->nbox) {
2773                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2774                                 return -EFAULT;
2775                         /* FIXME The second and subsequent times round
2776                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2777                          * calling emit_clip_rect(). This fixes a
2778                          * lockup on fast machines when sending
2779                          * several cliprects with a cmdbuf, as when
2780                          * waving a 2D window over a 3D
2781                          * window. Something in the commands from user
2782                          * space seems to hang the card when they're
2783                          * sent several times in a row. That would be
2784                          * the correct place to fix it but this works
2785                          * around it until I can figure that out - Tim
2786                          * Smith */
2787                         if (i) {
2788                                 BEGIN_RING(2);
2789                                 RADEON_WAIT_UNTIL_3D_IDLE();
2790                                 ADVANCE_RING();
2791                         }
2792                         radeon_emit_clip_rect(dev_priv, &box);
2793                 }
2794
2795                 BEGIN_RING(cmdsz);
2796                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2797                 ADVANCE_RING();
2798
2799         } while (++i < cmdbuf->nbox);
2800         if (cmdbuf->nbox == 1)
2801                 cmdbuf->nbox = 0;
2802
2803       out:
2804         cmdbuf->buf += cmdsz * 4;
2805         cmdbuf->bufsz -= cmdsz * 4;
2806         return 0;
2807 }
2808
2809 static int radeon_emit_wait(struct drm_device * dev, int flags)
2810 {
2811         drm_radeon_private_t *dev_priv = dev->dev_private;
2812         RING_LOCALS;
2813
2814         DRM_DEBUG("%x\n", flags);
2815         switch (flags) {
2816         case RADEON_WAIT_2D:
2817                 BEGIN_RING(2);
2818                 RADEON_WAIT_UNTIL_2D_IDLE();
2819                 ADVANCE_RING();
2820                 break;
2821         case RADEON_WAIT_3D:
2822                 BEGIN_RING(2);
2823                 RADEON_WAIT_UNTIL_3D_IDLE();
2824                 ADVANCE_RING();
2825                 break;
2826         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2827                 BEGIN_RING(2);
2828                 RADEON_WAIT_UNTIL_IDLE();
2829                 ADVANCE_RING();
2830                 break;
2831         default:
2832                 return -EINVAL;
2833         }
2834
2835         return 0;
2836 }
2837
2838 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2839 {
2840         drm_radeon_private_t *dev_priv = dev->dev_private;
2841         struct drm_device_dma *dma = dev->dma;
2842         struct drm_buf *buf = NULL;
2843         int idx;
2844         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2845         drm_radeon_cmd_header_t header;
2846         int orig_nbox, orig_bufsz;
2847         char *kbuf = NULL;
2848
2849         LOCK_TEST_WITH_RETURN(dev, file_priv);
2850
2851         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2852         VB_AGE_TEST_WITH_RETURN(dev_priv);
2853
2854         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2855                 return -EINVAL;
2856         }
2857
2858         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2859          * races between checking values and using those values in other code,
2860          * and simply to avoid a lot of function calls to copy in data.
2861          */
2862         orig_bufsz = cmdbuf->bufsz;
2863         if (orig_bufsz != 0) {
2864                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2865                 if (kbuf == NULL)
2866                         return -ENOMEM;
2867                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2868                                        cmdbuf->bufsz)) {
2869                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2870                         return -EFAULT;
2871                 }
2872                 cmdbuf->buf = kbuf;
2873         }
2874
2875         orig_nbox = cmdbuf->nbox;
2876
2877         if (dev_priv->microcode_version == UCODE_R300) {
2878                 int temp;
2879                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2880
2881                 if (orig_bufsz != 0)
2882                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2883
2884                 return temp;
2885         }
2886
2887         /* microcode_version != r300 */
2888         while (cmdbuf->bufsz >= sizeof(header)) {
2889
2890                 header.i = *(int *)cmdbuf->buf;
2891                 cmdbuf->buf += sizeof(header);
2892                 cmdbuf->bufsz -= sizeof(header);
2893
2894                 switch (header.header.cmd_type) {
2895                 case RADEON_CMD_PACKET:
2896                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2897                         if (radeon_emit_packets
2898                             (dev_priv, file_priv, header, cmdbuf)) {
2899                                 DRM_ERROR("radeon_emit_packets failed\n");
2900                                 goto err;
2901                         }
2902                         break;
2903
2904                 case RADEON_CMD_SCALARS:
2905                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2906                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2907                                 DRM_ERROR("radeon_emit_scalars failed\n");
2908                                 goto err;
2909                         }
2910                         break;
2911
2912                 case RADEON_CMD_VECTORS:
2913                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2914                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2915                                 DRM_ERROR("radeon_emit_vectors failed\n");
2916                                 goto err;
2917                         }
2918                         break;
2919
2920                 case RADEON_CMD_DMA_DISCARD:
2921                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2922                         idx = header.dma.buf_idx;
2923                         if (idx < 0 || idx >= dma->buf_count) {
2924                                 DRM_ERROR("buffer index %d (of %d max)\n",
2925                                           idx, dma->buf_count - 1);
2926                                 goto err;
2927                         }
2928
2929                         buf = dma->buflist[idx];
2930                         if (buf->file_priv != file_priv || buf->pending) {
2931                                 DRM_ERROR("bad buffer %p %p %d\n",
2932                                           buf->file_priv, file_priv,
2933                                           buf->pending);
2934                                 goto err;
2935                         }
2936
2937                         radeon_cp_discard_buffer(dev, buf);
2938                         break;
2939
2940                 case RADEON_CMD_PACKET3:
2941                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2942                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2943                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2944                                 goto err;
2945                         }
2946                         break;
2947
2948                 case RADEON_CMD_PACKET3_CLIP:
2949                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2950                         if (radeon_emit_packet3_cliprect
2951                             (dev, file_priv, cmdbuf, orig_nbox)) {
2952                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2953                                 goto err;
2954                         }
2955                         break;
2956
2957                 case RADEON_CMD_SCALARS2:
2958                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2959                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2960                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2961                                 goto err;
2962                         }
2963                         break;
2964
2965                 case RADEON_CMD_WAIT:
2966                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2967                         if (radeon_emit_wait(dev, header.wait.flags)) {
2968                                 DRM_ERROR("radeon_emit_wait failed\n");
2969                                 goto err;
2970                         }
2971                         break;
2972                 case RADEON_CMD_VECLINEAR:
2973                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2974                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2975                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2976                                 goto err;
2977                         }
2978                         break;
2979
2980                 default:
2981                         DRM_ERROR("bad cmd_type %d at %p\n",
2982                                   header.header.cmd_type,
2983                                   cmdbuf->buf - sizeof(header));
2984                         goto err;
2985                 }
2986         }
2987
2988         if (orig_bufsz != 0)
2989                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2990
2991         DRM_DEBUG("DONE\n");
2992         COMMIT_RING();
2993         return 0;
2994
2995       err:
2996         if (orig_bufsz != 0)
2997                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2998         return -EINVAL;
2999 }
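
/*
 * Illustrative userspace sketch (not kernel code) of the CMDBUF ioctl
 * handled above: the submitted buffer is a stream of 32-bit
 * drm_radeon_cmd_header_t words, each followed by its payload dwords.
 * The userspace request type name (drm_radeon_cmd_buffer_t) is assumed
 * to match radeon_drm.h and fd is a placeholder; this example emits a
 * single WAIT for 2D and 3D engine idle, which carries no payload.
 *
 *     drm_radeon_cmd_header_t h;
 *     drm_radeon_cmd_buffer_t cb;
 *
 *     memset(&h, 0, sizeof(h));
 *     h.header.cmd_type = RADEON_CMD_WAIT;
 *     h.wait.flags      = RADEON_WAIT_2D | RADEON_WAIT_3D;
 *
 *     cb.buf   = (char *)&h;
 *     cb.bufsz = sizeof(h);
 *     cb.nbox  = 0;
 *     cb.boxes = NULL;
 *     drmCommandWrite(fd, DRM_RADEON_CMDBUF, &cb, sizeof(cb));
 */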
3000
3001 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3002 {
3003         drm_radeon_private_t *dev_priv = dev->dev_private;
3004         drm_radeon_getparam_t *param = data;
3005         int value;
3006
3007         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3008
3009         switch (param->param) {
3010         case RADEON_PARAM_GART_BUFFER_OFFSET:
3011                 value = dev_priv->gart_buffers_offset;
3012                 break;
3013         case RADEON_PARAM_LAST_FRAME:
3014                 dev_priv->stats.last_frame_reads++;
3015                 value = GET_SCRATCH(dev_priv, 0);
3016                 break;
3017         case RADEON_PARAM_LAST_DISPATCH:
3018                 value = GET_SCRATCH(dev_priv, 1);
3019                 break;
3020         case RADEON_PARAM_LAST_CLEAR:
3021                 dev_priv->stats.last_clear_reads++;
3022                 value = GET_SCRATCH(dev_priv, 2);
3023                 break;
3024         case RADEON_PARAM_IRQ_NR:
3025                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3026                         value = 0;
3027                 else
3028                         value = dev->irq;
3029                 break;
3030         case RADEON_PARAM_GART_BASE:
3031                 value = dev_priv->gart_vm_start;
3032                 break;
3033         case RADEON_PARAM_REGISTER_HANDLE:
3034                 value = dev_priv->mmio->offset;
3035                 break;
3036         case RADEON_PARAM_STATUS_HANDLE:
3037                 value = dev_priv->ring_rptr_offset;
3038                 break;
3039 #ifndef __LP64__
3040                 /*
3041                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3042                  * pointer which can't fit into an int-sized variable.  According to
3043                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3044                  * not supporting it shouldn't be a problem.  If the same functionality
3045                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3046                  * so backwards-compatibility for the embedded platforms can be
3047                  * maintained.  --davidm 4-Feb-2004.
3048                  */
3049         case RADEON_PARAM_SAREA_HANDLE:
3050                 /* The lock is the first dword in the sarea. */
3051                 value = (long)dev->lock.hw_lock;
3052                 break;
3053 #endif
3054         case RADEON_PARAM_GART_TEX_HANDLE:
3055                 value = dev_priv->gart_textures_offset;
3056                 break;
3057         case RADEON_PARAM_SCRATCH_OFFSET:
3058                 if (!dev_priv->writeback_works)
3059                         return -EINVAL;
3060                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3061                         value = R600_SCRATCH_REG_OFFSET;
3062                 else
3063                         value = RADEON_SCRATCH_REG_OFFSET;
3064                 break;
3065         case RADEON_PARAM_CARD_TYPE:
3066                 if (dev_priv->flags & RADEON_IS_PCIE)
3067                         value = RADEON_CARD_PCIE;
3068                 else if (dev_priv->flags & RADEON_IS_AGP)
3069                         value = RADEON_CARD_AGP;
3070                 else
3071                         value = RADEON_CARD_PCI;
3072                 break;
3073         case RADEON_PARAM_VBLANK_CRTC:
3074                 value = radeon_vblank_crtc_get(dev);
3075                 break;
3076         case RADEON_PARAM_FB_LOCATION:
3077                 value = radeon_read_fb_location(dev_priv);
3078                 break;
3079         case RADEON_PARAM_NUM_GB_PIPES:
3080                 value = dev_priv->num_gb_pipes;
3081                 break;
3082         case RADEON_PARAM_NUM_Z_PIPES:
3083                 value = dev_priv->num_z_pipes;
3084                 break;
3085         default:
3086                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3087                 return -EINVAL;
3088         }
3089
3090         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3091                 DRM_ERROR("copy_to_user\n");
3092                 return -EFAULT;
3093         }
3094
3095         return 0;
3096 }
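
/*
 * Illustrative userspace sketch (not kernel code) of the GETPARAM ioctl
 * handled above: the requested value is copied back through the
 * user-supplied pointer.  Assumes libdrm's drmCommandWriteRead() and
 * the drm_radeon_getparam_t layout from radeon_drm.h; fd and
 * handle_error() are placeholders.
 *
 *     drm_radeon_getparam_t gp;
 *     int gart_base = 0;
 *
 *     gp.param = RADEON_PARAM_GART_BASE;
 *     gp.value = &gart_base;
 *     if (drmCommandWriteRead(fd, DRM_RADEON_GETPARAM,
 *                             &gp, sizeof(gp)) != 0)
 *             handle_error();
 */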
3097
3098 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3099 {
3100         drm_radeon_private_t *dev_priv = dev->dev_private;
3101         drm_radeon_setparam_t *sp = data;
3102         struct drm_radeon_driver_file_fields *radeon_priv;
3103
3104         switch (sp->param) {
3105         case RADEON_SETPARAM_FB_LOCATION:
3106                 radeon_priv = file_priv->driver_priv;
3107                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3108                     sp->value;
3109                 break;
3110         case RADEON_SETPARAM_SWITCH_TILING:
3111                 if (sp->value == 0) {
3112                         DRM_DEBUG("color tiling disabled\n");
3113                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3114                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3115                         if (dev_priv->sarea_priv)
3116                                 dev_priv->sarea_priv->tiling_enabled = 0;
3117                 } else if (sp->value == 1) {
3118                         DRM_DEBUG("color tiling enabled\n");
3119                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3120                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3121                         if (dev_priv->sarea_priv)
3122                                 dev_priv->sarea_priv->tiling_enabled = 1;
3123                 }
3124                 break;
3125         case RADEON_SETPARAM_PCIGART_LOCATION:
3126                 dev_priv->pcigart_offset = sp->value;
3127                 dev_priv->pcigart_offset_set = 1;
3128                 break;
3129         case RADEON_SETPARAM_NEW_MEMMAP:
3130                 dev_priv->new_memmap = sp->value;
3131                 break;
3132         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3133                 dev_priv->gart_info.table_size = sp->value;
3134                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3135                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3136                 break;
3137         case RADEON_SETPARAM_VBLANK_CRTC:
3138                 return radeon_vblank_crtc_set(dev, sp->value);
3139                 break;
3140         default:
3141                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3142                 return -EINVAL;
3143         }
3144
3145         return 0;
3146 }
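
/*
 * Illustrative userspace sketch (not kernel code) of the SETPARAM ioctl
 * handled above, here enabling macro tiling on the front and back
 * buffers.  Assumes the drm_radeon_setparam_t layout from radeon_drm.h
 * and libdrm's drmCommandWrite(); fd is a placeholder.
 *
 *     drm_radeon_setparam_t sp;
 *
 *     sp.param = RADEON_SETPARAM_SWITCH_TILING;
 *     sp.value = 1;
 *     drmCommandWrite(fd, DRM_RADEON_SETPARAM, &sp, sizeof(sp));
 */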
3147
3148 /* When a client dies:
3149  *    - Check for and clean up flipped page state
3150  *    - Free any allocated GART memory.
3151  *    - Free any allocated radeon surfaces.
3152  *
3153  * DRM infrastructure takes care of reclaiming dma buffers.
3154  */
3155 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3156 {
3157         if (dev->dev_private) {
3158                 drm_radeon_private_t *dev_priv = dev->dev_private;
3159                 dev_priv->page_flipping = 0;
3160                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3161                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3162                 radeon_surfaces_release(file_priv, dev_priv);
3163         }
3164 }
3165
3166 void radeon_driver_lastclose(struct drm_device *dev)
3167 {
3168         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3169         if (dev->dev_private) {
3170                 drm_radeon_private_t *dev_priv = dev->dev_private;
3171
3172                 if (dev_priv->sarea_priv &&
3173                     dev_priv->sarea_priv->pfCurrentPage != 0)
3174                         radeon_cp_dispatch_flip(dev);
3175         }
3176
3177         radeon_do_release(dev);
3178 }
3179
3180 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3181 {
3182         drm_radeon_private_t *dev_priv = dev->dev_private;
3183         struct drm_radeon_driver_file_fields *radeon_priv;
3184
3185         DRM_DEBUG("\n");
3186         radeon_priv =
3187             (struct drm_radeon_driver_file_fields *)
3188             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3189
3190         if (!radeon_priv)
3191                 return -ENOMEM;
3192
3193         file_priv->driver_priv = radeon_priv;
3194
3195         if (dev_priv)
3196                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3197         else
3198                 radeon_priv->radeon_fb_delta = 0;
3199         return 0;
3200 }
3201
3202 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3203 {
3204         struct drm_radeon_driver_file_fields *radeon_priv =
3205             file_priv->driver_priv;
3206
3207         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3208 }
3209
3210 struct drm_ioctl_desc radeon_ioctls[] = {
3211         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3212         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3213         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3214         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3215         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3216         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3217         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3218         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3219         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3220         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3221         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3222         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3223         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3224         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3225         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
3226         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3227         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3228         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3229         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3230         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3231         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3232         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3233         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3234         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3235         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3236         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3237         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3238         DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH)
3239 };
3240
3241 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);