1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  *
29  * $DragonFly: src/sys/dev/drm/radeon_state.c,v 1.1 2008/04/05 18:12:29 hasso Exp $
30  */
31
32 #include "drmP.h"
33 #include "drm.h"
34 #include "drm_sarea.h"
35 #include "radeon_drm.h"
36 #include "radeon_drv.h"
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
43                                                     dev_priv,
44                                                     struct drm_file *file_priv,
45                                                     u32 * offset)
46 {
47         u64 off = *offset;
48         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
49         struct drm_radeon_driver_file_fields *radeon_priv;
50
51         /* Hrm ... the story of the offset ... So this function converts
52          * the various ideas of what userland clients might have for an
53          * offset in the card address space into an offset into the card
54          * address space :) So with a sane client, it should just keep
55          * the value intact and just do some boundary checking. However,
56          * not all clients are sane. Some older clients pass us 0 based
57          * offsets relative to the start of the framebuffer and some may
58          * assume the AGP aperture is appended to the framebuffer, so we
59          * try to detect those cases and fix them up.
60          *
61          * Note: It might be a good idea here to make sure the offset lands
62          * in some "allowed" area to protect things like the PCIE GART...
63          */
64
65         /* First, the best case, the offset already lands in either the
66          * framebuffer or the GART mapped space
67          */
68         if (radeon_check_offset(dev_priv, off))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = file_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > fb_end)
82                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
83
84         /* Now recheck and fail if out of bounds */
85         if (radeon_check_offset(dev_priv, off)) {
86                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
87                 *offset = off;
88                 return 0;
89         }
90         return -EINVAL;
91 }
92
93 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
94                                                      dev_priv,
95                                                      struct drm_file *file_priv,
96                                                      int id, u32 *data)
97 {
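        /* id names one of the RADEON_EMIT_* / R200_EMIT_* register ranges
         * (see the packet[] table further down); for ranges that carry GPU
         * addresses we check the offset dwords and fix them up in place. */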
98         switch (id) {
99
100         case RADEON_EMIT_PP_MISC:
101                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
102                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
103                         DRM_ERROR("Invalid depth buffer offset\n");
104                         return -EINVAL;
105                 }
106                 break;
107
108         case RADEON_EMIT_PP_CNTL:
109                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
110                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
111                         DRM_ERROR("Invalid colour buffer offset\n");
112                         return -EINVAL;
113                 }
114                 break;
115
116         case R200_EMIT_PP_TXOFFSET_0:
117         case R200_EMIT_PP_TXOFFSET_1:
118         case R200_EMIT_PP_TXOFFSET_2:
119         case R200_EMIT_PP_TXOFFSET_3:
120         case R200_EMIT_PP_TXOFFSET_4:
121         case R200_EMIT_PP_TXOFFSET_5:
122                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
123                                                   &data[0])) {
124                         DRM_ERROR("Invalid R200 texture offset\n");
125                         return -EINVAL;
126                 }
127                 break;
128
129         case RADEON_EMIT_PP_TXFILTER_0:
130         case RADEON_EMIT_PP_TXFILTER_1:
131         case RADEON_EMIT_PP_TXFILTER_2:
132                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
133                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
134                         DRM_ERROR("Invalid R100 texture offset\n");
135                         return -EINVAL;
136                 }
137                 break;
138
139         case R200_EMIT_PP_CUBIC_OFFSETS_0:
140         case R200_EMIT_PP_CUBIC_OFFSETS_1:
141         case R200_EMIT_PP_CUBIC_OFFSETS_2:
142         case R200_EMIT_PP_CUBIC_OFFSETS_3:
143         case R200_EMIT_PP_CUBIC_OFFSETS_4:
144         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
145                         int i;
146                         for (i = 0; i < 5; i++) {
147                                 if (radeon_check_and_fixup_offset(dev_priv,
148                                                                   file_priv,
149                                                                   &data[i])) {
150                                         DRM_ERROR
151                                             ("Invalid R200 cubic texture offset\n");
152                                         return -EINVAL;
153                                 }
154                         }
155                         break;
156                 }
157
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
159         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
160         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
161                         int i;
162                         for (i = 0; i < 5; i++) {
163                                 if (radeon_check_and_fixup_offset(dev_priv,
164                                                                   file_priv,
165                                                                   &data[i])) {
166                                         DRM_ERROR
167                                             ("Invalid R100 cubic texture offset\n");
168                                         return -EINVAL;
169                                 }
170                         }
171                 }
172                 break;
173
174         case R200_EMIT_VAP_CTL: {
175                         RING_LOCALS;
176                         BEGIN_RING(2);
177                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
178                         ADVANCE_RING();
179                 }
180                 break;
181
182         case RADEON_EMIT_RB3D_COLORPITCH:
183         case RADEON_EMIT_RE_LINE_PATTERN:
184         case RADEON_EMIT_SE_LINE_WIDTH:
185         case RADEON_EMIT_PP_LUM_MATRIX:
186         case RADEON_EMIT_PP_ROT_MATRIX_0:
187         case RADEON_EMIT_RB3D_STENCILREFMASK:
188         case RADEON_EMIT_SE_VPORT_XSCALE:
189         case RADEON_EMIT_SE_CNTL:
190         case RADEON_EMIT_SE_CNTL_STATUS:
191         case RADEON_EMIT_RE_MISC:
192         case RADEON_EMIT_PP_BORDER_COLOR_0:
193         case RADEON_EMIT_PP_BORDER_COLOR_1:
194         case RADEON_EMIT_PP_BORDER_COLOR_2:
195         case RADEON_EMIT_SE_ZBIAS_FACTOR:
196         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
197         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
198         case R200_EMIT_PP_TXCBLEND_0:
199         case R200_EMIT_PP_TXCBLEND_1:
200         case R200_EMIT_PP_TXCBLEND_2:
201         case R200_EMIT_PP_TXCBLEND_3:
202         case R200_EMIT_PP_TXCBLEND_4:
203         case R200_EMIT_PP_TXCBLEND_5:
204         case R200_EMIT_PP_TXCBLEND_6:
205         case R200_EMIT_PP_TXCBLEND_7:
206         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
207         case R200_EMIT_TFACTOR_0:
208         case R200_EMIT_VTX_FMT_0:
209         case R200_EMIT_MATRIX_SELECT_0:
210         case R200_EMIT_TEX_PROC_CTL_2:
211         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
212         case R200_EMIT_PP_TXFILTER_0:
213         case R200_EMIT_PP_TXFILTER_1:
214         case R200_EMIT_PP_TXFILTER_2:
215         case R200_EMIT_PP_TXFILTER_3:
216         case R200_EMIT_PP_TXFILTER_4:
217         case R200_EMIT_PP_TXFILTER_5:
218         case R200_EMIT_VTE_CNTL:
219         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
220         case R200_EMIT_PP_TAM_DEBUG3:
221         case R200_EMIT_PP_CNTL_X:
222         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
223         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
224         case R200_EMIT_RE_SCISSOR_TL_0:
225         case R200_EMIT_RE_SCISSOR_TL_1:
226         case R200_EMIT_RE_SCISSOR_TL_2:
227         case R200_EMIT_SE_VAP_CNTL_STATUS:
228         case R200_EMIT_SE_VTX_STATE_CNTL:
229         case R200_EMIT_RE_POINTSIZE:
230         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
231         case R200_EMIT_PP_CUBIC_FACES_0:
232         case R200_EMIT_PP_CUBIC_FACES_1:
233         case R200_EMIT_PP_CUBIC_FACES_2:
234         case R200_EMIT_PP_CUBIC_FACES_3:
235         case R200_EMIT_PP_CUBIC_FACES_4:
236         case R200_EMIT_PP_CUBIC_FACES_5:
237         case RADEON_EMIT_PP_TEX_SIZE_0:
238         case RADEON_EMIT_PP_TEX_SIZE_1:
239         case RADEON_EMIT_PP_TEX_SIZE_2:
240         case R200_EMIT_RB3D_BLENDCOLOR:
241         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
242         case RADEON_EMIT_PP_CUBIC_FACES_0:
243         case RADEON_EMIT_PP_CUBIC_FACES_1:
244         case RADEON_EMIT_PP_CUBIC_FACES_2:
245         case R200_EMIT_PP_TRI_PERF_CNTL:
246         case R200_EMIT_PP_AFS_0:
247         case R200_EMIT_PP_AFS_1:
248         case R200_EMIT_ATF_TFACTOR:
249         case R200_EMIT_PP_TXCTLALL_0:
250         case R200_EMIT_PP_TXCTLALL_1:
251         case R200_EMIT_PP_TXCTLALL_2:
252         case R200_EMIT_PP_TXCTLALL_3:
253         case R200_EMIT_PP_TXCTLALL_4:
254         case R200_EMIT_PP_TXCTLALL_5:
255         case R200_EMIT_VAP_PVS_CNTL:
256                 /* These packets don't contain memory offsets */
257                 break;
258
259         default:
260                 DRM_ERROR("Unknown state packet ID %d\n", id);
261                 return -EINVAL;
262         }
263
264         return 0;
265 }
266
267 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
268                                                      dev_priv,
269                                                      struct drm_file *file_priv,
270                                                      drm_radeon_kcmd_buffer_t *
271                                                      cmdbuf,
272                                                      unsigned int *cmdsz)
273 {
274         u32 *cmd = (u32 *) cmdbuf->buf;
275         u32 offset, narrays;
276         int count, i, k;
277
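        /* A type-3 packet is (count + 2) dwords long including its header;
         * the count comes from the header's count field. */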
278         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
279
280         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
281                 DRM_ERROR("Not a type 3 packet\n");
282                 return -EINVAL;
283         }
284
285         if (4 * *cmdsz > cmdbuf->bufsz) {
286                 DRM_ERROR("Packet size larger than size of data provided\n");
287                 return -EINVAL;
288         }
289
290         switch(cmd[0] & 0xff00) {
291         /* XXX Are there old drivers needing other packets? */
292
293         case RADEON_3D_DRAW_IMMD:
294         case RADEON_3D_DRAW_VBUF:
295         case RADEON_3D_DRAW_INDX:
296         case RADEON_WAIT_FOR_IDLE:
297         case RADEON_CP_NOP:
298         case RADEON_3D_CLEAR_ZMASK:
299 /*      case RADEON_CP_NEXT_CHAR:
300         case RADEON_CP_PLY_NEXTSCAN:
301         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
302                 /* these packets are safe */
303                 break;
304
305         case RADEON_CP_3D_DRAW_IMMD_2:
306         case RADEON_CP_3D_DRAW_VBUF_2:
307         case RADEON_CP_3D_DRAW_INDX_2:
308         case RADEON_3D_CLEAR_HIZ:
309                 /* safe but r200 only */
310                 if (dev_priv->microcode_version != UCODE_R200) {
311                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
312                         return -EINVAL;
313                 }
314                 break;
315
316         case RADEON_3D_LOAD_VBPNTR:
317                 count = (cmd[0] >> 16) & 0x3fff;
318
319                 if (count > 18) { /* 12 arrays max */
320                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
321                                   count);
322                         return -EINVAL;
323                 }
324
325                 /* carefully check packet contents */
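                /* The low bits of cmd[1] give the number of arrays; after
                 * that, each group is one attribute dword followed by up to
                 * two array offsets (arrays come in pairs). */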
326                 narrays = cmd[1] & ~0xc000;
327                 k = 0;
328                 i = 2;
329                 while ((k < narrays) && (i < (count + 2))) {
330                         i++;            /* skip attribute field */
331                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
332                                                           &cmd[i])) {
333                                 DRM_ERROR
334                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
335                                      k, i);
336                                 return -EINVAL;
337                         }
338                         k++;
339                         i++;
340                         if (k == narrays)
341                                 break;
342                         /* have one more to process, they come in pairs */
343                         if (radeon_check_and_fixup_offset(dev_priv,
344                                                           file_priv, &cmd[i]))
345                         {
346                                 DRM_ERROR
347                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
348                                      k, i);
349                                 return -EINVAL;
350                         }
351                         k++;
352                         i++;
353                 }
354                 /* do the counts match what we expect ? */
355                 if ((k != narrays) || (i != (count + 2))) {
356                         DRM_ERROR
357                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
358                               k, i, narrays, count + 1);
359                         return -EINVAL;
360                 }
361                 break;
362
363         case RADEON_3D_RNDR_GEN_INDX_PRIM:
364                 if (dev_priv->microcode_version != UCODE_R100) {
365                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
366                         return -EINVAL;
367                 }
368                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
369                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
370                                 return -EINVAL;
371                 }
372                 break;
373
374         case RADEON_CP_INDX_BUFFER:
375                 if (dev_priv->microcode_version != UCODE_R200) {
376                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
377                         return -EINVAL;
378                 }
379                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
380                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
381                         return -EINVAL;
382                 }
383                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
384                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
385                         return -EINVAL;
386                 }
387                 break;
388
389         case RADEON_CNTL_HOSTDATA_BLT:
390         case RADEON_CNTL_PAINT_MULTI:
391         case RADEON_CNTL_BITBLT_MULTI:
392                 /* MSB of opcode: next DWORD GUI_CNTL */
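                /* The pitch/offset dwords keep the pitch in the top 10 bits
                 * and the offset, in 1KB units, in the low 22 bits. */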
393                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
394                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
395                         offset = cmd[2] << 10;
396                         if (radeon_check_and_fixup_offset
397                             (dev_priv, file_priv, &offset)) {
398                                 DRM_ERROR("Invalid first packet offset\n");
399                                 return -EINVAL;
400                         }
401                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
402                 }
403
404                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
405                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
406                         offset = cmd[3] << 10;
407                         if (radeon_check_and_fixup_offset
408                             (dev_priv, file_priv, &offset)) {
409                                 DRM_ERROR("Invalid second packet offset\n");
410                                 return -EINVAL;
411                         }
412                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
413                 }
414                 break;
415
416         default:
417                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
418                 return -EINVAL;
419         }
420
421         return 0;
422 }
423
424 /* ================================================================
425  * CP hardware state programming functions
426  */
427
428 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
429                                              struct drm_clip_rect * box)
430 {
431         RING_LOCALS;
432
433         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
434                   box->x1, box->y1, box->x2, box->y2);
435
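        /* drm_clip_rect x2/y2 are exclusive, hence the -1 when forming the
         * bottom-right extent below. */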
436         BEGIN_RING(4);
437         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
438         OUT_RING((box->y1 << 16) | box->x1);
439         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
440         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
441         ADVANCE_RING();
442 }
443
444 /* Emit 1.1 state
445  */
446 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
447                              struct drm_file *file_priv,
448                              drm_radeon_context_regs_t * ctx,
449                              drm_radeon_texture_regs_t * tex,
450                              unsigned int dirty)
451 {
452         RING_LOCALS;
453         DRM_DEBUG("dirty=0x%08x\n", dirty);
454
455         if (dirty & RADEON_UPLOAD_CONTEXT) {
456                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
457                                                   &ctx->rb3d_depthoffset)) {
458                         DRM_ERROR("Invalid depth buffer offset\n");
459                         return -EINVAL;
460                 }
461
462                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
463                                                   &ctx->rb3d_coloroffset)) {
464                         DRM_ERROR("Invalid color buffer offset\n");
465                         return -EINVAL;
466                 }
467
468                 BEGIN_RING(14);
469                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
470                 OUT_RING(ctx->pp_misc);
471                 OUT_RING(ctx->pp_fog_color);
472                 OUT_RING(ctx->re_solid_color);
473                 OUT_RING(ctx->rb3d_blendcntl);
474                 OUT_RING(ctx->rb3d_depthoffset);
475                 OUT_RING(ctx->rb3d_depthpitch);
476                 OUT_RING(ctx->rb3d_zstencilcntl);
477                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
478                 OUT_RING(ctx->pp_cntl);
479                 OUT_RING(ctx->rb3d_cntl);
480                 OUT_RING(ctx->rb3d_coloroffset);
481                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
482                 OUT_RING(ctx->rb3d_colorpitch);
483                 ADVANCE_RING();
484         }
485
486         if (dirty & RADEON_UPLOAD_VERTFMT) {
487                 BEGIN_RING(2);
488                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
489                 OUT_RING(ctx->se_coord_fmt);
490                 ADVANCE_RING();
491         }
492
493         if (dirty & RADEON_UPLOAD_LINE) {
494                 BEGIN_RING(5);
495                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
496                 OUT_RING(ctx->re_line_pattern);
497                 OUT_RING(ctx->re_line_state);
498                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
499                 OUT_RING(ctx->se_line_width);
500                 ADVANCE_RING();
501         }
502
503         if (dirty & RADEON_UPLOAD_BUMPMAP) {
504                 BEGIN_RING(5);
505                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
506                 OUT_RING(ctx->pp_lum_matrix);
507                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
508                 OUT_RING(ctx->pp_rot_matrix_0);
509                 OUT_RING(ctx->pp_rot_matrix_1);
510                 ADVANCE_RING();
511         }
512
513         if (dirty & RADEON_UPLOAD_MASKS) {
514                 BEGIN_RING(4);
515                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
516                 OUT_RING(ctx->rb3d_stencilrefmask);
517                 OUT_RING(ctx->rb3d_ropcntl);
518                 OUT_RING(ctx->rb3d_planemask);
519                 ADVANCE_RING();
520         }
521
522         if (dirty & RADEON_UPLOAD_VIEWPORT) {
523                 BEGIN_RING(7);
524                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
525                 OUT_RING(ctx->se_vport_xscale);
526                 OUT_RING(ctx->se_vport_xoffset);
527                 OUT_RING(ctx->se_vport_yscale);
528                 OUT_RING(ctx->se_vport_yoffset);
529                 OUT_RING(ctx->se_vport_zscale);
530                 OUT_RING(ctx->se_vport_zoffset);
531                 ADVANCE_RING();
532         }
533
534         if (dirty & RADEON_UPLOAD_SETUP) {
535                 BEGIN_RING(4);
536                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
537                 OUT_RING(ctx->se_cntl);
538                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
539                 OUT_RING(ctx->se_cntl_status);
540                 ADVANCE_RING();
541         }
542
543         if (dirty & RADEON_UPLOAD_MISC) {
544                 BEGIN_RING(2);
545                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
546                 OUT_RING(ctx->re_misc);
547                 ADVANCE_RING();
548         }
549
550         if (dirty & RADEON_UPLOAD_TEX0) {
551                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
552                                                   &tex[0].pp_txoffset)) {
553                         DRM_ERROR("Invalid texture offset for unit 0\n");
554                         return -EINVAL;
555                 }
556
557                 BEGIN_RING(9);
558                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
559                 OUT_RING(tex[0].pp_txfilter);
560                 OUT_RING(tex[0].pp_txformat);
561                 OUT_RING(tex[0].pp_txoffset);
562                 OUT_RING(tex[0].pp_txcblend);
563                 OUT_RING(tex[0].pp_txablend);
564                 OUT_RING(tex[0].pp_tfactor);
565                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
566                 OUT_RING(tex[0].pp_border_color);
567                 ADVANCE_RING();
568         }
569
570         if (dirty & RADEON_UPLOAD_TEX1) {
571                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
572                                                   &tex[1].pp_txoffset)) {
573                         DRM_ERROR("Invalid texture offset for unit 1\n");
574                         return -EINVAL;
575                 }
576
577                 BEGIN_RING(9);
578                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
579                 OUT_RING(tex[1].pp_txfilter);
580                 OUT_RING(tex[1].pp_txformat);
581                 OUT_RING(tex[1].pp_txoffset);
582                 OUT_RING(tex[1].pp_txcblend);
583                 OUT_RING(tex[1].pp_txablend);
584                 OUT_RING(tex[1].pp_tfactor);
585                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
586                 OUT_RING(tex[1].pp_border_color);
587                 ADVANCE_RING();
588         }
589
590         if (dirty & RADEON_UPLOAD_TEX2) {
591                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
592                                                   &tex[2].pp_txoffset)) {
593                         DRM_ERROR("Invalid texture offset for unit 2\n");
594                         return -EINVAL;
595                 }
596
597                 BEGIN_RING(9);
598                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
599                 OUT_RING(tex[2].pp_txfilter);
600                 OUT_RING(tex[2].pp_txformat);
601                 OUT_RING(tex[2].pp_txoffset);
602                 OUT_RING(tex[2].pp_txcblend);
603                 OUT_RING(tex[2].pp_txablend);
604                 OUT_RING(tex[2].pp_tfactor);
605                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
606                 OUT_RING(tex[2].pp_border_color);
607                 ADVANCE_RING();
608         }
609
610         return 0;
611 }
612
613 /* Emit 1.2 state
614  */
615 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
616                               struct drm_file *file_priv,
617                               drm_radeon_state_t * state)
618 {
619         RING_LOCALS;
620
621         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
622                 BEGIN_RING(3);
623                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
624                 OUT_RING(state->context2.se_zbias_factor);
625                 OUT_RING(state->context2.se_zbias_constant);
626                 ADVANCE_RING();
627         }
628
629         return radeon_emit_state(dev_priv, file_priv, &state->context,
630                                  state->tex, state->dirty);
631 }
632
633 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
634  * 1.3 cmdbuffers allow all previous state to be updated as well as
635  * the tcl scalar and vector areas.
636  */
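/* The table below is indexed by the RADEON_EMIT_* / R200_EMIT_* packet ids;
 * 'start' is the first register of the range and 'len' the number of dwords
 * the packet writes. */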
637 static struct {
638         int start;
639         int len;
640         const char *name;
641 } packet[RADEON_MAX_STATE_PACKETS] = {
642         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
643         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
644         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
645         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
646         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
647         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
648         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
649         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
650         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
651         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
652         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
653         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
654         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
655         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
656         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
657         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
658         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
659         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
660         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
661         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
662         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
663                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
664         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
665         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
666         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
667         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
668         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
669         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
670         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
671         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
672         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
673         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
674         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
675         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
676         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
677         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
678         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
679         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
680         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
681         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
682         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
683         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
684         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
685         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
686         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
687         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
688         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
689         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
690         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
691         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
692         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
693          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
694         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
695         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
696         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
697         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
698         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
699         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
700         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
701         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
702         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
703         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
704         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
705                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
706         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
707         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
708         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
709         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
710         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
711         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
712         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
713         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
714         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
715         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
716         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
717         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
718         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
719         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
720         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
721         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
722         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
723         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
724         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
725         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
726         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
727         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
728         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
729         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
730         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
731         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
732         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
733         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
734         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
735         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
736         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
737         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
738         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
739         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
740 };
741
742 /* ================================================================
743  * Performance monitoring functions
744  */
745
746 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
747                              int x, int y, int w, int h, int r, int g, int b)
748 {
749         u32 color;
750         RING_LOCALS;
751
752         x += dev_priv->sarea_priv->boxes[0].x1;
753         y += dev_priv->sarea_priv->boxes[0].y1;
754
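        /* Pack the 8-bit r/g/b components into the framebuffer's native
         * color format. */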
755         switch (dev_priv->color_fmt) {
756         case RADEON_COLOR_FORMAT_RGB565:
757                 color = (((r & 0xf8) << 8) |
758                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
759                 break;
760         case RADEON_COLOR_FORMAT_ARGB8888:
761         default:
762                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
763                 break;
764         }
765
766         BEGIN_RING(4);
767         RADEON_WAIT_UNTIL_3D_IDLE();
768         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
769         OUT_RING(0xffffffff);
770         ADVANCE_RING();
771
772         BEGIN_RING(6);
773
774         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
775         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
776                  RADEON_GMC_BRUSH_SOLID_COLOR |
777                  (dev_priv->color_fmt << 8) |
778                  RADEON_GMC_SRC_DATATYPE_COLOR |
779                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
780
781         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
782                 OUT_RING(dev_priv->front_pitch_offset);
783         } else {
784                 OUT_RING(dev_priv->back_pitch_offset);
785         }
786
787         OUT_RING(color);
788
789         OUT_RING((x << 16) | y);
790         OUT_RING((w << 16) | h);
791
792         ADVANCE_RING();
793 }
794
795 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
796 {
797         /* Collapse various things into a wait flag -- trying to
798          * guess if userspace slept -- better just to have them tell us.
799          */
800         if (dev_priv->stats.last_frame_reads > 1 ||
801             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
802                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
803         }
804
805         if (dev_priv->stats.freelist_loops) {
806                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
807         }
808
809         /* Purple box for page flipping
810          */
811         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
812                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
813
814         /* Red box if we have to wait for idle at any point
815          */
816         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
817                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
818
819         /* Blue box: lost context?
820          */
821
822         /* Yellow box for texture swaps
823          */
824         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
825                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
826
827         /* Green box if hardware never idles (as far as we can tell)
828          */
829         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
830                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
831
832         /* Draw bars indicating number of buffers allocated
833          * (not a great measure, easily confused)
834          */
835         if (dev_priv->stats.requested_bufs) {
836                 if (dev_priv->stats.requested_bufs > 100)
837                         dev_priv->stats.requested_bufs = 100;
838
839                 radeon_clear_box(dev_priv, 4, 16,
840                                  dev_priv->stats.requested_bufs, 4,
841                                  196, 128, 128);
842         }
843
844         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
845
846 }
847
848 /* ================================================================
849  * CP command dispatch functions
850  */
851
852 static void radeon_cp_dispatch_clear(struct drm_device * dev,
853                                      drm_radeon_clear_t * clear,
854                                      drm_radeon_clear_rect_t * depth_boxes)
855 {
856         drm_radeon_private_t *dev_priv = dev->dev_private;
857         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
858         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
859         int nbox = sarea_priv->nbox;
860         struct drm_clip_rect *pbox = sarea_priv->boxes;
861         unsigned int flags = clear->flags;
862         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
863         int i;
864         RING_LOCALS;
865         DRM_DEBUG("flags = 0x%x\n", flags);
866
867         dev_priv->stats.clears++;
868
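        /* When the page has been flipped, the sarea's front/back naming is
         * reversed, so swap the FRONT/BACK flags accordingly. */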
869         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
870                 unsigned int tmp = flags;
871
872                 flags &= ~(RADEON_FRONT | RADEON_BACK);
873                 if (tmp & RADEON_FRONT)
874                         flags |= RADEON_BACK;
875                 if (tmp & RADEON_BACK)
876                         flags |= RADEON_FRONT;
877         }
878
879         if (flags & (RADEON_FRONT | RADEON_BACK)) {
880
881                 BEGIN_RING(4);
882
883                 /* Ensure the 3D stream is idle before doing a
884                  * 2D fill to clear the front or back buffer.
885                  */
886                 RADEON_WAIT_UNTIL_3D_IDLE();
887
888                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
889                 OUT_RING(clear->color_mask);
890
891                 ADVANCE_RING();
892
893                 /* Make sure we restore the 3D state next time.
894                  */
895                 dev_priv->sarea_priv->ctx_owner = 0;
896
897                 for (i = 0; i < nbox; i++) {
898                         int x = pbox[i].x1;
899                         int y = pbox[i].y1;
900                         int w = pbox[i].x2 - x;
901                         int h = pbox[i].y2 - y;
902
903                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
904                                   x, y, w, h, flags);
905
906                         if (flags & RADEON_FRONT) {
907                                 BEGIN_RING(6);
908
909                                 OUT_RING(CP_PACKET3
910                                          (RADEON_CNTL_PAINT_MULTI, 4));
911                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
912                                          RADEON_GMC_BRUSH_SOLID_COLOR |
913                                          (dev_priv->
914                                           color_fmt << 8) |
915                                          RADEON_GMC_SRC_DATATYPE_COLOR |
916                                          RADEON_ROP3_P |
917                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
918
919                                 OUT_RING(dev_priv->front_pitch_offset);
920                                 OUT_RING(clear->clear_color);
921
922                                 OUT_RING((x << 16) | y);
923                                 OUT_RING((w << 16) | h);
924
925                                 ADVANCE_RING();
926                         }
927
928                         if (flags & RADEON_BACK) {
929                                 BEGIN_RING(6);
930
931                                 OUT_RING(CP_PACKET3
932                                          (RADEON_CNTL_PAINT_MULTI, 4));
933                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
934                                          RADEON_GMC_BRUSH_SOLID_COLOR |
935                                          (dev_priv->
936                                           color_fmt << 8) |
937                                          RADEON_GMC_SRC_DATATYPE_COLOR |
938                                          RADEON_ROP3_P |
939                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
940
941                                 OUT_RING(dev_priv->back_pitch_offset);
942                                 OUT_RING(clear->clear_color);
943
944                                 OUT_RING((x << 16) | y);
945                                 OUT_RING((w << 16) | h);
946
947                                 ADVANCE_RING();
948                         }
949                 }
950         }
951
952         /* hyper z clear */
953         /* no docs available, based on reverse engineering by Stephane Marchesin */
954         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
955             && (flags & RADEON_CLEAR_FASTZ)) {
956
957                 int i;
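                /* depth_pitch is in bytes; convert to pixels per scanline
                 * (16-bit Z uses 2 bytes per pixel, other formats 4). */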
958                 int depthpixperline =
959                     dev_priv->depth_fmt ==
960                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
961                                                        2) : (dev_priv->
962                                                              depth_pitch / 4);
963
964                 u32 clearmask;
965
966                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
967                     ((clear->depth_mask & 0xff) << 24);
968
969                 /* Make sure we restore the 3D state next time.
970                  * we haven't touched any "normal" state - still need this?
971                  */
972                 dev_priv->sarea_priv->ctx_owner = 0;
973
974                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
975                     && (flags & RADEON_USE_HIERZ)) {
976                         /* FIXME : reverse engineer that for Rx00 cards */
977                         /* FIXME : the mask supposedly contains low-res z values. So can't set
978                            just to the max (0xff? or actually 0x3fff?), need to take z clear
979                            value into account? */
980                         /* pattern seems to work for r100, though get slight
981                            rendering errors with glxgears. If hierz is not enabled for r100,
982                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
983                            other ones are ignored, and the same clear mask can be used. That's
984                            very different behaviour than R200 which needs different clear mask
985                            and different number of tiles to clear if hierz is enabled or not !?!
986                          */
987                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
988                 } else {
989                         /* clear mask : chooses the clearing pattern.
990                            rv250: could be used to clear only parts of macrotiles
991                            (but that would get really complicated...)?
992                            bit 0 and 1 (either or both of them ?!?!) are used to
993                            not clear tile (or maybe one of the bits indicates if the tile is
994                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
995                            Pattern is as follows:
996                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
997                            bits -------------------------------------------------
998                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
999                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1000                            covers 256 pixels ?!?
1001                          */
1002                         clearmask = 0x0;
1003                 }
1004
1005                 BEGIN_RING(8);
1006                 RADEON_WAIT_UNTIL_2D_IDLE();
1007                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1008                              tempRB3D_DEPTHCLEARVALUE);
1009                 /* what offset is this exactly ? */
1010                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1011                 /* need ctlstat, otherwise get some strange black flickering */
1012                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1013                              RADEON_RB3D_ZC_FLUSH_ALL);
1014                 ADVANCE_RING();
1015
1016                 for (i = 0; i < nbox; i++) {
1017                         int tileoffset, nrtilesx, nrtilesy, j;
1018                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1019                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1020                             && !(dev_priv->microcode_version == UCODE_R200)) {
1021                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1022                                    maybe r200 actually doesn't need to put the low-res z value into
1023                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1024                                    Works for R100, both with hierz and without.
1025                                    R100 seems to operate on 2x1 8x8 tiles, but...
1026                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1027                                    problematic with resolutions which are not 64 pix aligned? */
1028                                 tileoffset =
1029                                     ((pbox[i].y1 >> 3) * depthpixperline +
1030                                      pbox[i].x1) >> 6;
1031                                 nrtilesx =
1032                                     ((pbox[i].x2 & ~63) -
1033                                      (pbox[i].x1 & ~63)) >> 4;
1034                                 nrtilesy =
1035                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1036                                 for (j = 0; j <= nrtilesy; j++) {
1037                                         BEGIN_RING(4);
1038                                         OUT_RING(CP_PACKET3
1039                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1040                                         /* first tile */
1041                                         OUT_RING(tileoffset * 8);
1042                                         /* the number of tiles to clear */
1043                                         OUT_RING(nrtilesx + 4);
1044                                         /* clear mask : chooses the clearing pattern. */
1045                                         OUT_RING(clearmask);
1046                                         ADVANCE_RING();
1047                                         tileoffset += depthpixperline >> 6;
1048                                 }
1049                         } else if (dev_priv->microcode_version == UCODE_R200) {
1050                                 /* works for rv250. */
1051                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1052                                 tileoffset =
1053                                     ((pbox[i].y1 >> 3) * depthpixperline +
1054                                      pbox[i].x1) >> 5;
1055                                 nrtilesx =
1056                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1057                                 nrtilesy =
1058                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1059                                 for (j = 0; j <= nrtilesy; j++) {
1060                                         BEGIN_RING(4);
1061                                         OUT_RING(CP_PACKET3
1062                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1063                                         /* first tile */
1064                                         /* judging by the first tile offset needed, could possibly
1065                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1066                                            macro tiles, though would still need clear mask for
1067                                            right/bottom if truly 4x4 granularity is desired ? */
1068                                         OUT_RING(tileoffset * 16);
1069                                         /* the number of tiles to clear */
1070                                         OUT_RING(nrtilesx + 1);
1071                                         /* clear mask : chooses the clearing pattern. */
1072                                         OUT_RING(clearmask);
1073                                         ADVANCE_RING();
1074                                         tileoffset += depthpixperline >> 5;
1075                                 }
1076                         } else {        /* rv 100 */
1077                                 /* rv100 might not need 64 pix alignment, who knows */
1078                                 /* offsets are, hmm, weird */
1079                                 tileoffset =
1080                                     ((pbox[i].y1 >> 4) * depthpixperline +
1081                                      pbox[i].x1) >> 6;
1082                                 nrtilesx =
1083                                     ((pbox[i].x2 & ~63) -
1084                                      (pbox[i].x1 & ~63)) >> 4;
1085                                 nrtilesy =
1086                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1087                                 for (j = 0; j <= nrtilesy; j++) {
1088                                         BEGIN_RING(4);
1089                                         OUT_RING(CP_PACKET3
1090                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1091                                         OUT_RING(tileoffset * 128);
1092                                         /* the number of tiles to clear */
1093                                         OUT_RING(nrtilesx + 4);
1094                                         /* clear mask : chooses the clearing pattern. */
1095                                         OUT_RING(clearmask);
1096                                         ADVANCE_RING();
1097                                         tileoffset += depthpixperline >> 6;
1098                                 }
1099                         }
1100                 }
1101
1102                 /* TODO don't always clear all hi-level z tiles */
1103                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1104                     && (dev_priv->microcode_version == UCODE_R200)
1105                     && (flags & RADEON_USE_HIERZ))
1106                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1107                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1108                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1109                            value into account? */
1110                 {
1111                         BEGIN_RING(4);
1112                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1113                         OUT_RING(0x0);  /* First tile */
1114                         OUT_RING(0x3cc0);
1115                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1116                         ADVANCE_RING();
1117                 }
1118         }
1119
1120         /* We have to clear the depth and/or stencil buffers by
1121          * rendering a quad into just those buffers.  Thus, we have to
1122          * make sure the 3D engine is configured correctly.
1123          */
1124         else if ((dev_priv->microcode_version == UCODE_R200) &&
1125                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1126
1127                 int tempPP_CNTL;
1128                 int tempRE_CNTL;
1129                 int tempRB3D_CNTL;
1130                 int tempRB3D_ZSTENCILCNTL;
1131                 int tempRB3D_STENCILREFMASK;
1132                 int tempRB3D_PLANEMASK;
1133                 int tempSE_CNTL;
1134                 int tempSE_VTE_CNTL;
1135                 int tempSE_VTX_FMT_0;
1136                 int tempSE_VTX_FMT_1;
1137                 int tempSE_VAP_CNTL;
1138                 int tempRE_AUX_SCISSOR_CNTL;
1139
1140                 tempPP_CNTL = 0;
1141                 tempRE_CNTL = 0;
1142
1143                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1144
1145                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1146                 tempRB3D_STENCILREFMASK = 0x0;
1147
1148                 tempSE_CNTL = depth_clear->se_cntl;
1149
1150                 /* Disable TCL */
1151
1152                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1153                                           (0x9 <<
1154                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1155
1156                 tempRB3D_PLANEMASK = 0x0;
1157
1158                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1159
1160                 tempSE_VTE_CNTL =
1161                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1162
1163                 /* Vertex format (X, Y, Z, W) */
1164                 tempSE_VTX_FMT_0 =
1165                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1166                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1167                 tempSE_VTX_FMT_1 = 0x0;
1168
1169                 /*
1170                  * Depth buffer specific enables
1171                  */
1172                 if (flags & RADEON_DEPTH) {
1173                         /* Enable depth buffer */
1174                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1175                 } else {
1176                         /* Disable depth buffer */
1177                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1178                 }
1179
1180                 /*
1181                  * Stencil buffer specific enables
1182                  */
1183                 if (flags & RADEON_STENCIL) {
1184                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1185                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1186                 } else {
1187                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1188                         tempRB3D_STENCILREFMASK = 0x00000000;
1189                 }
1190
1191                 if (flags & RADEON_USE_COMP_ZBUF) {
1192                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1193                             RADEON_Z_DECOMPRESSION_ENABLE;
1194                 }
1195                 if (flags & RADEON_USE_HIERZ) {
1196                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1197                 }
1198
1199                 BEGIN_RING(26);
1200                 RADEON_WAIT_UNTIL_2D_IDLE();
1201
1202                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1203                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1204                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1205                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1206                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1207                              tempRB3D_STENCILREFMASK);
1208                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1209                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1210                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1211                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1212                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1213                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1214                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1215                 ADVANCE_RING();
1216
1217                 /* Make sure we restore the 3D state next time.
1218                  */
1219                 dev_priv->sarea_priv->ctx_owner = 0;
1220
1221                 for (i = 0; i < nbox; i++) {
1222
1223                         /* Funny that this should be required --
1224                          *  sets top-left?
1225                          */
1226                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1227
1228                         BEGIN_RING(14);
1229                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1230                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1231                                   RADEON_PRIM_WALK_RING |
1232                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1233                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1234                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1235                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1236                         OUT_RING(0x3f800000);
1237                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1238                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1239                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1240                         OUT_RING(0x3f800000);
1241                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1242                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1243                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1244                         OUT_RING(0x3f800000);
1245                         ADVANCE_RING();
1246                 }
1247         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1248
1249                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1250
1251                 rb3d_cntl = depth_clear->rb3d_cntl;
1252
1253                 if (flags & RADEON_DEPTH) {
1254                         rb3d_cntl |= RADEON_Z_ENABLE;
1255                 } else {
1256                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1257                 }
1258
1259                 if (flags & RADEON_STENCIL) {
1260                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1261                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1262                 } else {
1263                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1264                         rb3d_stencilrefmask = 0x00000000;
1265                 }
1266
1267                 if (flags & RADEON_USE_COMP_ZBUF) {
1268                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1269                             RADEON_Z_DECOMPRESSION_ENABLE;
1270                 }
1271                 if (flags & RADEON_USE_HIERZ) {
1272                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1273                 }
1274
1275                 BEGIN_RING(13);
1276                 RADEON_WAIT_UNTIL_2D_IDLE();
1277
1278                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1279                 OUT_RING(0x00000000);
1280                 OUT_RING(rb3d_cntl);
1281
1282                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1283                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1284                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1285                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1286                 ADVANCE_RING();
1287
1288                 /* Make sure we restore the 3D state next time.
1289                  */
1290                 dev_priv->sarea_priv->ctx_owner = 0;
1291
1292                 for (i = 0; i < nbox; i++) {
1293
1294                         /* Funny that this should be required --
1295                          *  sets top-left?
1296                          */
1297                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1298
1299                         BEGIN_RING(15);
1300
1301                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1302                         OUT_RING(RADEON_VTX_Z_PRESENT |
1303                                  RADEON_VTX_PKCOLOR_PRESENT);
1304                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1305                                   RADEON_PRIM_WALK_RING |
1306                                   RADEON_MAOS_ENABLE |
1307                                   RADEON_VTX_FMT_RADEON_MODE |
1308                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1309
1310                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1311                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1312                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1313                         OUT_RING(0x0);
1314
1315                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1316                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1318                         OUT_RING(0x0);
1319
1320                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1321                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1322                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1323                         OUT_RING(0x0);
1324
1325                         ADVANCE_RING();
1326                 }
1327         }
1328
1329         /* Increment the clear counter.  The client-side 3D driver must
1330          * wait on this value before performing the clear ioctl.  We
1331          * need this because the card's so damned fast...
1332          */
1333         dev_priv->sarea_priv->last_clear++;
1334
1335         BEGIN_RING(4);
1336
1337         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1338         RADEON_WAIT_UNTIL_IDLE();
1339
1340         ADVANCE_RING();
1341 }
1342
1343 static void radeon_cp_dispatch_swap(struct drm_device * dev)
1344 {
1345         drm_radeon_private_t *dev_priv = dev->dev_private;
1346         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1347         int nbox = sarea_priv->nbox;
1348         struct drm_clip_rect *pbox = sarea_priv->boxes;
1349         int i;
1350         RING_LOCALS;
1351         DRM_DEBUG("\n");
1352
1353         /* Do some trivial performance monitoring...
1354          */
1355         if (dev_priv->do_boxes)
1356                 radeon_cp_performance_boxes(dev_priv);
1357
1358         /* Wait for the 3D stream to idle before dispatching the bitblt.
1359          * This will prevent data corruption between the two streams.
1360          */
1361         BEGIN_RING(2);
1362
1363         RADEON_WAIT_UNTIL_3D_IDLE();
1364
1365         ADVANCE_RING();
1366
1367         for (i = 0; i < nbox; i++) {
1368                 int x = pbox[i].x1;
1369                 int y = pbox[i].y1;
1370                 int w = pbox[i].x2 - x;
1371                 int h = pbox[i].y2 - y;
1372
1373                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1374
1375                 BEGIN_RING(9);
1376
1377                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1378                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1379                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1380                          RADEON_GMC_BRUSH_NONE |
1381                          (dev_priv->color_fmt << 8) |
1382                          RADEON_GMC_SRC_DATATYPE_COLOR |
1383                          RADEON_ROP3_S |
1384                          RADEON_DP_SRC_SOURCE_MEMORY |
1385                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1386
1387                 /* Make this work even if front & back are flipped:
1388                  */
1389                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1390                 if (dev_priv->sarea_priv->pfCurrentPage == 0) {
1391                         OUT_RING(dev_priv->back_pitch_offset);
1392                         OUT_RING(dev_priv->front_pitch_offset);
1393                 } else {
1394                         OUT_RING(dev_priv->front_pitch_offset);
1395                         OUT_RING(dev_priv->back_pitch_offset);
1396                 }
1397
1398                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1399                 OUT_RING((x << 16) | y);
1400                 OUT_RING((x << 16) | y);
1401                 OUT_RING((w << 16) | h);
1402
1403                 ADVANCE_RING();
1404         }
1405
1406         /* Increment the frame counter.  The client-side 3D driver must
1407          * throttle the framerate by waiting for this value before
1408          * performing the swapbuffer ioctl.
1409          */
1410         dev_priv->sarea_priv->last_frame++;
1411
1412         BEGIN_RING(4);
1413
1414         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1415         RADEON_WAIT_UNTIL_2D_IDLE();
1416
1417         ADVANCE_RING();
1418 }
1419
1420 static void radeon_cp_dispatch_flip(struct drm_device * dev)
1421 {
1422         drm_radeon_private_t *dev_priv = dev->dev_private;
1423         struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
1424         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1425             ? dev_priv->front_offset : dev_priv->back_offset;
1426         RING_LOCALS;
1427         DRM_DEBUG("pfCurrentPage=%d\n",
1428                   dev_priv->sarea_priv->pfCurrentPage);
1429
1430         /* Do some trivial performance monitoring...
1431          */
1432         if (dev_priv->do_boxes) {
1433                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1434                 radeon_cp_performance_boxes(dev_priv);
1435         }
1436
1437         /* Update the frame offsets for both CRTCs
1438          */
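        /* For the color formats used here, (color_fmt - 2) works out to the
         * bytes per pixel (2 for 16bpp, 4 for 32bpp); the resulting byte
         * offset is masked to 8-byte alignment before being added to the
         * front/back buffer offset.
         */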
1439         BEGIN_RING(6);
1440
1441         RADEON_WAIT_UNTIL_3D_IDLE();
1442         OUT_RING_REG(RADEON_CRTC_OFFSET,
1443                      ((sarea->frame.y * dev_priv->front_pitch +
1444                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1445                      + offset);
1446         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1447                      + offset);
1448
1449         ADVANCE_RING();
1450
1451         /* Increment the frame counter.  The client-side 3D driver must
1452          * throttle the framerate by waiting for this value before
1453          * performing the swapbuffer ioctl.
1454          */
1455         dev_priv->sarea_priv->last_frame++;
1456         dev_priv->sarea_priv->pfCurrentPage =
1457                 1 - dev_priv->sarea_priv->pfCurrentPage;
1458
1459         BEGIN_RING(2);
1460
1461         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1462
1463         ADVANCE_RING();
1464 }
1465
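/* Sanity-check a vertex count against the primitive type encoded in
 * 'primitive'.  Returns nonzero if the count is invalid (zero, or not
 * the multiple/minimum that the primitive type requires).
 */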
1466 static int bad_prim_vertex_nr(int primitive, int nr)
1467 {
1468         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1469         case RADEON_PRIM_TYPE_NONE:
1470         case RADEON_PRIM_TYPE_POINT:
1471                 return nr < 1;
1472         case RADEON_PRIM_TYPE_LINE:
1473                 return (nr & 1) || nr == 0;
1474         case RADEON_PRIM_TYPE_LINE_STRIP:
1475                 return nr < 2;
1476         case RADEON_PRIM_TYPE_TRI_LIST:
1477         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1478         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1479         case RADEON_PRIM_TYPE_RECT_LIST:
1480                 return nr % 3 || nr == 0;
1481         case RADEON_PRIM_TYPE_TRI_FAN:
1482         case RADEON_PRIM_TYPE_TRI_STRIP:
1483                 return nr < 3;
1484         default:
1485                 return 1;
1486         }
1487 }
1488
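/* Parameters describing a single TCL primitive dispatch, assembled by the
 * vertex/indices ioctls below and consumed by the dispatch helpers.
 */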
1489 typedef struct {
1490         unsigned int start;
1491         unsigned int finish;
1492         unsigned int prim;
1493         unsigned int numverts;
1494         unsigned int offset;
1495         unsigned int vc_format;
1496 } drm_radeon_tcl_prim_t;
1497
1498 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1499                                       struct drm_buf * buf,
1500                                       drm_radeon_tcl_prim_t * prim)
1501 {
1502         drm_radeon_private_t *dev_priv = dev->dev_private;
1503         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1504         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1505         int numverts = (int)prim->numverts;
1506         int nbox = sarea_priv->nbox;
1507         int i = 0;
1508         RING_LOCALS;
1509
1510         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1511                   prim->prim,
1512                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1513
1514         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1515                 DRM_ERROR("bad prim %x numverts %d\n",
1516                           prim->prim, prim->numverts);
1517                 return;
1518         }
1519
1520         do {
1521                 /* Emit the next cliprect */
1522                 if (i < nbox) {
1523                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1524                 }
1525
1526                 /* Emit the vertex buffer rendering commands */
1527                 BEGIN_RING(5);
1528
1529                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1530                 OUT_RING(offset);
1531                 OUT_RING(numverts);
1532                 OUT_RING(prim->vc_format);
1533                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1534                          RADEON_COLOR_ORDER_RGBA |
1535                          RADEON_VTX_FMT_RADEON_MODE |
1536                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1537
1538                 ADVANCE_RING();
1539
1540                 i++;
1541         } while (i < nbox);
1542 }
1543
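/* Stamp a DMA buffer with a new dispatch age and mark it pending; the
 * freelist code will hand it out again once the CP has caught up with
 * this age.
 */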
1544 static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
1545 {
1546         drm_radeon_private_t *dev_priv = dev->dev_private;
1547         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1548         RING_LOCALS;
1549
1550         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1551
1552         /* Emit the vertex buffer age */
1553         BEGIN_RING(2);
1554         RADEON_DISPATCH_AGE(buf_priv->age);
1555         ADVANCE_RING();
1556
1557         buf->pending = 1;
1558         buf->used = 0;
1559 }
1560
1561 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1562                                         struct drm_buf * buf, int start, int end)
1563 {
1564         drm_radeon_private_t *dev_priv = dev->dev_private;
1565         RING_LOCALS;
1566         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1567
1568         if (start != end) {
1569                 int offset = (dev_priv->gart_buffers_offset
1570                               + buf->offset + start);
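                /* Round the byte range up to a whole number of 32-bit dwords. */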
1571                 int dwords = (end - start + 3) / sizeof(u32);
1572
1573                 /* Indirect buffer data must be an even number of
1574                  * dwords, so if we've been given an odd number we must
1575                  * pad the data with a Type-2 CP packet.
1576                  */
1577                 if (dwords & 1) {
1578                         u32 *data = (u32 *)
1579                             ((char *)dev->agp_buffer_map->handle
1580                              + buf->offset + start);
1581                         data[dwords++] = RADEON_CP_PACKET2;
1582                 }
1583
1584                 /* Fire off the indirect buffer */
1585                 BEGIN_RING(3);
1586
1587                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1588                 OUT_RING(offset);
1589                 OUT_RING(dwords);
1590
1591                 ADVANCE_RING();
1592         }
1593 }
1594
1595 static void radeon_cp_dispatch_indices(struct drm_device * dev,
1596                                        struct drm_buf * elt_buf,
1597                                        drm_radeon_tcl_prim_t * prim)
1598 {
1599         drm_radeon_private_t *dev_priv = dev->dev_private;
1600         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1601         int offset = dev_priv->gart_buffers_offset + prim->offset;
1602         u32 *data;
1603         int dwords;
1604         int i = 0;
1605         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1606         int count = (prim->finish - start) / sizeof(u16);
1607         int nbox = sarea_priv->nbox;
1608
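        /* prim->start points at the CP packet header that is built into the
         * buffer below; the index data proper begins RADEON_INDEX_PRIM_OFFSET
         * bytes in, and 'count' is the number of 16-bit indices in the range.
         */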
1609         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1610                   prim->prim,
1611                   prim->vc_format,
1612                   prim->start, prim->finish, prim->offset, prim->numverts);
1613
1614         if (bad_prim_vertex_nr(prim->prim, count)) {
1615                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1616                 return;
1617         }
1618
1619         if (start >= prim->finish || (prim->start & 0x7)) {
1620                 DRM_ERROR("buffer prim %d\n", prim->prim);
1621                 return;
1622         }
1623
1624         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1625
1626         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1627                         elt_buf->offset + prim->start);
1628
1629         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1630         data[1] = offset;
1631         data[2] = prim->numverts;
1632         data[3] = prim->vc_format;
1633         data[4] = (prim->prim |
1634                    RADEON_PRIM_WALK_IND |
1635                    RADEON_COLOR_ORDER_RGBA |
1636                    RADEON_VTX_FMT_RADEON_MODE |
1637                    (count << RADEON_NUM_VERTICES_SHIFT));
1638
1639         do {
1640                 if (i < nbox)
1641                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1642
1643                 radeon_cp_dispatch_indirect(dev, elt_buf,
1644                                             prim->start, prim->finish);
1645
1646                 i++;
1647         } while (i < nbox);
1648
1649 }
1650
1651 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1652
1653 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1654                                       struct drm_file *file_priv,
1655                                       drm_radeon_texture_t * tex,
1656                                       drm_radeon_tex_image_t * image)
1657 {
1658         drm_radeon_private_t *dev_priv = dev->dev_private;
1659         struct drm_buf *buf;
1660         u32 format;
1661         u32 *buffer;
1662         const u8 __user *data;
1663         int size, dwords, tex_width, blit_width, spitch;
1664         u32 height;
1665         int i;
1666         u32 texpitch, microtile;
1667         u32 offset;
1668         RING_LOCALS;
1669
1670         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1671                 DRM_ERROR("Invalid destination offset\n");
1672                 return -EINVAL;
1673         }
1674
1675         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1676
1677         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1678          * up with the texture data from the host data blit, otherwise
1679          * part of the texture image may be corrupted.
1680          */
1681         BEGIN_RING(4);
1682         RADEON_FLUSH_CACHE();
1683         RADEON_WAIT_UNTIL_IDLE();
1684         ADVANCE_RING();
1685
1686         /* The compiler won't optimize away a division by a variable,
1687          * even if the only legal values are powers of two.  Thus, we'll
1688          * use a shift instead.
1689          */
1690         switch (tex->format) {
1691         case RADEON_TXFORMAT_ARGB8888:
1692         case RADEON_TXFORMAT_RGBA8888:
1693                 format = RADEON_COLOR_FORMAT_ARGB8888;
1694                 tex_width = tex->width * 4;
1695                 blit_width = image->width * 4;
1696                 break;
1697         case RADEON_TXFORMAT_AI88:
1698         case RADEON_TXFORMAT_ARGB1555:
1699         case RADEON_TXFORMAT_RGB565:
1700         case RADEON_TXFORMAT_ARGB4444:
1701         case RADEON_TXFORMAT_VYUY422:
1702         case RADEON_TXFORMAT_YVYU422:
1703                 format = RADEON_COLOR_FORMAT_RGB565;
1704                 tex_width = tex->width * 2;
1705                 blit_width = image->width * 2;
1706                 break;
1707         case RADEON_TXFORMAT_I8:
1708         case RADEON_TXFORMAT_RGB332:
1709                 format = RADEON_COLOR_FORMAT_CI8;
1710                 tex_width = tex->width * 1;
1711                 blit_width = image->width * 1;
1712                 break;
1713         default:
1714                 DRM_ERROR("invalid texture format %d\n", tex->format);
1715                 return -EINVAL;
1716         }
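        /* The blit source pitch is programmed in units of 64 bytes, hence the
         * shift below; e.g. a 512-byte-wide blit gives spitch = 512 >> 6 = 8.
         */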
1717         spitch = blit_width >> 6;
1718         if (spitch == 0 && image->height > 1)
1719                 return -EINVAL;
1720
1721         texpitch = tex->pitch;
1722         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1723                 microtile = 1;
1724                 if (tex_width < 64) {
1725                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1726                         /* we got tiled coordinates, untile them */
1727                         image->x *= 2;
1728                 }
1729         } else
1730                 microtile = 0;
1731
1732         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1733
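        /* Upload the image in one or more passes: each pass copies at most
         * RADEON_MAX_TEXTURE_SIZE bytes through a DMA buffer, blits it into
         * place, then advances image->y, image->height and image->data for
         * the next pass.
         */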
1734         do {
1735                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1736                           tex->offset >> 10, tex->pitch, tex->format,
1737                           image->x, image->y, image->width, image->height);
1738
1739                 /* Make a copy of some parameters in case we have to
1740                  * update them for a multi-pass texture blit.
1741                  */
1742                 height = image->height;
1743                 data = (const u8 __user *)image->data;
1744
1745                 size = height * blit_width;
1746
1747                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1748                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1749                         size = height * blit_width;
1750                 } else if (size < 4 && size > 0) {
1751                         size = 4;
1752                 } else if (size == 0) {
1753                         return 0;
1754                 }
1755
1756                 buf = radeon_freelist_get(dev);
1757                 if (0 && !buf) {
1758                         radeon_do_cp_idle(dev_priv);
1759                         buf = radeon_freelist_get(dev);
1760                 }
1761                 if (!buf) {
1762                         DRM_DEBUG("EAGAIN\n");
1763                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1764                                 return -EFAULT;
1765                         return -EAGAIN;
1766                 }
1767
1768                 /* Dispatch the indirect buffer.
1769                  */
1770                 buffer =
1771                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1772                 dwords = size / 4;
1773
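/* Copy '_width' bytes of texture data in from user space, bailing out of
 * the surrounding function with -EFAULT if the copy faults.
 */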
1774 #define RADEON_COPY_MT(_buf, _data, _width) \
1775         do { \
1776                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1777                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1778                         return -EFAULT; \
1779                 } \
1780         } while(0)
1781
1782                 if (microtile) {
1783                         /* Texture micro tiling is in use, so the minimum texture width
1784                            is 16 bytes.  However, we cannot use the blitter directly for
1785                            texture widths < 64 bytes, since the minimum texture pitch is
1786                            64 bytes and it must match the texture width, otherwise the
1787                            blitter will tile the data incorrectly.  Thus we tile manually
1788                            in that case.  We also special-case tex height = 1, since the
1789                            actual image will then have height 2 and we must not read
1790                            beyond the texture size supplied from user space. */
1791                         if (tex->height == 1) {
1792                                 if (tex_width >= 64 || tex_width <= 16) {
1793                                         RADEON_COPY_MT(buffer, data,
1794                                                 (int)(tex_width * sizeof(u32)));
1795                                 } else if (tex_width == 32) {
1796                                         RADEON_COPY_MT(buffer, data, 16);
1797                                         RADEON_COPY_MT(buffer + 8,
1798                                                        data + 16, 16);
1799                                 }
1800                         } else if (tex_width >= 64 || tex_width == 16) {
1801                                 RADEON_COPY_MT(buffer, data,
1802                                                (int)(dwords * sizeof(u32)));
1803                         } else if (tex_width < 16) {
1804                                 for (i = 0; i < tex->height; i++) {
1805                                         RADEON_COPY_MT(buffer, data, tex_width);
1806                                         buffer += 4;
1807                                         data += tex_width;
1808                                 }
1809                         } else if (tex_width == 32) {
1810                                 /* TODO: make sure this works when not fitting in one buffer
1811                                    (i.e. 32bytes x 2048...) */
1812                                 for (i = 0; i < tex->height; i += 2) {
1813                                         RADEON_COPY_MT(buffer, data, 16);
1814                                         data += 16;
1815                                         RADEON_COPY_MT(buffer + 8, data, 16);
1816                                         data += 16;
1817                                         RADEON_COPY_MT(buffer + 4, data, 16);
1818                                         data += 16;
1819                                         RADEON_COPY_MT(buffer + 12, data, 16);
1820                                         data += 16;
1821                                         buffer += 16;
1822                                 }
1823                         }
1824                 } else {
1825                         if (tex_width >= 32) {
1826                                 /* Texture image width is larger than the minimum, so we
1827                                  * can upload it directly.
1828                                  */
1829                                 RADEON_COPY_MT(buffer, data,
1830                                                (int)(dwords * sizeof(u32)));
1831                         } else {
1832                                 /* Texture image width is less than the minimum, so we
1833                                  * need to pad out each image scanline to the minimum
1834                                  * width.
1835                                  */
1836                                 for (i = 0; i < tex->height; i++) {
1837                                         RADEON_COPY_MT(buffer, data, tex_width);
1838                                         buffer += 8;
1839                                         data += tex_width;
1840                                 }
1841                         }
1842                 }
1843
1844 #undef RADEON_COPY_MT
1845                 buf->file_priv = file_priv;
1846                 buf->used = size;
1847                 offset = dev_priv->gart_buffers_offset + buf->offset;
1848                 BEGIN_RING(9);
1849                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1850                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1851                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1852                          RADEON_GMC_BRUSH_NONE |
1853                          (format << 8) |
1854                          RADEON_GMC_SRC_DATATYPE_COLOR |
1855                          RADEON_ROP3_S |
1856                          RADEON_DP_SRC_SOURCE_MEMORY |
1857                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1858                 OUT_RING((spitch << 22) | (offset >> 10));
1859                 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1860                 OUT_RING(0);
1861                 OUT_RING((image->x << 16) | image->y);
1862                 OUT_RING((image->width << 16) | height);
1863                 RADEON_WAIT_UNTIL_2D_IDLE();
1864                 ADVANCE_RING();
1865                 COMMIT_RING();
1866
1867                 radeon_cp_discard_buffer(dev, buf);
1868
1869                 /* Update the input parameters for next time */
1870                 image->y += height;
1871                 image->height -= height;
1872                 image->data = (const u8 __user *)image->data + size;
1873         } while (image->height > 0);
1874
1875         /* Flush the pixel cache after the blit completes.  This ensures
1876          * the texture data is written out to memory before rendering
1877          * continues.
1878          */
1879         BEGIN_RING(4);
1880         RADEON_FLUSH_CACHE();
1881         RADEON_WAIT_UNTIL_2D_IDLE();
1882         ADVANCE_RING();
1883         COMMIT_RING();
1884
1885         return 0;
1886 }
1887
1888 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1889 {
1890         drm_radeon_private_t *dev_priv = dev->dev_private;
1891         int i;
1892         RING_LOCALS;
1893         DRM_DEBUG("\n");
1894
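        /* 35 dwords: stipple address write (2), table packet header (1),
         * plus the 32 rows of the pattern.
         */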
1895         BEGIN_RING(35);
1896
1897         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1898         OUT_RING(0x00000000);
1899
1900         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1901         for (i = 0; i < 32; i++) {
1902                 OUT_RING(stipple[i]);
1903         }
1904
1905         ADVANCE_RING();
1906 }
1907
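/* Write the cached state for surface 'surf_index' (flags and address bounds)
 * into the SURFACEn_INFO / BOUND registers.  The CP is idled first so the
 * update does not race with commands already in flight.
 */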
1908 static void radeon_apply_surface_regs(int surf_index,
1909                                       drm_radeon_private_t *dev_priv)
1910 {
1911         if (!dev_priv->mmio)
1912                 return;
1913
1914         radeon_do_cp_idle(dev_priv);
1915
1916         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1917                      dev_priv->surfaces[surf_index].flags);
1918         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1919                      dev_priv->surfaces[surf_index].lower);
1920         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1921                      dev_priv->surfaces[surf_index].upper);
1922 }
1923
1924 /* Allocate a virtual surface.
1925  * This doesn't always allocate a real surface; it will stretch an
1926  * existing surface when possible.
1927  *
1928  * Note that refcount can be at most 2: if it could reach 3, freeing the
1929  * middle allocation might force us to allocate a new real surface, which
1930  * might not always be available.
1931  * For example: we allocate three contiguous surfaces ABC.  If B is
1932  * freed, we suddenly need two surfaces to store A and C, which might
1933  * not always be available.
1934  */
1935 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1936                          drm_radeon_private_t *dev_priv,
1937                          struct drm_file *file_priv)
1938 {
1939         struct radeon_virt_surface *s;
1940         int i;
1941         int virt_surface_index;
1942         uint32_t new_upper, new_lower;
1943
1944         new_lower = new->address;
1945         new_upper = new_lower + new->size - 1;
1946
1947         /* sanity check */
1948         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1949             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1950              RADEON_SURF_ADDRESS_FIXED_MASK)
1951             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1952                 return -1;
1953
1954         /* make sure there is no overlap with existing surfaces */
1955         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1956                 if ((dev_priv->surfaces[i].refcount != 0) &&
1957                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1958                       (new_lower < dev_priv->surfaces[i].upper)) ||
1959                      ((new_lower < dev_priv->surfaces[i].lower) &&
1960                       (new_upper > dev_priv->surfaces[i].lower)))) {
1961                         return -1;
1962                 }
1963         }
1964
1965         /* find a virtual surface */
1966         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1967                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
1968                         break;
1969         if (i == 2 * RADEON_MAX_SURFACES) {
1970                 return -1;
1971         }
1972         virt_surface_index = i;
1973
1974         /* try to reuse an existing surface */
1975         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1976                 /* extend before */
1977                 if ((dev_priv->surfaces[i].refcount == 1) &&
1978                     (new->flags == dev_priv->surfaces[i].flags) &&
1979                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1980                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1981                         s->surface_index = i;
1982                         s->lower = new_lower;
1983                         s->upper = new_upper;
1984                         s->flags = new->flags;
1985                         s->file_priv = file_priv;
1986                         dev_priv->surfaces[i].refcount++;
1987                         dev_priv->surfaces[i].lower = s->lower;
1988                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1989                         return virt_surface_index;
1990                 }
1991
1992                 /* extend after */
1993                 if ((dev_priv->surfaces[i].refcount == 1) &&
1994                     (new->flags == dev_priv->surfaces[i].flags) &&
1995                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1996                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1997                         s->surface_index = i;
1998                         s->lower = new_lower;
1999                         s->upper = new_upper;
2000                         s->flags = new->flags;
2001                         s->file_priv = file_priv;
2002                         dev_priv->surfaces[i].refcount++;
2003                         dev_priv->surfaces[i].upper = s->upper;
2004                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2005                         return virt_surface_index;
2006                 }
2007         }
2008
2009         /* okay, we need a new one */
2010         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2011                 if (dev_priv->surfaces[i].refcount == 0) {
2012                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2013                         s->surface_index = i;
2014                         s->lower = new_lower;
2015                         s->upper = new_upper;
2016                         s->flags = new->flags;
2017                         s->file_priv = file_priv;
2018                         dev_priv->surfaces[i].refcount = 1;
2019                         dev_priv->surfaces[i].lower = s->lower;
2020                         dev_priv->surfaces[i].upper = s->upper;
2021                         dev_priv->surfaces[i].flags = s->flags;
2022                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2023                         return virt_surface_index;
2024                 }
2025         }
2026
2027         /* we didn't find anything */
2028         return -1;
2029 }
2030
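/* Release the virtual surface this client allocated at 'lower'.  The
 * underlying real surface is shrunk back and its flags cleared once its
 * refcount drops to zero.  Returns 0 on success, 1 if no matching virtual
 * surface was found.
 */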
2031 static int free_surface(struct drm_file *file_priv,
2032                         drm_radeon_private_t * dev_priv,
2033                         int lower)
2034 {
2035         struct radeon_virt_surface *s;
2036         int i;
2037         /* find the virtual surface */
2038         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2039                 s = &(dev_priv->virt_surfaces[i]);
2040                 if (s->file_priv) {
2041                         if ((lower == s->lower) && (file_priv == s->file_priv))
2042                         {
2043                                 if (dev_priv->surfaces[s->surface_index].
2044                                     lower == s->lower)
2045                                         dev_priv->surfaces[s->surface_index].
2046                                             lower = s->upper;
2047
2048                                 if (dev_priv->surfaces[s->surface_index].
2049                                     upper == s->upper)
2050                                         dev_priv->surfaces[s->surface_index].
2051                                             upper = s->lower;
2052
2053                                 dev_priv->surfaces[s->surface_index].refcount--;
2054                                 if (dev_priv->surfaces[s->surface_index].
2055                                     refcount == 0)
2056                                         dev_priv->surfaces[s->surface_index].
2057                                             flags = 0;
2058                                 s->file_priv = NULL;
2059                                 radeon_apply_surface_regs(s->surface_index,
2060                                                           dev_priv);
2061                                 return 0;
2062                         }
2063                 }
2064         }
2065         return 1;
2066 }
2067
2068 static void radeon_surfaces_release(struct drm_file *file_priv,
2069                                     drm_radeon_private_t * dev_priv)
2070 {
2071         int i;
2072         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2073                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2074                         free_surface(file_priv, dev_priv,
2075                                      dev_priv->virt_surfaces[i].lower);
2076         }
2077 }
2078
2079 /* ================================================================
2080  * IOCTL functions
2081  */
2082 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2083 {
2084         drm_radeon_private_t *dev_priv = dev->dev_private;
2085         drm_radeon_surface_alloc_t *alloc = data;
2086
2087         if (!dev_priv) {
2088                 DRM_ERROR("called with no initialization\n");
2089                 return -EINVAL;
2090         }
2091
2092         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2093                 return -EINVAL;
2094         else
2095                 return 0;
2096 }
2097
2098 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2099 {
2100         drm_radeon_private_t *dev_priv = dev->dev_private;
2101         drm_radeon_surface_free_t *memfree = data;
2102
2103         if (!dev_priv) {
2104                 DRM_ERROR("called with no initialization\n");
2105                 return -EINVAL;
2106         }
2107
2108         if (free_surface(file_priv, dev_priv, memfree->address))
2109                 return -EINVAL;
2110         else
2111                 return 0;
2112 }
2113
2114 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2115 {
2116         drm_radeon_private_t *dev_priv = dev->dev_private;
2117         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2118         drm_radeon_clear_t *clear = data;
2119         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2120         DRM_DEBUG("\n");
2121
2122         LOCK_TEST_WITH_RETURN(dev, file_priv);
2123
2124         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2125
2126         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2127                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2128
2129         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2130                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2131                 return -EFAULT;
2132
2133         radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2134
2135         COMMIT_RING();
2136         return 0;
2137 }
2138
2139 /* Not sure why this isn't set all the time:
2140  */
2141 static int radeon_do_init_pageflip(struct drm_device * dev)
2142 {
2143         drm_radeon_private_t *dev_priv = dev->dev_private;
2144         RING_LOCALS;
2145
2146         DRM_DEBUG("\n");
2147
2148         BEGIN_RING(6);
2149         RADEON_WAIT_UNTIL_3D_IDLE();
2150         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2151         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2152                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2153         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2154         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2155                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2156         ADVANCE_RING();
2157
2158         dev_priv->page_flipping = 1;
2159
2160         if (dev_priv->sarea_priv->pfCurrentPage != 1)
2161                 dev_priv->sarea_priv->pfCurrentPage = 0;
2162
2163         return 0;
2164 }
2165
2166 /* Swapping and flipping are different operations, need different ioctls.
2167  * They can & should be intermixed to support multiple 3d windows.
2168  */
2169 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2170 {
2171         drm_radeon_private_t *dev_priv = dev->dev_private;
2172         DRM_DEBUG("\n");
2173
2174         LOCK_TEST_WITH_RETURN(dev, file_priv);
2175
2176         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2177
2178         if (!dev_priv->page_flipping)
2179                 radeon_do_init_pageflip(dev);
2180
2181         radeon_cp_dispatch_flip(dev);
2182
2183         COMMIT_RING();
2184         return 0;
2185 }
2186
2187 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2188 {
2189         drm_radeon_private_t *dev_priv = dev->dev_private;
2190         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2191         DRM_DEBUG("\n");
2192
2193         LOCK_TEST_WITH_RETURN(dev, file_priv);
2194
2195         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2196
2197         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2198                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2199
2200         radeon_cp_dispatch_swap(dev);
2201         dev_priv->sarea_priv->ctx_owner = 0;
2202
2203         COMMIT_RING();
2204         return 0;
2205 }
2206
2207 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2208 {
2209         drm_radeon_private_t *dev_priv = dev->dev_private;
2210         drm_radeon_sarea_t *sarea_priv;
2211         struct drm_device_dma *dma = dev->dma;
2212         struct drm_buf *buf;
2213         drm_radeon_vertex_t *vertex = data;
2214         drm_radeon_tcl_prim_t prim;
2215
2216         LOCK_TEST_WITH_RETURN(dev, file_priv);
2217
2218         if (!dev_priv) {
2219                 DRM_ERROR("called with no initialization\n");
2220                 return -EINVAL;
2221         }
2222
2223         sarea_priv = dev_priv->sarea_priv;
2224
2225         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2226                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2227
2228         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2229                 DRM_ERROR("buffer index %d (of %d max)\n",
2230                           vertex->idx, dma->buf_count - 1);
2231                 return -EINVAL;
2232         }
2233         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2234                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2235                 return -EINVAL;
2236         }
2237
2238         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2239         VB_AGE_TEST_WITH_RETURN(dev_priv);
2240
2241         buf = dma->buflist[vertex->idx];
2242
2243         if (buf->file_priv != file_priv) {
2244                 DRM_ERROR("process %d using buffer owned by %p\n",
2245                           DRM_CURRENTPID, buf->file_priv);
2246                 return -EINVAL;
2247         }
2248         if (buf->pending) {
2249                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2250                 return -EINVAL;
2251         }
2252
2253         /* Build up a prim_t record:
2254          */
2255         if (vertex->count) {
2256                 buf->used = vertex->count;      /* not used? */
2257
2258                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2259                         if (radeon_emit_state(dev_priv, file_priv,
2260                                               &sarea_priv->context_state,
2261                                               sarea_priv->tex_state,
2262                                               sarea_priv->dirty)) {
2263                                 DRM_ERROR("radeon_emit_state failed\n");
2264                                 return -EINVAL;
2265                         }
2266
2267                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2268                                                RADEON_UPLOAD_TEX1IMAGES |
2269                                                RADEON_UPLOAD_TEX2IMAGES |
2270                                                RADEON_REQUIRE_QUIESCENCE);
2271                 }
2272
2273                 prim.start = 0;
2274                 prim.finish = vertex->count;    /* unused */
2275                 prim.prim = vertex->prim;
2276                 prim.numverts = vertex->count;
2277                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2278
2279                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2280         }
2281
2282         if (vertex->discard) {
2283                 radeon_cp_discard_buffer(dev, buf);
2284         }
2285
2286         COMMIT_RING();
2287         return 0;
2288 }
2289
2290 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2291 {
2292         drm_radeon_private_t *dev_priv = dev->dev_private;
2293         drm_radeon_sarea_t *sarea_priv;
2294         struct drm_device_dma *dma = dev->dma;
2295         struct drm_buf *buf;
2296         drm_radeon_indices_t *elts = data;
2297         drm_radeon_tcl_prim_t prim;
2298         int count;
2299
2300         LOCK_TEST_WITH_RETURN(dev, file_priv);
2301
2302         if (!dev_priv) {
2303                 DRM_ERROR("called with no initialization\n");
2304                 return -EINVAL;
2305         }
2306         sarea_priv = dev_priv->sarea_priv;
2307
2308         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2309                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2310                   elts->discard);
2311
2312         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2313                 DRM_ERROR("buffer index %d (of %d max)\n",
2314                           elts->idx, dma->buf_count - 1);
2315                 return -EINVAL;
2316         }
2317         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2318                 DRM_ERROR("buffer prim %d\n", elts->prim);
2319                 return -EINVAL;
2320         }
2321
2322         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2323         VB_AGE_TEST_WITH_RETURN(dev_priv);
2324
2325         buf = dma->buflist[elts->idx];
2326
2327         if (buf->file_priv != file_priv) {
2328                 DRM_ERROR("process %d using buffer owned by %p\n",
2329                           DRM_CURRENTPID, buf->file_priv);
2330                 return -EINVAL;
2331         }
2332         if (buf->pending) {
2333                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2334                 return -EINVAL;
2335         }
2336
2337         count = (elts->end - elts->start) / sizeof(u16);
2338         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2339
2340         if (elts->start & 0x7) {
2341                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2342                 return -EINVAL;
2343         }
2344         if (elts->start < buf->used) {
2345                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2346                 return -EINVAL;
2347         }
2348
2349         buf->used = elts->end;
2350
2351         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2352                 if (radeon_emit_state(dev_priv, file_priv,
2353                                       &sarea_priv->context_state,
2354                                       sarea_priv->tex_state,
2355                                       sarea_priv->dirty)) {
2356                         DRM_ERROR("radeon_emit_state failed\n");
2357                         return -EINVAL;
2358                 }
2359
2360                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2361                                        RADEON_UPLOAD_TEX1IMAGES |
2362                                        RADEON_UPLOAD_TEX2IMAGES |
2363                                        RADEON_REQUIRE_QUIESCENCE);
2364         }
2365
2366         /* Build up a prim_t record:
2367          */
2368         prim.start = elts->start;
2369         prim.finish = elts->end;
2370         prim.prim = elts->prim;
2371         prim.offset = 0;        /* offset from start of dma buffers */
2372         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2373         prim.vc_format = dev_priv->sarea_priv->vc_format;
2374
2375         radeon_cp_dispatch_indices(dev, buf, &prim);
2376         if (elts->discard) {
2377                 radeon_cp_discard_buffer(dev, buf);
2378         }
2379
2380         COMMIT_RING();
2381         return 0;
2382 }
2383
2384 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2385 {
2386         drm_radeon_private_t *dev_priv = dev->dev_private;
2387         drm_radeon_texture_t *tex = data;
2388         drm_radeon_tex_image_t image;
2389         int ret;
2390
2391         LOCK_TEST_WITH_RETURN(dev, file_priv);
2392
2393         if (tex->image == NULL) {
2394                 DRM_ERROR("null texture image!\n");
2395                 return -EINVAL;
2396         }
2397
2398         if (DRM_COPY_FROM_USER(&image,
2399                                (drm_radeon_tex_image_t __user *) tex->image,
2400                                sizeof(image)))
2401                 return -EFAULT;
2402
2403         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2404         VB_AGE_TEST_WITH_RETURN(dev_priv);
2405
2406         ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2407
2408         return ret;
2409 }
2410
2411 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2412 {
2413         drm_radeon_private_t *dev_priv = dev->dev_private;
2414         drm_radeon_stipple_t *stipple = data;
2415         u32 mask[32];
2416
2417         LOCK_TEST_WITH_RETURN(dev, file_priv);
2418
2419         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2420                 return -EFAULT;
2421
2422         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2423
2424         radeon_cp_dispatch_stipple(dev, mask);
2425
2426         COMMIT_RING();
2427         return 0;
2428 }
2429
2430 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2431 {
2432         drm_radeon_private_t *dev_priv = dev->dev_private;
2433         struct drm_device_dma *dma = dev->dma;
2434         struct drm_buf *buf;
2435         drm_radeon_indirect_t *indirect = data;
2436         RING_LOCALS;
2437
2438         LOCK_TEST_WITH_RETURN(dev, file_priv);
2439
2440         if (!dev_priv) {
2441                 DRM_ERROR("called with no initialization\n");
2442                 return -EINVAL;
2443         }
2444
2445         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2446                   indirect->idx, indirect->start, indirect->end,
2447                   indirect->discard);
2448
2449         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2450                 DRM_ERROR("buffer index %d (of %d max)\n",
2451                           indirect->idx, dma->buf_count - 1);
2452                 return -EINVAL;
2453         }
2454
2455         buf = dma->buflist[indirect->idx];
2456
2457         if (buf->file_priv != file_priv) {
2458                 DRM_ERROR("process %d using buffer owned by %p\n",
2459                           DRM_CURRENTPID, buf->file_priv);
2460                 return -EINVAL;
2461         }
2462         if (buf->pending) {
2463                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2464                 return -EINVAL;
2465         }
2466
2467         if (indirect->start < buf->used) {
2468                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2469                           indirect->start, buf->used);
2470                 return -EINVAL;
2471         }
2472
2473         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2474         VB_AGE_TEST_WITH_RETURN(dev_priv);
2475
2476         buf->used = indirect->end;
2477
2478         /* Wait for the 3D stream to idle before the indirect buffer
2479          * containing 2D acceleration commands is processed.
2480          */
2481         BEGIN_RING(2);
2482
2483         RADEON_WAIT_UNTIL_3D_IDLE();
2484
2485         ADVANCE_RING();
2486
2487         /* Dispatch the indirect buffer full of commands from the
2488          * X server.  This is insecure and is thus only available to
2489          * privileged clients.
2490          */
2491         radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2492         if (indirect->discard) {
2493                 radeon_cp_discard_buffer(dev, buf);
2494         }
2495
2496         COMMIT_RING();
2497         return 0;
2498 }
2499
2500 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2501 {
2502         drm_radeon_private_t *dev_priv = dev->dev_private;
2503         drm_radeon_sarea_t *sarea_priv;
2504         struct drm_device_dma *dma = dev->dma;
2505         struct drm_buf *buf;
2506         drm_radeon_vertex2_t *vertex = data;
2507         int i;
2508         unsigned char laststate;
2509
2510         LOCK_TEST_WITH_RETURN(dev, file_priv);
2511
2512         if (!dev_priv) {
2513                 DRM_ERROR("called with no initialization\n");
2514                 return -EINVAL;
2515         }
2516
2517         sarea_priv = dev_priv->sarea_priv;
2518
2519         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2520                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2521
2522         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2523                 DRM_ERROR("buffer index %d (of %d max)\n",
2524                           vertex->idx, dma->buf_count - 1);
2525                 return -EINVAL;
2526         }
2527
2528         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2529         VB_AGE_TEST_WITH_RETURN(dev_priv);
2530
2531         buf = dma->buflist[vertex->idx];
2532
2533         if (buf->file_priv != file_priv) {
2534                 DRM_ERROR("process %d using buffer owned by %p\n",
2535                           DRM_CURRENTPID, buf->file_priv);
2536                 return -EINVAL;
2537         }
2538
2539         if (buf->pending) {
2540                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2541                 return -EINVAL;
2542         }
2543
2544         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2545                 return -EINVAL;
2546
2547         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2548                 drm_radeon_prim_t prim;
2549                 drm_radeon_tcl_prim_t tclprim;
2550
2551                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2552                         return -EFAULT;
2553
                if (prim.stateidx != laststate) {
                        drm_radeon_state_t state;

                        /* prim.stateidx is user-supplied; bound it
                         * before indexing the state array. */
                        if (prim.stateidx >= vertex->nr_states) {
                                DRM_ERROR("invalid state index %d\n",
                                          prim.stateidx);
                                return -EINVAL;
                        }
                        if (DRM_COPY_FROM_USER(&state,
                                               &vertex->state[prim.stateidx],
                                               sizeof(state)))
                                return -EFAULT;
2561
2562                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2563                                 DRM_ERROR("radeon_emit_state2 failed\n");
2564                                 return -EINVAL;
2565                         }
2566
2567                         laststate = prim.stateidx;
2568                 }
2569
2570                 tclprim.start = prim.start;
2571                 tclprim.finish = prim.finish;
2572                 tclprim.prim = prim.prim;
2573                 tclprim.vc_format = prim.vc_format;
2574
2575                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2576                         tclprim.offset = prim.numverts * 64;
2577                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2578
2579                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2580                 } else {
2581                         tclprim.numverts = prim.numverts;
2582                         tclprim.offset = 0;     /* not used */
2583
2584                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2585                 }
2586
2587                 if (sarea_priv->nbox == 1)
2588                         sarea_priv->nbox = 0;
2589         }
2590
2591         if (vertex->discard) {
2592                 radeon_cp_discard_buffer(dev, buf);
2593         }
2594
2595         COMMIT_RING();
2596         return 0;
2597 }
2598
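/* Emit one of the pre-defined register-write packets from the client
 * command stream.  The packet id indexes the packet[] table to obtain
 * the destination register and dword count; the payload is verified
 * (and any offsets fixed up) before it is copied into the ring as a
 * type-0 packet.
 */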
2599 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2600                                struct drm_file *file_priv,
2601                                drm_radeon_cmd_header_t header,
2602                                drm_radeon_kcmd_buffer_t *cmdbuf)
2603 {
2604         int id = (int)header.packet.packet_id;
2605         int sz, reg;
2606         int *data = (int *)cmdbuf->buf;
2607         RING_LOCALS;
2608
2609         if (id >= RADEON_MAX_STATE_PACKETS)
2610                 return -EINVAL;
2611
2612         sz = packet[id].len;
2613         reg = packet[id].start;
2614
2615         if (sz * sizeof(int) > cmdbuf->bufsz) {
2616                 DRM_ERROR("Packet size provided larger than data provided\n");
2617                 return -EINVAL;
2618         }
2619
2620         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2621                 DRM_ERROR("Packet verification failed\n");
2622                 return -EINVAL;
2623         }
2624
2625         BEGIN_RING(sz + 1);
2626         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2627         OUT_RING_TABLE(data, sz);
2628         ADVANCE_RING();
2629
2630         cmdbuf->buf += sz * sizeof(int);
2631         cmdbuf->bufsz -= sz * sizeof(int);
2632         return 0;
2633 }
2634
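/* Upload scalar TCL state: select the starting scalar index and dword
 * stride, then stream "count" dwords from the command buffer into the
 * scalar data register.
 */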
2635 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2636                                           drm_radeon_cmd_header_t header,
2637                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2638 {
2639         int sz = header.scalars.count;
2640         int start = header.scalars.offset;
2641         int stride = header.scalars.stride;
2642         RING_LOCALS;
2643
2644         BEGIN_RING(3 + sz);
2645         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2646         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2647         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2648         OUT_RING_TABLE(cmdbuf->buf, sz);
2649         ADVANCE_RING();
2650         cmdbuf->buf += sz * sizeof(int);
2651         cmdbuf->bufsz -= sz * sizeof(int);
2652         return 0;
2653 }
2654
/* God this is ugly.  Same as radeon_emit_scalars(), except that the
 * 8-bit scalar offset from the command header is biased by 0x100 so
 * that scalar state beyond the first 256 slots can be addressed.
 */
2657 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2658                                            drm_radeon_cmd_header_t header,
2659                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2660 {
2661         int sz = header.scalars.count;
2662         int start = ((unsigned int)header.scalars.offset) + 0x100;
2663         int stride = header.scalars.stride;
2664         RING_LOCALS;
2665
2666         BEGIN_RING(3 + sz);
2667         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2668         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2669         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2670         OUT_RING_TABLE(cmdbuf->buf, sz);
2671         ADVANCE_RING();
2672         cmdbuf->buf += sz * sizeof(int);
2673         cmdbuf->bufsz -= sz * sizeof(int);
2674         return 0;
2675 }
2676
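/* Upload vector TCL state: poke the TCL state flush register, select
 * the starting vector index and octword stride, then stream "count"
 * dwords from the command buffer into the vector data register.
 */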
2677 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2678                                           drm_radeon_cmd_header_t header,
2679                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2680 {
2681         int sz = header.vectors.count;
2682         int start = header.vectors.offset;
2683         int stride = header.vectors.stride;
2684         RING_LOCALS;
2685
2686         BEGIN_RING(5 + sz);
2687         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2688         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2689         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2690         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2691         OUT_RING_TABLE(cmdbuf->buf, sz);
2692         ADVANCE_RING();
2693
2694         cmdbuf->buf += sz * sizeof(int);
2695         cmdbuf->bufsz -= sz * sizeof(int);
2696         return 0;
2697 }
2698
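/* Like radeon_emit_vectors(), but the start index is assembled from
 * addr_lo/addr_hi, the count is given in whole vectors (four dwords
 * each), and the stride is fixed at one octword.
 */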
2699 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2700                                           drm_radeon_cmd_header_t header,
2701                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2702 {
2703         int sz = header.veclinear.count * 4;
2704         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2705         RING_LOCALS;
2706
2707         if (!sz)
2708                 return 0;
2709         if (sz * 4 > cmdbuf->bufsz)
2710                 return -EINVAL;
2711
2712         BEGIN_RING(5 + sz);
2713         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2714         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2715         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2716         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2717         OUT_RING_TABLE(cmdbuf->buf, sz);
2718         ADVANCE_RING();
2719
2720         cmdbuf->buf += sz * sizeof(int);
2721         cmdbuf->bufsz -= sz * sizeof(int);
2722         return 0;
2723 }
2724
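/* Emit a single type-3 packet from the client command stream after it
 * has been size- and content-checked by
 * radeon_check_and_fixup_packet3().
 */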
2725 static int radeon_emit_packet3(struct drm_device * dev,
2726                                struct drm_file *file_priv,
2727                                drm_radeon_kcmd_buffer_t *cmdbuf)
2728 {
2729         drm_radeon_private_t *dev_priv = dev->dev_private;
2730         unsigned int cmdsz;
2731         int ret;
2732         RING_LOCALS;
2733
2734         DRM_DEBUG("\n");
2735
2736         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2737                                                   cmdbuf, &cmdsz))) {
2738                 DRM_ERROR("Packet verification failed\n");
2739                 return ret;
2740         }
2741
2742         BEGIN_RING(cmdsz);
2743         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2744         ADVANCE_RING();
2745
2746         cmdbuf->buf += cmdsz * 4;
2747         cmdbuf->bufsz -= cmdsz * 4;
2748         return 0;
2749 }
2750
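/* As radeon_emit_packet3(), but the packet is emitted once for each
 * cliprect supplied with the command buffer, with the hardware
 * cliprect reprogrammed via radeon_emit_clip_rect() before each copy.
 */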
2751 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2752                                         struct drm_file *file_priv,
2753                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2754                                         int orig_nbox)
2755 {
2756         drm_radeon_private_t *dev_priv = dev->dev_private;
2757         struct drm_clip_rect box;
2758         unsigned int cmdsz;
2759         int ret;
2760         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2761         int i = 0;
2762         RING_LOCALS;
2763
2764         DRM_DEBUG("\n");
2765
2766         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2767                                                   cmdbuf, &cmdsz))) {
2768                 DRM_ERROR("Packet verification failed\n");
2769                 return ret;
2770         }
2771
2772         if (!orig_nbox)
2773                 goto out;
2774
2775         do {
2776                 if (i < cmdbuf->nbox) {
2777                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2778                                 return -EFAULT;
2779                         /* FIXME The second and subsequent times round
2780                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2781                          * calling emit_clip_rect(). This fixes a
2782                          * lockup on fast machines when sending
2783                          * several cliprects with a cmdbuf, as when
2784                          * waving a 2D window over a 3D
2785                          * window. Something in the commands from user
2786                          * space seems to hang the card when they're
2787                          * sent several times in a row. That would be
2788                          * the correct place to fix it but this works
2789                          * around it until I can figure that out - Tim
2790                          * Smith */
2791                         if (i) {
2792                                 BEGIN_RING(2);
2793                                 RADEON_WAIT_UNTIL_3D_IDLE();
2794                                 ADVANCE_RING();
2795                         }
2796                         radeon_emit_clip_rect(dev_priv, &box);
2797                 }
2798
2799                 BEGIN_RING(cmdsz);
2800                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2801                 ADVANCE_RING();
2802
2803         } while (++i < cmdbuf->nbox);
2804         if (cmdbuf->nbox == 1)
2805                 cmdbuf->nbox = 0;
2806
2807       out:
2808         cmdbuf->buf += cmdsz * 4;
2809         cmdbuf->bufsz -= cmdsz * 4;
2810         return 0;
2811 }
2812
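/* Insert an engine-idle wait into the ring: 2D, 3D or both, depending
 * on the flags from the command header.
 */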
2813 static int radeon_emit_wait(struct drm_device * dev, int flags)
2814 {
2815         drm_radeon_private_t *dev_priv = dev->dev_private;
2816         RING_LOCALS;
2817
2818         DRM_DEBUG("%x\n", flags);
2819         switch (flags) {
2820         case RADEON_WAIT_2D:
2821                 BEGIN_RING(2);
2822                 RADEON_WAIT_UNTIL_2D_IDLE();
2823                 ADVANCE_RING();
2824                 break;
2825         case RADEON_WAIT_3D:
2826                 BEGIN_RING(2);
2827                 RADEON_WAIT_UNTIL_3D_IDLE();
2828                 ADVANCE_RING();
2829                 break;
2830         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2831                 BEGIN_RING(2);
2832                 RADEON_WAIT_UNTIL_IDLE();
2833                 ADVANCE_RING();
2834                 break;
2835         default:
2836                 return -EINVAL;
2837         }
2838
2839         return 0;
2840 }
2841
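/* CMDBUF ioctl: decode a client-supplied stream of commands, each a
 * one-dword drm_radeon_cmd_header_t followed by a type-specific
 * payload.  R300-class chips are handed off to r300_do_cp_cmdbuf();
 * everything else is parsed by the switch below.
 */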
2842 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2843 {
2844         drm_radeon_private_t *dev_priv = dev->dev_private;
2845         struct drm_device_dma *dma = dev->dma;
2846         struct drm_buf *buf = NULL;
2847         int idx;
2848         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2849         drm_radeon_cmd_header_t header;
2850         int orig_nbox, orig_bufsz;
2851         char *kbuf = NULL;
2852
2853         LOCK_TEST_WITH_RETURN(dev, file_priv);
2854
2855         if (!dev_priv) {
2856                 DRM_ERROR("called with no initialization\n");
2857                 return -EINVAL;
2858         }
2859
2860         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2861         VB_AGE_TEST_WITH_RETURN(dev_priv);
2862
2863         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2864                 return -EINVAL;
2865         }
2866
2867         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2868          * races between checking values and using those values in other code,
2869          * and simply to avoid a lot of function calls to copy in data.
2870          */
2871         orig_bufsz = cmdbuf->bufsz;
2872         if (orig_bufsz != 0) {
2873                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2874                 if (kbuf == NULL)
2875                         return -ENOMEM;
2876                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2877                                        cmdbuf->bufsz)) {
2878                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2879                         return -EFAULT;
2880                 }
2881                 cmdbuf->buf = kbuf;
2882         }
2883
2884         orig_nbox = cmdbuf->nbox;
2885
2886         if (dev_priv->microcode_version == UCODE_R300) {
2887                 int temp;
2888                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2889
2890                 if (orig_bufsz != 0)
2891                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2892
2893                 return temp;
2894         }
2895
2896         /* microcode_version != r300 */
2897         while (cmdbuf->bufsz >= sizeof(header)) {
2898
2899                 header.i = *(int *)cmdbuf->buf;
2900                 cmdbuf->buf += sizeof(header);
2901                 cmdbuf->bufsz -= sizeof(header);
2902
2903                 switch (header.header.cmd_type) {
2904                 case RADEON_CMD_PACKET:
2905                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2906                         if (radeon_emit_packets
2907                             (dev_priv, file_priv, header, cmdbuf)) {
2908                                 DRM_ERROR("radeon_emit_packets failed\n");
2909                                 goto err;
2910                         }
2911                         break;
2912
2913                 case RADEON_CMD_SCALARS:
2914                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2915                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2916                                 DRM_ERROR("radeon_emit_scalars failed\n");
2917                                 goto err;
2918                         }
2919                         break;
2920
2921                 case RADEON_CMD_VECTORS:
2922                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2923                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2924                                 DRM_ERROR("radeon_emit_vectors failed\n");
2925                                 goto err;
2926                         }
2927                         break;
2928
2929                 case RADEON_CMD_DMA_DISCARD:
2930                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2931                         idx = header.dma.buf_idx;
2932                         if (idx < 0 || idx >= dma->buf_count) {
2933                                 DRM_ERROR("buffer index %d (of %d max)\n",
2934                                           idx, dma->buf_count - 1);
2935                                 goto err;
2936                         }
2937
2938                         buf = dma->buflist[idx];
2939                         if (buf->file_priv != file_priv || buf->pending) {
2940                                 DRM_ERROR("bad buffer %p %p %d\n",
2941                                           buf->file_priv, file_priv,
2942                                           buf->pending);
2943                                 goto err;
2944                         }
2945
2946                         radeon_cp_discard_buffer(dev, buf);
2947                         break;
2948
2949                 case RADEON_CMD_PACKET3:
2950                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2951                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2952                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2953                                 goto err;
2954                         }
2955                         break;
2956
2957                 case RADEON_CMD_PACKET3_CLIP:
2958                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2959                         if (radeon_emit_packet3_cliprect
2960                             (dev, file_priv, cmdbuf, orig_nbox)) {
2961                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2962                                 goto err;
2963                         }
2964                         break;
2965
2966                 case RADEON_CMD_SCALARS2:
2967                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2968                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2969                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2970                                 goto err;
2971                         }
2972                         break;
2973
2974                 case RADEON_CMD_WAIT:
2975                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2976                         if (radeon_emit_wait(dev, header.wait.flags)) {
2977                                 DRM_ERROR("radeon_emit_wait failed\n");
2978                                 goto err;
2979                         }
2980                         break;
2981                 case RADEON_CMD_VECLINEAR:
2982                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2983                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2984                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2985                                 goto err;
2986                         }
2987                         break;
2988
2989                 default:
2990                         DRM_ERROR("bad cmd_type %d at %p\n",
2991                                   header.header.cmd_type,
2992                                   cmdbuf->buf - sizeof(header));
2993                         goto err;
2994                 }
2995         }
2996
2997         if (orig_bufsz != 0)
2998                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2999
3000         DRM_DEBUG("DONE\n");
3001         COMMIT_RING();
3002         return 0;
3003
3004       err:
3005         if (orig_bufsz != 0)
3006                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3007         return -EINVAL;
3008 }
3009
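/* GETPARAM ioctl: report driver and hardware state (scratch register
 * values, memory map handles, the IRQ number, the bus type, ...) back
 * to the client as a single integer.
 */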
3010 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3011 {
3012         drm_radeon_private_t *dev_priv = dev->dev_private;
3013         drm_radeon_getparam_t *param = data;
3014         int value;
3015
3016         if (!dev_priv) {
3017                 DRM_ERROR("called with no initialization\n");
3018                 return -EINVAL;
3019         }
3020
3021         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3022
3023         switch (param->param) {
3024         case RADEON_PARAM_GART_BUFFER_OFFSET:
3025                 value = dev_priv->gart_buffers_offset;
3026                 break;
3027         case RADEON_PARAM_LAST_FRAME:
3028                 dev_priv->stats.last_frame_reads++;
3029                 value = GET_SCRATCH(0);
3030                 break;
3031         case RADEON_PARAM_LAST_DISPATCH:
3032                 value = GET_SCRATCH(1);
3033                 break;
3034         case RADEON_PARAM_LAST_CLEAR:
3035                 dev_priv->stats.last_clear_reads++;
3036                 value = GET_SCRATCH(2);
3037                 break;
3038         case RADEON_PARAM_IRQ_NR:
3039                 value = dev->irq;
3040                 break;
3041         case RADEON_PARAM_GART_BASE:
3042                 value = dev_priv->gart_vm_start;
3043                 break;
3044         case RADEON_PARAM_REGISTER_HANDLE:
3045                 value = dev_priv->mmio->offset;
3046                 break;
3047         case RADEON_PARAM_STATUS_HANDLE:
3048                 value = dev_priv->ring_rptr_offset;
3049                 break;
3050 #ifndef __LP64__
3051                 /*
3052                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3053                  * pointer which can't fit into an int-sized variable.  According to
3054                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3055                  * not supporting it shouldn't be a problem.  If the same functionality
3056                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3057                  * so backwards-compatibility for the embedded platforms can be
3058                  * maintained.  --davidm 4-Feb-2004.
3059                  */
3060         case RADEON_PARAM_SAREA_HANDLE:
3061                 /* The lock is the first dword in the sarea. */
3062                 value = (long)dev->lock.hw_lock;
3063                 break;
3064 #endif
3065         case RADEON_PARAM_GART_TEX_HANDLE:
3066                 value = dev_priv->gart_textures_offset;
3067                 break;
3068         case RADEON_PARAM_SCRATCH_OFFSET:
3069                 if (!dev_priv->writeback_works)
3070                         return -EINVAL;
3071                 value = RADEON_SCRATCH_REG_OFFSET;
3072                 break;
3073
3074         case RADEON_PARAM_CARD_TYPE:
3075                 if (dev_priv->flags & RADEON_IS_PCIE)
3076                         value = RADEON_CARD_PCIE;
3077                 else if (dev_priv->flags & RADEON_IS_AGP)
3078                         value = RADEON_CARD_AGP;
3079                 else
3080                         value = RADEON_CARD_PCI;
3081                 break;
3082         case RADEON_PARAM_VBLANK_CRTC:
3083                 value = radeon_vblank_crtc_get(dev);
3084                 break;
3085         case RADEON_PARAM_FB_LOCATION:
3086                 value = radeon_read_fb_location(dev_priv);
3087                 break;
3088         default:
                DRM_DEBUG("Invalid parameter %d\n", param->param);
3090                 return -EINVAL;
3091         }
3092
3093         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3094                 DRM_ERROR("copy_to_user\n");
3095                 return -EFAULT;
3096         }
3097
3098         return 0;
3099 }
3100
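/* SETPARAM ioctl: let a client adjust driver state such as the
 * framebuffer location delta, color tiling, the PCI GART location and
 * table size, and the vblank CRTC.
 */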
3101 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3102 {
3103         drm_radeon_private_t *dev_priv = dev->dev_private;
3104         drm_radeon_setparam_t *sp = data;
3105         struct drm_radeon_driver_file_fields *radeon_priv;
3106
3107         if (!dev_priv) {
3108                 DRM_ERROR("called with no initialization\n");
3109                 return -EINVAL;
3110         }
3111
3112         switch (sp->param) {
3113         case RADEON_SETPARAM_FB_LOCATION:
3114                 radeon_priv = file_priv->driver_priv;
3115                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3116                     sp->value;
3117                 break;
3118         case RADEON_SETPARAM_SWITCH_TILING:
3119                 if (sp->value == 0) {
3120                         DRM_DEBUG("color tiling disabled\n");
3121                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3122                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3123                         dev_priv->sarea_priv->tiling_enabled = 0;
3124                 } else if (sp->value == 1) {
3125                         DRM_DEBUG("color tiling enabled\n");
3126                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3127                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3128                         dev_priv->sarea_priv->tiling_enabled = 1;
3129                 }
3130                 break;
3131         case RADEON_SETPARAM_PCIGART_LOCATION:
3132                 dev_priv->pcigart_offset = sp->value;
3133                 dev_priv->pcigart_offset_set = 1;
3134                 break;
3135         case RADEON_SETPARAM_NEW_MEMMAP:
3136                 dev_priv->new_memmap = sp->value;
3137                 break;
3138         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3139                 dev_priv->gart_info.table_size = sp->value;
3140                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3141                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3142                 break;
3143         case RADEON_SETPARAM_VBLANK_CRTC:
                return radeon_vblank_crtc_set(dev, sp->value);
3146         default:
3147                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3148                 return -EINVAL;
3149         }
3150
3151         return 0;
3152 }
3153
3154 /* When a client dies:
3155  *    - Check for and clean up flipped page state
3156  *    - Free any alloced GART memory.
3157  *    - Free any alloced radeon surfaces.
3158  *
3159  * DRM infrastructure takes care of reclaiming dma buffers.
3160  */
3161 void radeon_driver_preclose(struct drm_device *dev,
3162                             struct drm_file *file_priv)
3163 {
3164         if (dev->dev_private) {
3165                 drm_radeon_private_t *dev_priv = dev->dev_private;
3166                 dev_priv->page_flipping = 0;
3167                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3168                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3169                 radeon_surfaces_release(file_priv, dev_priv);
3170         }
3171 }
3172
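/* Last close: make sure the front buffer is being scanned out again if
 * a client died mid page-flip, then release the hardware via
 * radeon_do_release().
 */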
3173 void radeon_driver_lastclose(struct drm_device *dev)
3174 {
3175         if (dev->dev_private) {
3176                 drm_radeon_private_t *dev_priv = dev->dev_private;
3177
3178                 if (dev_priv->sarea_priv &&
3179                     dev_priv->sarea_priv->pfCurrentPage != 0)
3180                         radeon_cp_dispatch_flip(dev);
3181         }
3182
3183         radeon_do_release(dev);
3184 }
3185
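/* First open by a client: allocate the per-file driver state and
 * initialize the framebuffer offset delta used when fixing up
 * client-supplied offsets.
 */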
3186 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3187 {
3188         drm_radeon_private_t *dev_priv = dev->dev_private;
3189         struct drm_radeon_driver_file_fields *radeon_priv;
3190
3191         DRM_DEBUG("\n");
3192         radeon_priv =
3193             (struct drm_radeon_driver_file_fields *)
3194             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3195
3196         if (!radeon_priv)
3197                 return -ENOMEM;
3198
3199         file_priv->driver_priv = radeon_priv;
3200
3201         if (dev_priv)
3202                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3203         else
3204                 radeon_priv->radeon_fb_delta = 0;
3205         return 0;
3206 }
3207
3208 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3209 {
3210         struct drm_radeon_driver_file_fields *radeon_priv =
3211             file_priv->driver_priv;
3212
3213         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3214 }
3215
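/* Ioctl dispatch table.  Entries marked DRM_ROOT_ONLY (CP setup,
 * indirect buffers, heap initialization) are additionally restricted
 * to the DRM master running as root; the rest only require an
 * authenticated client.
 */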
3216 struct drm_ioctl_desc radeon_ioctls[] = {
3217         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3218         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3219         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3220         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3221         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3222         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3223         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3224         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3225         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3226         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3227         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3228         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3229         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3230         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3231         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3232         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3233         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3234         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3235         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3236         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3237         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3238         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3239         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3240         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3241         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3242         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3243         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
3244 };
3245
3246 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);