Merge branch 'vendor/OPENSSL'
[dragonfly.git] / sys / dev / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  *
33  * $DragonFly: src/sys/dev/drm/r300_cmdbuf.c,v 1.1 2008/04/05 18:12:29 hasso Exp $
34  */
35
36 #include "drmP.h"
37 #include "drm.h"
38 #include "radeon_drm.h"
39 #include "radeon_drv.h"
40 #include "r300_reg.h"
41
42 #define R300_SIMULTANEOUS_CLIPRECTS             4
43
/*
 * R300_RE_CLIPRECT_CNTL values, indexed by (number of cliprects - 1).
 */
static const int r300_cliprect_cntl[4] = {
        [0] = 0xAAAA,   /* one cliprect */
        [1] = 0xEEEE,   /* two cliprects */
        [2] = 0xFEFE,   /* three cliprects */
        [3] = 0xFFFE    /* four cliprects */
};
52
53 /**
54  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
55  * buffer, starting with index n.
56  */
57 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
58                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
59 {
60         struct drm_clip_rect box;
61         int nr;
62         int i;
63         RING_LOCALS;
64
65         nr = cmdbuf->nbox - n;
66         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
67                 nr = R300_SIMULTANEOUS_CLIPRECTS;
68
69         DRM_DEBUG("%i cliprects\n", nr);
70
71         if (nr) {
72                 BEGIN_RING(6 + nr * 2);
73                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
74
75                 for (i = 0; i < nr; ++i) {
76                         if (DRM_COPY_FROM_USER_UNCHECKED
77                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
78                                 DRM_ERROR("copy cliprect faulted\n");
79                                 return -EFAULT;
80                         }
81
82                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
83                                 box.x1 = (box.x1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.y1 = (box.y1) &
86                                         R300_CLIPRECT_MASK;
87                                 box.x2 = (box.x2) &
88                                         R300_CLIPRECT_MASK;
89                                 box.y2 = (box.y2) &
90                                         R300_CLIPRECT_MASK;
91                         } else {
92                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
99                                         R300_CLIPRECT_MASK;
100
101                         }
102                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
104                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
105                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
106
107                 }
108
109                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
110
111                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
112                  * client might be able to trample over memory.
113                  * The impact should be very limited, but I'd rather be safe than
114                  * sorry.
115                  */
116                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
117                 OUT_RING(0);
118                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
119                 ADVANCE_RING();
120         } else {
121                 /* Why we allow zero cliprect rendering:
122                  * There are some commands in a command buffer that must be submitted
123                  * even when there are no cliprects, e.g. DMA buffer discard
124                  * or state setting (though state setting could be avoided by
125                  * simulating a loss of context).
126                  *
127                  * Now since the cmdbuf interface is so chaotic right now (and is
128                  * bound to remain that way for a bit until things settle down),
129                  * it is basically impossible to filter out the commands that are
130                  * necessary and those that aren't.
131                  *
132                  * So I choose the safe way and don't do any filtering at all;
133                  * instead, I simply set up the engine so that all rendering
134                  * can't produce any fragments.
135                  */
136                 BEGIN_RING(2);
137                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
138                 ADVANCE_RING();
139         }
140
141         return 0;
142 }
143
144 static u8 r300_reg_flags[0x10000 >> 2];
145
146 void r300_init_reg_flags(struct drm_device *dev)
147 {
148         int i;
149         drm_radeon_private_t *dev_priv = dev->dev_private;
150
151         memset(r300_reg_flags, 0, 0x10000 >> 2);
152 #define ADD_RANGE_MARK(reg, count,mark) \
153                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
154                         r300_reg_flags[i]|=(mark);
155
156 #define MARK_SAFE               1
157 #define MARK_CHECK_OFFSET       2
158
159 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
160
161         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
162         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
163         ADD_RANGE(R300_VAP_CNTL, 1);
164         ADD_RANGE(R300_SE_VTE_CNTL, 2);
165         ADD_RANGE(0x2134, 2);
166         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
167         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
168         ADD_RANGE(0x21DC, 1);
169         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
170         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
171         ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
172         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
173         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
174         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
175         ADD_RANGE(R300_GB_ENABLE, 1);
176         ADD_RANGE(R300_GB_MSPOS0, 5);
177         ADD_RANGE(R300_TX_CNTL, 1);
178         ADD_RANGE(R300_TX_ENABLE, 1);
179         ADD_RANGE(0x4200, 4);
180         ADD_RANGE(0x4214, 1);
181         ADD_RANGE(R300_RE_POINTSIZE, 1);
182         ADD_RANGE(0x4230, 3);
183         ADD_RANGE(R300_RE_LINE_CNT, 1);
184         ADD_RANGE(R300_RE_UNK4238, 1);
185         ADD_RANGE(0x4260, 3);
186         ADD_RANGE(R300_RE_SHADE, 4);
187         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
188         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
189         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
190         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
191         ADD_RANGE(R300_RE_CULL_CNTL, 1);
192         ADD_RANGE(0x42C0, 2);
193         ADD_RANGE(R300_RS_CNTL_0, 2);
194         ADD_RANGE(R300_RS_INTERP_0, 8);
195         ADD_RANGE(R300_RS_ROUTE_0, 8);
196         ADD_RANGE(0x43A4, 2);
197         ADD_RANGE(0x43E8, 1);
198         ADD_RANGE(R300_PFS_CNTL_0, 3);
199         ADD_RANGE(R300_PFS_NODE_0, 4);
200         ADD_RANGE(R300_PFS_TEXI_0, 64);
201         ADD_RANGE(0x46A4, 5);
202         ADD_RANGE(R300_PFS_INSTR0_0, 64);
203         ADD_RANGE(R300_PFS_INSTR1_0, 64);
204         ADD_RANGE(R300_PFS_INSTR2_0, 64);
205         ADD_RANGE(R300_PFS_INSTR3_0, 64);
206         ADD_RANGE(R300_RE_FOG_STATE, 1);
207         ADD_RANGE(R300_FOG_COLOR_R, 3);
208         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
209         ADD_RANGE(0x4BD8, 1);
210         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
211         ADD_RANGE(0x4E00, 1);
212         ADD_RANGE(R300_RB3D_CBLEND, 2);
213         ADD_RANGE(R300_RB3D_COLORMASK, 1);
214         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
215         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
216         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
217         ADD_RANGE(0x4E50, 9);
218         ADD_RANGE(0x4E88, 1);
219         ADD_RANGE(0x4EA0, 2);
220         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
221         ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
222         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
223         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
224         ADD_RANGE(0x4F28, 1);
225         ADD_RANGE(0x4F30, 2);
226         ADD_RANGE(0x4F44, 1);
227         ADD_RANGE(0x4F54, 1);
228
229         ADD_RANGE(R300_TX_FILTER_0, 16);
230         ADD_RANGE(R300_TX_FILTER1_0, 16);
231         ADD_RANGE(R300_TX_SIZE_0, 16);
232         ADD_RANGE(R300_TX_FORMAT_0, 16);
233         ADD_RANGE(R300_TX_PITCH_0, 16);
234         /* Texture offset is dangerous and needs more checking */
235         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
236         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
237         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
238
239         /* Sporadic registers used as primitives are emitted */
240         ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
241         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
242         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
243         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
244
245         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
246                 ADD_RANGE(0x4074, 16);
247         }
248 }
249
250 static __inline__ int r300_check_range(unsigned reg, int count)
251 {
252         int i;
253         if (reg & ~0xffff)
254                 return -1;
255         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
256                 if (r300_reg_flags[i] != MARK_SAFE)
257                         return 1;
258         return 0;
259 }
260
261 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
262                                                           dev_priv,
263                                                           drm_radeon_kcmd_buffer_t
264                                                           * cmdbuf,
265                                                           drm_r300_cmd_header_t
266                                                           header)
267 {
268         int reg;
269         int sz;
270         int i;
271         int values[64];
272         RING_LOCALS;
273
274         sz = header.packet0.count;
275         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
276
277         if ((sz > 64) || (sz < 0)) {
278                 DRM_ERROR
279                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
280                      reg, sz);
281                 return -EINVAL;
282         }
283         for (i = 0; i < sz; i++) {
284                 values[i] = ((int *)cmdbuf->buf)[i];
285                 switch (r300_reg_flags[(reg >> 2) + i]) {
286                 case MARK_SAFE:
287                         break;
288                 case MARK_CHECK_OFFSET:
289                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
290                                 DRM_ERROR
291                                     ("Offset failed range check (reg=%04x sz=%d)\n",
292                                      reg, sz);
293                                 return -EINVAL;
294                         }
295                         break;
296                 default:
297                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
298                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
299                         return -EINVAL;
300                 }
301         }
302
303         BEGIN_RING(1 + sz);
304         OUT_RING(CP_PACKET0(reg, sz - 1));
305         OUT_RING_TABLE(values, sz);
306         ADVANCE_RING();
307
308         cmdbuf->buf += sz * 4;
309         cmdbuf->bufsz -= sz * 4;
310
311         return 0;
312 }
313
314 /**
315  * Emits a packet0 setting arbitrary registers.
316  * Called by r300_do_cp_cmdbuf.
317  *
318  * Note that checks are performed on contents and addresses of the registers
319  */
320 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
321                                         drm_radeon_kcmd_buffer_t *cmdbuf,
322                                         drm_r300_cmd_header_t header)
323 {
324         int reg;
325         int sz;
326         RING_LOCALS;
327
328         sz = header.packet0.count;
329         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
330
331         if (!sz)
332                 return 0;
333
334         if (sz * 4 > cmdbuf->bufsz)
335                 return -EINVAL;
336
337         if (reg + sz * 4 >= 0x10000) {
338                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
339                           sz);
340                 return -EINVAL;
341         }
342
343         if (r300_check_range(reg, sz)) {
344                 /* go and check everything */
345                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
346                                                            header);
347         }
348         /* the rest of the data is safe to emit, whatever the values the user passed */
349
350         BEGIN_RING(1 + sz);
351         OUT_RING(CP_PACKET0(reg, sz - 1));
352         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
353         ADVANCE_RING();
354
355         cmdbuf->buf += sz * 4;
356         cmdbuf->bufsz -= sz * 4;
357
358         return 0;
359 }
360
361 /**
362  * Uploads user-supplied vertex program instructions or parameters onto
363  * the graphics card.
364  * Called by r300_do_cp_cmdbuf.
365  */
366 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
367                                     drm_radeon_kcmd_buffer_t *cmdbuf,
368                                     drm_r300_cmd_header_t header)
369 {
370         int sz;
371         int addr;
372         RING_LOCALS;
373
374         sz = header.vpu.count;
375         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
376
377         if (!sz)
378                 return 0;
379         if (sz * 16 > cmdbuf->bufsz)
380                 return -EINVAL;
381
382         BEGIN_RING(5 + sz * 4);
383         /* Wait for VAP to come to senses.. */
384         /* there is no need to emit it multiple times, (only once before VAP is programmed,
385            but this optimization is for later */
386         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
387         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
388         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
389         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
390
391         ADVANCE_RING();
392
393         cmdbuf->buf += sz * 16;
394         cmdbuf->bufsz -= sz * 16;
395
396         return 0;
397 }
398
399 /**
400  * Emit a clear packet from userspace.
401  * Called by r300_emit_packet3.
402  */
403 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
404                                       drm_radeon_kcmd_buffer_t *cmdbuf)
405 {
406         RING_LOCALS;
407
408         if (8 * 4 > cmdbuf->bufsz)
409                 return -EINVAL;
410
411         BEGIN_RING(10);
412         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
413         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
414                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
415         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
416         ADVANCE_RING();
417
418         cmdbuf->buf += 8 * 4;
419         cmdbuf->bufsz -= 8 * 4;
420
421         return 0;
422 }
423
424 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
425                                                drm_radeon_kcmd_buffer_t *cmdbuf,
426                                                u32 header)
427 {
428         int count, i, k;
429 #define MAX_ARRAY_PACKET  64
430         u32 payload[MAX_ARRAY_PACKET];
431         u32 narrays;
432         RING_LOCALS;
433
434         count = (header >> 16) & 0x3fff;
435
436         if ((count + 1) > MAX_ARRAY_PACKET) {
437                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
438                           count);
439                 return -EINVAL;
440         }
441         memset(payload, 0, MAX_ARRAY_PACKET * 4);
442         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
443
444         /* carefully check packet contents */
445
446         narrays = payload[0];
447         k = 0;
448         i = 1;
449         while ((k < narrays) && (i < (count + 1))) {
450                 i++;            /* skip attribute field */
451                 if (!radeon_check_offset(dev_priv, payload[i])) {
452                         DRM_ERROR
453                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
454                              k, i);
455                         return -EINVAL;
456                 }
457                 k++;
458                 i++;
459                 if (k == narrays)
460                         break;
461                 /* have one more to process, they come in pairs */
462                 if (!radeon_check_offset(dev_priv, payload[i])) {
463                         DRM_ERROR
464                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
465                              k, i);
466                         return -EINVAL;
467                 }
468                 k++;
469                 i++;
470         }
471         /* do the counts match what we expect ? */
472         if ((k != narrays) || (i != (count + 1))) {
473                 DRM_ERROR
474                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
475                      k, i, narrays, count + 1);
476                 return -EINVAL;
477         }
478
479         /* all clear, output packet */
480
481         BEGIN_RING(count + 2);
482         OUT_RING(header);
483         OUT_RING_TABLE(payload, count + 1);
484         ADVANCE_RING();
485
486         cmdbuf->buf += (count + 2) * 4;
487         cmdbuf->bufsz -= (count + 2) * 4;
488
489         return 0;
490 }
491
492 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
493                                              drm_radeon_kcmd_buffer_t *cmdbuf)
494 {
495         u32 *cmd = (u32 *) cmdbuf->buf;
496         int count, ret;
497         RING_LOCALS;
498
499         count=(cmd[0]>>16) & 0x3fff;
500
501         if (cmd[0] & 0x8000) {
502                 u32 offset;
503
504                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
505                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
506                         offset = cmd[2] << 10;
507                         ret = !radeon_check_offset(dev_priv, offset);
508                         if (ret) {
509                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
510                                 return -EINVAL;
511                         }
512                 }
513
514                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
515                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
516                         offset = cmd[3] << 10;
517                         ret = !radeon_check_offset(dev_priv, offset);
518                         if (ret) {
519                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
520                                 return -EINVAL;
521                         }
522
523                 }
524         }
525
526         BEGIN_RING(count+2);
527         OUT_RING(cmd[0]);
528         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
529         ADVANCE_RING();
530
531         cmdbuf->buf += (count+2)*4;
532         cmdbuf->bufsz -= (count+2)*4;
533
534         return 0;
535 }
536
537 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
538                                              drm_radeon_kcmd_buffer_t *cmdbuf)
539 {
540         u32 *cmd = (u32 *) cmdbuf->buf;
541         int count, ret;
542         RING_LOCALS;
543
544         count=(cmd[0]>>16) & 0x3fff;
545
546         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
547                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
548                 return -EINVAL;
549         }
550         ret = !radeon_check_offset(dev_priv, cmd[2]);
551         if (ret) {
552                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
553                 return -EINVAL;
554         }
555
556         BEGIN_RING(count+2);
557         OUT_RING(cmd[0]);
558         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
559         ADVANCE_RING();
560
561         cmdbuf->buf += (count+2)*4;
562         cmdbuf->bufsz -= (count+2)*4;
563
564         return 0;
565 }
566
567 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
568                                             drm_radeon_kcmd_buffer_t *cmdbuf)
569 {
570         u32 header;
571         int count;
572         RING_LOCALS;
573
574         if (4 > cmdbuf->bufsz)
575                 return -EINVAL;
576
577         /* Fixme !! This simply emits a packet without much checking.
578            We need to be smarter. */
579
580         /* obtain first word - actual packet3 header */
581         header = *(u32 *) cmdbuf->buf;
582
583         /* Is it packet 3 ? */
584         if ((header >> 30) != 0x3) {
585                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
586                 return -EINVAL;
587         }
588
589         count = (header >> 16) & 0x3fff;
590
591         /* Check again now that we know how much data to expect */
592         if ((count + 2) * 4 > cmdbuf->bufsz) {
593                 DRM_ERROR
594                     ("Expected packet3 of length %d but have only %d bytes left\n",
595                      (count + 2) * 4, cmdbuf->bufsz);
596                 return -EINVAL;
597         }
598
599         /* Is it a packet type we know about ? */
600         switch (header & 0xff00) {
601         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
602                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
603
604         case RADEON_CNTL_BITBLT_MULTI:
605                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
606
607         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
608                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
609         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
610         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
611         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
612         case RADEON_WAIT_FOR_IDLE:
613         case RADEON_CP_NOP:
614                 /* these packets are safe */
615                 break;
616         default:
617                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
618                 return -EINVAL;
619         }
620
621         BEGIN_RING(count + 2);
622         OUT_RING(header);
623         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
624         ADVANCE_RING();
625
626         cmdbuf->buf += (count + 2) * 4;
627         cmdbuf->bufsz -= (count + 2) * 4;
628
629         return 0;
630 }
631
632 /**
633  * Emit a rendering packet3 from userspace.
634  * Called by r300_do_cp_cmdbuf.
635  */
636 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
637                                         drm_radeon_kcmd_buffer_t *cmdbuf,
638                                         drm_r300_cmd_header_t header)
639 {
640         int n;
641         int ret;
642         char *orig_buf = cmdbuf->buf;
643         int orig_bufsz = cmdbuf->bufsz;
644
645         /* This is a do-while-loop so that we run the interior at least once,
646          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
647          */
648         n = 0;
649         do {
650                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
651                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
652                         if (ret)
653                                 return ret;
654
655                         cmdbuf->buf = orig_buf;
656                         cmdbuf->bufsz = orig_bufsz;
657                 }
658
659                 switch (header.packet3.packet) {
660                 case R300_CMD_PACKET3_CLEAR:
661                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
662                         ret = r300_emit_clear(dev_priv, cmdbuf);
663                         if (ret) {
664                                 DRM_ERROR("r300_emit_clear failed\n");
665                                 return ret;
666                         }
667                         break;
668
669                 case R300_CMD_PACKET3_RAW:
670                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
671                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
672                         if (ret) {
673                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
674                                 return ret;
675                         }
676                         break;
677
678                 default:
679                         DRM_ERROR("bad packet3 type %i at %p\n",
680                                   header.packet3.packet,
681                                   cmdbuf->buf - sizeof(header));
682                         return -EINVAL;
683                 }
684
685                 n += R300_SIMULTANEOUS_CLIPRECTS;
686         } while (n < cmdbuf->nbox);
687
688         return 0;
689 }
690
691 /* Some of the R300 chips seem to be extremely touchy about the two registers
692  * that are configured in r300_pacify.
693  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
694  * sends a command buffer that contains only state setting commands and a
695  * vertex program/parameter upload sequence, this will eventually lead to a
696  * lockup, unless the sequence is bracketed by calls to r300_pacify.
697  * So we should take great care to *always* call r300_pacify before
698  * *anything* 3D related, and again afterwards. This is what the
699  * call bracket in r300_do_cp_cmdbuf is for.
700  */
701
702 /**
703  * Emit the sequence to pacify R300.
704  */
705 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
706 {
707         RING_LOCALS;
708
709         BEGIN_RING(6);
710         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
711         OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
712         OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
713         OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
714         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
715         OUT_RING(0x0);
716         ADVANCE_RING();
717 }
718
719 /**
720  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
721  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
722  * be careful about how this function is called.
723  */
724 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
725 {
726         drm_radeon_private_t *dev_priv = dev->dev_private;
727         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
728
729         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
730         buf->pending = 1;
731         buf->used = 0;
732 }
733
734 static int r300_scratch(drm_radeon_private_t *dev_priv,
735                         drm_radeon_kcmd_buffer_t *cmdbuf,
736                         drm_r300_cmd_header_t header)
737 {
738         u32 *ref_age_base;
739         u32 i, buf_idx, h_pending;
740         RING_LOCALS;
741
742         if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
743                 return -EINVAL;
744         }
745
746         if (header.scratch.reg >= 5) {
747                 return -EINVAL;
748         }
749
750         dev_priv->scratch_ages[header.scratch.reg] ++;
751
752         ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
753
754         cmdbuf->buf += sizeof(uint64_t);
755         cmdbuf->bufsz -= sizeof(uint64_t);
756
757         for (i=0; i < header.scratch.n_bufs; i++) {
758                 buf_idx = *(u32 *)cmdbuf->buf;
759                 buf_idx *= 2; /* 8 bytes per buf */
760
761                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
762                         return -EINVAL;
763                 }
764
765                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
766                         return -EINVAL;
767                 }
768
769                 if (h_pending == 0) {
770                         return -EINVAL;
771                 }
772
773                 h_pending--;
774
775                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
776                         return -EINVAL;
777                 }
778
779                 cmdbuf->buf += sizeof(buf_idx);
780                 cmdbuf->bufsz -= sizeof(buf_idx);
781         }
782
783         BEGIN_RING(2);
784         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
785         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
786         ADVANCE_RING();
787
788         return 0;
789 }
790
791 /**
792  * Parses and validates a user-supplied command buffer and emits appropriate
793  * commands on the DMA ring buffer.
794  * Called by the ioctl handler function radeon_cp_cmdbuf.
795  */
796 int r300_do_cp_cmdbuf(struct drm_device *dev,
797                       struct drm_file *file_priv,
798                       drm_radeon_kcmd_buffer_t *cmdbuf)
799 {
800         drm_radeon_private_t *dev_priv = dev->dev_private;
801         struct drm_device_dma *dma = dev->dma;
802         struct drm_buf *buf = NULL;
803         int emit_dispatch_age = 0;
804         int ret = 0;
805
806         DRM_DEBUG("\n");
807
808         /* See the comment above r300_emit_begin3d for why this call must be here,
809          * and what the cleanup gotos are for. */
810         r300_pacify(dev_priv);
811
812         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
813                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
814                 if (ret)
815                         goto cleanup;
816         }
817
818         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
819                 int idx;
820                 drm_r300_cmd_header_t header;
821
822                 header.u = *(unsigned int *)cmdbuf->buf;
823
824                 cmdbuf->buf += sizeof(header);
825                 cmdbuf->bufsz -= sizeof(header);
826
827                 switch (header.header.cmd_type) {
828                 case R300_CMD_PACKET0:
829                         DRM_DEBUG("R300_CMD_PACKET0\n");
830                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
831                         if (ret) {
832                                 DRM_ERROR("r300_emit_packet0 failed\n");
833                                 goto cleanup;
834                         }
835                         break;
836
837                 case R300_CMD_VPU:
838                         DRM_DEBUG("R300_CMD_VPU\n");
839                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
840                         if (ret) {
841                                 DRM_ERROR("r300_emit_vpu failed\n");
842                                 goto cleanup;
843                         }
844                         break;
845
846                 case R300_CMD_PACKET3:
847                         DRM_DEBUG("R300_CMD_PACKET3\n");
848                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
849                         if (ret) {
850                                 DRM_ERROR("r300_emit_packet3 failed\n");
851                                 goto cleanup;
852                         }
853                         break;
854
855                 case R300_CMD_END3D:
856                         DRM_DEBUG("R300_CMD_END3D\n");
857                         /* TODO:
858                            Ideally userspace driver should not need to issue this call,
859                            i.e. the drm driver should issue it automatically and prevent
860                            lockups.
861
862                            In practice, we do not understand why this call is needed and what
863                            it does (except for some vague guesses that it has to do with cache
864                            coherence) and so the user space driver does it.
865
866                            Once we are sure which uses prevent lockups the code could be moved
867                            into the kernel and the userspace driver will not
868                            need to use this command.
869
870                            Note that issuing this command does not hurt anything
871                            except, possibly, performance */
872                         r300_pacify(dev_priv);
873                         break;
874
875                 case R300_CMD_CP_DELAY:
876                         /* simple enough, we can do it here */
877                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
878                         {
879                                 int i;
880                                 RING_LOCALS;
881
882                                 BEGIN_RING(header.delay.count);
883                                 for (i = 0; i < header.delay.count; i++)
884                                         OUT_RING(RADEON_CP_PACKET2);
885                                 ADVANCE_RING();
886                         }
887                         break;
888
889                 case R300_CMD_DMA_DISCARD:
890                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
891                         idx = header.dma.buf_idx;
892                         if (idx < 0 || idx >= dma->buf_count) {
893                                 DRM_ERROR("buffer index %d (of %d max)\n",
894                                           idx, dma->buf_count - 1);
895                                 ret = -EINVAL;
896                                 goto cleanup;
897                         }
898
899                         buf = dma->buflist[idx];
900                         if (buf->file_priv != file_priv || buf->pending) {
901                                 DRM_ERROR("bad buffer %p %p %d\n",
902                                           buf->file_priv, file_priv,
903                                           buf->pending);
904                                 ret = -EINVAL;
905                                 goto cleanup;
906                         }
907
908                         emit_dispatch_age = 1;
909                         r300_discard_buffer(dev, buf);
910                         break;
911
912                 case R300_CMD_WAIT:
913                         /* simple enough, we can do it here */
914                         DRM_DEBUG("R300_CMD_WAIT\n");
915                         if (header.wait.flags == 0)
916                                 break;  /* nothing to do */
917
918                         {
919                                 RING_LOCALS;
920
921                                 BEGIN_RING(2);
922                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
923                                 OUT_RING((header.wait.flags & 0xf) << 14);
924                                 ADVANCE_RING();
925                         }
926                         break;
927
928                 case R300_CMD_SCRATCH:
929                         DRM_DEBUG("R300_CMD_SCRATCH\n");
930                         ret = r300_scratch(dev_priv, cmdbuf, header);
931                         if (ret) {
932                                 DRM_ERROR("r300_scratch failed\n");
933                                 goto cleanup;
934                         }
935                         break;
936
937                 default:
938                         DRM_ERROR("bad cmd_type %i at %p\n",
939                                   header.header.cmd_type,
940                                   cmdbuf->buf - sizeof(header));
941                         ret = -EINVAL;
942                         goto cleanup;
943                 }
944         }
945
946         DRM_DEBUG("END\n");
947
948       cleanup:
949         r300_pacify(dev_priv);
950
951         /* We emit the vertex buffer age here, outside the pacifier "brackets"
952          * for two reasons:
953          *  (1) This may coalesce multiple age emissions into a single one and
954          *  (2) more importantly, some chips lock up hard when scratch registers
955          *      are written inside the pacifier bracket.
956          */
957         if (emit_dispatch_age) {
958                 RING_LOCALS;
959
960                 /* Emit the vertex buffer age */
961                 BEGIN_RING(2);
962                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
963                 ADVANCE_RING();
964         }
965
966         COMMIT_RING();
967
968         return ret;
969 }