Fix for password truncation when using crypt(3) with DES
[dragonfly.git] / sys / dev / drm / savage_state.c
1 /* savage_state.c -- State and drawing support for Savage
2  *
3  * Copyright 2004  Felix Kuehling
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sub license,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25
26 #include "dev/drm/drmP.h"
27 #include "dev/drm/savage_drm.h"
28 #include "dev/drm/savage_drv.h"
29
30 void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
31                                const struct drm_clip_rect *pbox)
32 {
33         uint32_t scstart = dev_priv->state.s3d.new_scstart;
34         uint32_t scend = dev_priv->state.s3d.new_scend;
35         scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
36                 ((uint32_t)pbox->x1 & 0x000007ff) |
37                 (((uint32_t)pbox->y1 << 16) & 0x07ff0000);
38         scend   = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
39                 (((uint32_t)pbox->x2 - 1) & 0x000007ff) |
40                 ((((uint32_t)pbox->y2 - 1) << 16) & 0x07ff0000);
41         if (scstart != dev_priv->state.s3d.scstart ||
42             scend   != dev_priv->state.s3d.scend) {
43                 DMA_LOCALS;
44                 BEGIN_DMA(4);
45                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
46                 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
47                 DMA_WRITE(scstart);
48                 DMA_WRITE(scend);
49                 dev_priv->state.s3d.scstart = scstart;
50                 dev_priv->state.s3d.scend = scend;
51                 dev_priv->waiting = 1;
52                 DMA_COMMIT();
53         }
54 }
55
56 void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
57                               const struct drm_clip_rect *pbox)
58 {
59         uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
60         uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
61         drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
62                 ((uint32_t)pbox->x1 & 0x000007ff) |
63                 (((uint32_t)pbox->y1 << 12) & 0x00fff000);
64         drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
65                 (((uint32_t)pbox->x2 - 1) & 0x000007ff) |
66                 ((((uint32_t)pbox->y2 - 1) << 12) & 0x00fff000);
67         if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
68             drawctrl1 != dev_priv->state.s4.drawctrl1) {
69                 DMA_LOCALS;
70                 BEGIN_DMA(4);
71                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
72                 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
73                 DMA_WRITE(drawctrl0);
74                 DMA_WRITE(drawctrl1);
75                 dev_priv->state.s4.drawctrl0 = drawctrl0;
76                 dev_priv->state.s4.drawctrl1 = drawctrl1;
77                 dev_priv->waiting = 1;
78                 DMA_COMMIT();
79         }
80 }
81
82 static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
83                                  uint32_t addr)
84 {
85         if ((addr & 6) != 2) { /* reserved bits */
86                 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
87                 return -EINVAL;
88         }
89         if (!(addr & 1)) { /* local */
90                 addr &= ~7;
91                 if (addr < dev_priv->texture_offset ||
92                     addr >= dev_priv->texture_offset + dev_priv->texture_size) {
93                         DRM_ERROR
94                             ("bad texAddr%d %08x (local addr out of range)\n",
95                              unit, addr);
96                         return -EINVAL;
97                 }
98         } else { /* AGP */
99                 if (!dev_priv->agp_textures) {
100                         DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
101                                   unit, addr);
102                         return -EINVAL;
103                 }
104                 addr &= ~7;
105                 if (addr < dev_priv->agp_textures->offset ||
106                     addr >= (dev_priv->agp_textures->offset +
107                              dev_priv->agp_textures->size)) {
108                         DRM_ERROR
109                             ("bad texAddr%d %08x (AGP addr out of range)\n",
110                              unit, addr);
111                         return -EINVAL;
112                 }
113         }
114         return 0;
115 }
116
/*
 * Helpers for the savage_verify_state_*() functions.  Both expect the
 * names dev_priv, start, count and regs to be in scope at the point of use.
 *
 * SAVE_STATE(reg, where):
 *     if register index reg lies in [start, start + count), copy its new
 *     value from regs[] into dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask):
 *     same range test, but only the bits selected by mask are taken from
 *     regs[]; the remaining bits of dev_priv->state.where are preserved.
 *
 * Both are wrapped in do { } while (0) so that they behave like a single
 * statement, e.g. in front of an else.
 */
#define SAVE_STATE(reg,where) do {                              \
        if (start <= (reg) && start + count > (reg))            \
                dev_priv->state.where = regs[(reg) - start];    \
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {                    \
        if (start <= (reg) && start + count > (reg)) {          \
                uint32_t tmp;                                   \
                tmp = regs[(reg) - start];                      \
                dev_priv->state.where = (tmp & (mask)) |        \
                        (dev_priv->state.where & ~(mask));      \
        }                                                       \
} while (0)
128 static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
129                                    unsigned int start, unsigned int count,
130                                    const uint32_t *regs)
131 {
132         if (start < SAVAGE_TEXPALADDR_S3D ||
133             start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
134                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
135                           start, start + count - 1);
136                 return -EINVAL;
137         }
138
139         SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
140                         ~SAVAGE_SCISSOR_MASK_S3D);
141         SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
142                         ~SAVAGE_SCISSOR_MASK_S3D);
143
144         /* if any texture regs were changed ... */
145         if (start <= SAVAGE_TEXCTRL_S3D &&
146             start + count > SAVAGE_TEXPALADDR_S3D) {
147                 /* ... check texture state */
148                 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
149                 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
150                 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
151                         return savage_verify_texaddr(dev_priv, 0,
152                                                 dev_priv->state.s3d.texaddr);
153         }
154
155         return 0;
156 }
157
158 static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
159                                   unsigned int start, unsigned int count,
160                                   const uint32_t *regs)
161 {
162         int ret = 0;
163
164         if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
165             start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
166                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
167                           start, start + count - 1);
168                 return -EINVAL;
169         }
170
171         SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
172                         ~SAVAGE_SCISSOR_MASK_S4);
173         SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
174                         ~SAVAGE_SCISSOR_MASK_S4);
175
176         /* if any texture regs were changed ... */
177         if (start <= SAVAGE_TEXDESCR_S4 &&
178             start + count > SAVAGE_TEXPALADDR_S4) {
179                 /* ... check texture state */
180                 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
181                 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
182                 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
183                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
184                         ret |= savage_verify_texaddr(dev_priv, 0,
185                                                 dev_priv->state.s4.texaddr0);
186                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
187                         ret |= savage_verify_texaddr(dev_priv, 1,
188                                                 dev_priv->state.s4.texaddr1);
189         }
190
191         return ret;
192 }
193 #undef SAVE_STATE
194 #undef SAVE_STATE_MASK
195
196 static int savage_dispatch_state(drm_savage_private_t *dev_priv,
197                                  const drm_savage_cmd_header_t *cmd_header,
198                                  const uint32_t *regs)
199 {
200         unsigned int count = cmd_header->state.count;
201         unsigned int start = cmd_header->state.start;
202         unsigned int count2 = 0;
203         unsigned int bci_size;
204         int ret;
205         DMA_LOCALS;
206
207         if (!count)
208                 return 0;
209
210         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
211                 ret = savage_verify_state_s3d(dev_priv, start, count, regs);
212                 if (ret != 0)
213                         return ret;
214                 /* scissor regs are emitted in savage_dispatch_draw */
215                 if (start < SAVAGE_SCSTART_S3D) {
216                         if (start + count > SAVAGE_SCEND_S3D + 1)
217                                 count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
218                         if (start + count > SAVAGE_SCSTART_S3D)
219                                 count = SAVAGE_SCSTART_S3D - start;
220                 } else if (start <= SAVAGE_SCEND_S3D) {
221                         if (start + count > SAVAGE_SCEND_S3D + 1) {
222                                 count -= SAVAGE_SCEND_S3D + 1 - start;
223                                 start = SAVAGE_SCEND_S3D + 1;
224                         } else
225                                 return 0;
226                 }
227         } else {
228                 ret = savage_verify_state_s4(dev_priv, start, count, regs);
229                 if (ret != 0)
230                         return ret;
231                 /* scissor regs are emitted in savage_dispatch_draw */
232                 if (start < SAVAGE_DRAWCTRL0_S4) {
233                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
234                                 count2 = count -
235                                          (SAVAGE_DRAWCTRL1_S4 + 1 - start);
236                         if (start + count > SAVAGE_DRAWCTRL0_S4)
237                                 count = SAVAGE_DRAWCTRL0_S4 - start;
238                 } else if (start <= SAVAGE_DRAWCTRL1_S4) {
239                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
240                                 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
241                                 start = SAVAGE_DRAWCTRL1_S4 + 1;
242                         } else
243                                 return 0;
244                 }
245         }
246
247         bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
248
249         if (cmd_header->state.global) {
250                 BEGIN_DMA(bci_size + 1);
251                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
252                 dev_priv->waiting = 1;
253         } else {
254                 BEGIN_DMA(bci_size);
255         }
256
257         do {
258                 while (count > 0) {
259                         unsigned int n = count < 255 ? count : 255;
260                         DMA_SET_REGISTERS(start, n);
261                         DMA_COPY(regs, n);
262                         count -= n;
263                         start += n;
264                         regs += n;
265                 }
266                 start += 2;
267                 regs += 2;
268                 count = count2;
269                 count2 = 0;
270         } while (count);
271
272         DMA_COMMIT();
273
274         return 0;
275 }
276
277 static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
278                                     const drm_savage_cmd_header_t *cmd_header,
279                                     const struct drm_buf *dmabuf)
280 {
281         unsigned char reorder = 0;
282         unsigned int prim = cmd_header->prim.prim;
283         unsigned int skip = cmd_header->prim.skip;
284         unsigned int n = cmd_header->prim.count;
285         unsigned int start = cmd_header->prim.start;
286         unsigned int i;
287         BCI_LOCALS;
288
289         if (!dmabuf) {
290                 DRM_ERROR("called without dma buffers!\n");
291                 return -EINVAL;
292         }
293
294         if (!n)
295                 return 0;
296
297         switch (prim) {
298         case SAVAGE_PRIM_TRILIST_201:
299                 reorder = 1;
300                 prim = SAVAGE_PRIM_TRILIST;
301         case SAVAGE_PRIM_TRILIST:
302                 if (n % 3 != 0) {
303                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
304                                   n);
305                         return -EINVAL;
306                 }
307                 break;
308         case SAVAGE_PRIM_TRISTRIP:
309         case SAVAGE_PRIM_TRIFAN:
310                 if (n < 3) {
311                         DRM_ERROR
312                            ("wrong number of vertices %u in TRIFAN/STRIP\n",
313                             n);
314                         return -EINVAL;
315                 }
316                 break;
317         default:
318                 DRM_ERROR("invalid primitive type %u\n", prim);
319                 return -EINVAL;
320         }
321
322         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
323                 if (skip != 0) {
324                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
325                         return -EINVAL;
326                 }
327         } else {
328                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
329                         (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
330                         (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
331                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
332                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
333                         return -EINVAL;
334                 }
335                 if (reorder) {
336                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
337                         return -EINVAL;
338                 }
339         }
340
341         if (start + n > dmabuf->total / 32) {
342                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
343                           start, start + n - 1, dmabuf->total / 32);
344                 return -EINVAL;
345         }
346
347         /* Vertex DMA doesn't work with command DMA at the same time,
348          * so we use BCI_... to submit commands here. Flush buffered
349          * faked DMA first. */
350         DMA_FLUSH();
351
352         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
353                 BEGIN_BCI(2);
354                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
355                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
356                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
357         }
358         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
359                 /* Workaround for what looks like a hardware bug. If a
360                  * WAIT_3D_IDLE was emitted some time before the
361                  * indexed drawing command then the engine will lock
362                  * up. There are two known workarounds:
363                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
364                 BEGIN_BCI(63);
365                 for (i = 0; i < 63; ++i)
366                         BCI_WRITE(BCI_CMD_WAIT);
367                 dev_priv->waiting = 0;
368         }
369
370         prim <<= 25;
371         while (n != 0) {
372                 /* Can emit up to 255 indices (85 triangles) at once. */
373                 unsigned int count = n > 255 ? 255 : n;
374                 if (reorder) {
375                         /* Need to reorder indices for correct flat
376                          * shading while preserving the clock sense
377                          * for correct culling. Only on Savage3D. */
378                         int reorder[3] = { -1, -1, -1 };
379                         reorder[start % 3] = 2;
380
381                         BEGIN_BCI((count + 1 + 1) / 2);
382                         BCI_DRAW_INDICES_S3D(count, prim, start + 2);
383
384                         for (i = start + 1; i + 1 < start + count; i += 2)
385                                 BCI_WRITE((i + reorder[i % 3]) |
386                                           ((i + 1 +
387                                             reorder[(i + 1) % 3]) << 16));
388                         if (i < start + count)
389                                 BCI_WRITE(i + reorder[i % 3]);
390                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
391                         BEGIN_BCI((count + 1 + 1) / 2);
392                         BCI_DRAW_INDICES_S3D(count, prim, start);
393
394                         for (i = start + 1; i + 1 < start + count; i += 2)
395                                 BCI_WRITE(i | ((i + 1) << 16));
396                         if (i < start + count)
397                                 BCI_WRITE(i);
398                 } else {
399                         BEGIN_BCI((count + 2 + 1) / 2);
400                         BCI_DRAW_INDICES_S4(count, prim, skip);
401
402                         for (i = start; i + 1 < start + count; i += 2)
403                                 BCI_WRITE(i | ((i + 1) << 16));
404                         if (i < start + count)
405                                 BCI_WRITE(i);
406                 }
407
408                 start += count;
409                 n -= count;
410
411                 prim |= BCI_CMD_DRAW_CONT;
412         }
413
414         return 0;
415 }
416
417 static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
418                                    const drm_savage_cmd_header_t *cmd_header,
419                                    const uint32_t *vtxbuf, unsigned int vb_size,
420                                    unsigned int vb_stride)
421 {
422         unsigned char reorder = 0;
423         unsigned int prim = cmd_header->prim.prim;
424         unsigned int skip = cmd_header->prim.skip;
425         unsigned int n = cmd_header->prim.count;
426         unsigned int start = cmd_header->prim.start;
427         unsigned int vtx_size;
428         unsigned int i;
429         DMA_LOCALS;
430
431         if (!n)
432                 return 0;
433
434         switch (prim) {
435         case SAVAGE_PRIM_TRILIST_201:
436                 reorder = 1;
437                 prim = SAVAGE_PRIM_TRILIST;
438         case SAVAGE_PRIM_TRILIST:
439                 if (n % 3 != 0) {
440                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
441                                   n);
442                         return -EINVAL;
443                 }
444                 break;
445         case SAVAGE_PRIM_TRISTRIP:
446         case SAVAGE_PRIM_TRIFAN:
447                 if (n < 3) {
448                         DRM_ERROR
449                             ("wrong number of vertices %u in TRIFAN/STRIP\n",
450                              n);
451                         return -EINVAL;
452                 }
453                 break;
454         default:
455                 DRM_ERROR("invalid primitive type %u\n", prim);
456                 return -EINVAL;
457         }
458
459         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
460                 if (skip > SAVAGE_SKIP_ALL_S3D) {
461                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
462                         return -EINVAL;
463                 }
464                 vtx_size = 8; /* full vertex */
465         } else {
466                 if (skip > SAVAGE_SKIP_ALL_S4) {
467                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
468                         return -EINVAL;
469                 }
470                 vtx_size = 10; /* full vertex */
471         }
472
473         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
474                 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
475                 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
476
477         if (vtx_size > vb_stride) {
478                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
479                           vtx_size, vb_stride);
480                 return -EINVAL;
481         }
482
483         if (start + n > vb_size / (vb_stride * 4)) {
484                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
485                           start, start + n - 1, vb_size / (vb_stride * 4));
486                 return -EINVAL;
487         }
488
489         prim <<= 25;
490         while (n != 0) {
491                 /* Can emit up to 255 vertices (85 triangles) at once. */
492                 unsigned int count = n > 255 ? 255 : n;
493                 if (reorder) {
494                         /* Need to reorder vertices for correct flat
495                          * shading while preserving the clock sense
496                          * for correct culling. Only on Savage3D. */
497                         int reorder[3] = { -1, -1, -1 };
498                         reorder[start % 3] = 2;
499
500                         BEGIN_DMA(count * vtx_size + 1);
501                         DMA_DRAW_PRIMITIVE(count, prim, skip);
502
503                         for (i = start; i < start + count; ++i) {
504                                 unsigned int j = i + reorder[i % 3];
505                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
506                         }
507
508                         DMA_COMMIT();
509                 } else {
510                         BEGIN_DMA(count * vtx_size + 1);
511                         DMA_DRAW_PRIMITIVE(count, prim, skip);
512
513                         if (vb_stride == vtx_size) {
514                                 DMA_COPY(&vtxbuf[vb_stride * start],
515                                          vtx_size * count);
516                         } else {
517                                 for (i = start; i < start + count; ++i) {
518                                         DMA_COPY(&vtxbuf[vb_stride * i],
519                                                  vtx_size);
520                                 }
521                         }
522
523                         DMA_COMMIT();
524                 }
525
526                 start += count;
527                 n -= count;
528
529                 prim |= BCI_CMD_DRAW_CONT;
530         }
531
532         return 0;
533 }
534
535 static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
536                                    const drm_savage_cmd_header_t *cmd_header,
537                                    const uint16_t *idx,
538                                    const struct drm_buf *dmabuf)
539 {
540         unsigned char reorder = 0;
541         unsigned int prim = cmd_header->idx.prim;
542         unsigned int skip = cmd_header->idx.skip;
543         unsigned int n = cmd_header->idx.count;
544         unsigned int i;
545         BCI_LOCALS;
546
547         if (!dmabuf) {
548                 DRM_ERROR("called without dma buffers!\n");
549                 return -EINVAL;
550         }
551
552         if (!n)
553                 return 0;
554
555         switch (prim) {
556         case SAVAGE_PRIM_TRILIST_201:
557                 reorder = 1;
558                 prim = SAVAGE_PRIM_TRILIST;
559         case SAVAGE_PRIM_TRILIST:
560                 if (n % 3 != 0) {
561                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
562                         return -EINVAL;
563                 }
564                 break;
565         case SAVAGE_PRIM_TRISTRIP:
566         case SAVAGE_PRIM_TRIFAN:
567                 if (n < 3) {
568                         DRM_ERROR
569                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
570                         return -EINVAL;
571                 }
572                 break;
573         default:
574                 DRM_ERROR("invalid primitive type %u\n", prim);
575                 return -EINVAL;
576         }
577
578         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
579                 if (skip != 0) {
580                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
581                         return -EINVAL;
582                 }
583         } else {
584                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
585                         (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
586                         (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
587                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
588                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
589                         return -EINVAL;
590                 }
591                 if (reorder) {
592                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
593                         return -EINVAL;
594                 }
595         }
596
597         /* Vertex DMA doesn't work with command DMA at the same time,
598          * so we use BCI_... to submit commands here. Flush buffered
599          * faked DMA first. */
600         DMA_FLUSH();
601
602         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
603                 BEGIN_BCI(2);
604                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
605                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
606                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
607         }
608         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
609                 /* Workaround for what looks like a hardware bug. If a
610                  * WAIT_3D_IDLE was emitted some time before the
611                  * indexed drawing command then the engine will lock
612                  * up. There are two known workarounds:
613                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
614                 BEGIN_BCI(63);
615                 for (i = 0; i < 63; ++i)
616                         BCI_WRITE(BCI_CMD_WAIT);
617                 dev_priv->waiting = 0;
618         }
619
620         prim <<= 25;
621         while (n != 0) {
622                 /* Can emit up to 255 indices (85 triangles) at once. */
623                 unsigned int count = n > 255 ? 255 : n;
624
625                 /* check indices */
626                 for (i = 0; i < count; ++i) {
627                         if (idx[i] > dmabuf->total / 32) {
628                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
629                                           i, idx[i], dmabuf->total / 32);
630                                 return -EINVAL;
631                         }
632                 }
633
634                 if (reorder) {
635                         /* Need to reorder indices for correct flat
636                          * shading while preserving the clock sense
637                          * for correct culling. Only on Savage3D. */
638                         int reorder[3] = { 2, -1, -1 };
639
640                         BEGIN_BCI((count + 1 + 1) / 2);
641                         BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
642
643                         for (i = 1; i + 1 < count; i += 2)
644                                 BCI_WRITE(idx[i + reorder[i % 3]] |
645                                           (idx[i + 1 +
646                                            reorder[(i + 1) % 3]] << 16));
647                         if (i < count)
648                                 BCI_WRITE(idx[i + reorder[i % 3]]);
649                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
650                         BEGIN_BCI((count + 1 + 1) / 2);
651                         BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
652
653                         for (i = 1; i + 1 < count; i += 2)
654                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
655                         if (i < count)
656                                 BCI_WRITE(idx[i]);
657                 } else {
658                         BEGIN_BCI((count + 2 + 1) / 2);
659                         BCI_DRAW_INDICES_S4(count, prim, skip);
660
661                         for (i = 0; i + 1 < count; i += 2)
662                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
663                         if (i < count)
664                                 BCI_WRITE(idx[i]);
665                 }
666
667                 idx += count;
668                 n -= count;
669
670                 prim |= BCI_CMD_DRAW_CONT;
671         }
672
673         return 0;
674 }
675
676 static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
677                                   const drm_savage_cmd_header_t *cmd_header,
678                                   const uint16_t *idx,
679                                   const uint32_t *vtxbuf,
680                                   unsigned int vb_size, unsigned int vb_stride)
681 {
682         unsigned char reorder = 0;
683         unsigned int prim = cmd_header->idx.prim;
684         unsigned int skip = cmd_header->idx.skip;
685         unsigned int n = cmd_header->idx.count;
686         unsigned int vtx_size;
687         unsigned int i;
688         DMA_LOCALS;
689
690         if (!n)
691                 return 0;
692
693         switch (prim) {
694         case SAVAGE_PRIM_TRILIST_201:
695                 reorder = 1;
696                 prim = SAVAGE_PRIM_TRILIST;
697         case SAVAGE_PRIM_TRILIST:
698                 if (n % 3 != 0) {
699                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
700                         return -EINVAL;
701                 }
702                 break;
703         case SAVAGE_PRIM_TRISTRIP:
704         case SAVAGE_PRIM_TRIFAN:
705                 if (n < 3) {
706                         DRM_ERROR
707                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
708                         return -EINVAL;
709                 }
710                 break;
711         default:
712                 DRM_ERROR("invalid primitive type %u\n", prim);
713                 return -EINVAL;
714         }
715
716         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
717                 if (skip > SAVAGE_SKIP_ALL_S3D) {
718                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
719                         return -EINVAL;
720                 }
721                 vtx_size = 8; /* full vertex */
722         } else {
723                 if (skip > SAVAGE_SKIP_ALL_S4) {
724                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
725                         return -EINVAL;
726                 }
727                 vtx_size = 10; /* full vertex */
728         }
729
730         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
731                 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
732                 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
733
734         if (vtx_size > vb_stride) {
735                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
736                           vtx_size, vb_stride);
737                 return -EINVAL;
738         }
739
740         prim <<= 25;
741         while (n != 0) {
742                 /* Can emit up to 255 vertices (85 triangles) at once. */
743                 unsigned int count = n > 255 ? 255 : n;
744
745                 /* Check indices */
746                 for (i = 0; i < count; ++i) {
747                         if (idx[i] > vb_size / (vb_stride * 4)) {
748                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
749                                           i, idx[i],  vb_size / (vb_stride * 4));
750                                 return -EINVAL;
751                         }
752                 }
753
754                 if (reorder) {
755                         /* Need to reorder vertices for correct flat
756                          * shading while preserving the clock sense
757                          * for correct culling. Only on Savage3D. */
758                         int reorder[3] = { 2, -1, -1 };
759
760                         BEGIN_DMA(count * vtx_size + 1);
761                         DMA_DRAW_PRIMITIVE(count, prim, skip);
762
763                         for (i = 0; i < count; ++i) {
764                                 unsigned int j = idx[i + reorder[i % 3]];
765                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
766                         }
767
768                         DMA_COMMIT();
769                 } else {
770                         BEGIN_DMA(count * vtx_size + 1);
771                         DMA_DRAW_PRIMITIVE(count, prim, skip);
772
773                         for (i = 0; i < count; ++i) {
774                                 unsigned int j = idx[i];
775                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
776                         }
777
778                         DMA_COMMIT();
779                 }
780
781                 idx += count;
782                 n -= count;
783
784                 prim |= BCI_CMD_DRAW_CONT;
785         }
786
787         return 0;
788 }
789
790 static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
791                                  const drm_savage_cmd_header_t *cmd_header,
792                                  const drm_savage_cmd_header_t *data,
793                                  unsigned int nbox,
794                                  const struct drm_clip_rect *boxes)
795 {
796         unsigned int flags = cmd_header->clear0.flags;
797         unsigned int clear_cmd;
798         unsigned int i, nbufs;
799         DMA_LOCALS;
800
801         if (nbox == 0)
802                 return 0;
803
804         clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
805                 BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
806         BCI_CMD_SET_ROP(clear_cmd,0xCC);
807
808         nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
809             ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
810         if (nbufs == 0)
811                 return 0;
812
813         if (data->clear1.mask != 0xffffffff) {
814                 /* set mask */
815                 BEGIN_DMA(2);
816                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
817                 DMA_WRITE(data->clear1.mask);
818                 DMA_COMMIT();
819         }
820         for (i = 0; i < nbox; ++i) {
821                 unsigned int x, y, w, h;
822                 unsigned int buf;
823
824                 x = boxes[i].x1, y = boxes[i].y1;
825                 w = boxes[i].x2 - boxes[i].x1;
826                 h = boxes[i].y2 - boxes[i].y1;
827                 BEGIN_DMA(nbufs * 6);
828                 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
829                         if (!(flags & buf))
830                                 continue;
831                         DMA_WRITE(clear_cmd);
832                         switch (buf) {
833                         case SAVAGE_FRONT:
834                                 DMA_WRITE(dev_priv->front_offset);
835                                 DMA_WRITE(dev_priv->front_bd);
836                                 break;
837                         case SAVAGE_BACK:
838                                 DMA_WRITE(dev_priv->back_offset);
839                                 DMA_WRITE(dev_priv->back_bd);
840                                 break;
841                         case SAVAGE_DEPTH:
842                                 DMA_WRITE(dev_priv->depth_offset);
843                                 DMA_WRITE(dev_priv->depth_bd);
844                                 break;
845                         }
846                         DMA_WRITE(data->clear1.value);
847                         DMA_WRITE(BCI_X_Y(x, y));
848                         DMA_WRITE(BCI_W_H(w, h));
849                 }
850                 DMA_COMMIT();
851         }
852         if (data->clear1.mask != 0xffffffff) {
853                 /* reset mask */
854                 BEGIN_DMA(2);
855                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
856                 DMA_WRITE(0xffffffff);
857                 DMA_COMMIT();
858         }
859
860         return 0;
861 }
862
863 static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
864                                 unsigned int nbox, const struct drm_clip_rect *boxes)
865 {
866         unsigned int swap_cmd;
867         unsigned int i;
868         DMA_LOCALS;
869
870         if (nbox == 0)
871                 return 0;
872
873         swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
874                 BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
875         BCI_CMD_SET_ROP(swap_cmd,0xCC);
876
877         for (i = 0; i < nbox; ++i) {
878                 BEGIN_DMA(6);
879                 DMA_WRITE(swap_cmd);
880                 DMA_WRITE(dev_priv->back_offset);
881                 DMA_WRITE(dev_priv->back_bd);
882                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
883                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
884                 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
885                                   boxes[i].y2 - boxes[i].y1));
886                 DMA_COMMIT();
887         }
888
889         return 0;
890 }
891
892 static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
893                                 const drm_savage_cmd_header_t *start,
894                                 const drm_savage_cmd_header_t *end,
895                                 const struct drm_buf *dmabuf,
896                                 const unsigned int *vtxbuf,
897                                 unsigned int vb_size, unsigned int vb_stride,
898                                 unsigned int nbox,
899                                 const struct drm_clip_rect *boxes)
900 {
901         unsigned int i, j;
902         int ret;
903
904         for (i = 0; i < nbox; ++i) {
905                 const drm_savage_cmd_header_t *cmdbuf;
906                 dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
907
908                 cmdbuf = start;
909                 while (cmdbuf < end) {
910                         drm_savage_cmd_header_t cmd_header;
911                         cmd_header = *cmdbuf;
912                         cmdbuf++;
913                         switch (cmd_header.cmd.cmd) {
914                         case SAVAGE_CMD_DMA_PRIM:
915                                 ret = savage_dispatch_dma_prim(
916                                         dev_priv, &cmd_header, dmabuf);
917                                 break;
918                         case SAVAGE_CMD_VB_PRIM:
919                                 ret = savage_dispatch_vb_prim(
920                                         dev_priv, &cmd_header,
921                                         vtxbuf, vb_size, vb_stride);
922                                 break;
923                         case SAVAGE_CMD_DMA_IDX:
924                                 j = (cmd_header.idx.count + 3) / 4;
925                                 /* j was check in savage_bci_cmdbuf */
926                                 ret = savage_dispatch_dma_idx(dev_priv,
927                                         &cmd_header, (const uint16_t *)cmdbuf,
928                                         dmabuf);
929                                 cmdbuf += j;
930                                 break;
931                         case SAVAGE_CMD_VB_IDX:
932                                 j = (cmd_header.idx.count + 3) / 4;
933                                 /* j was check in savage_bci_cmdbuf */
934                                 ret = savage_dispatch_vb_idx(dev_priv,
935                                         &cmd_header, (const uint16_t *)cmdbuf,
936                                         (const uint32_t *)vtxbuf, vb_size,
937                                         vb_stride);
938                                 cmdbuf += j;
939                                 break;
940                         default:
941                                 /* What's the best return code? EFAULT? */
942                                 DRM_ERROR("IMPLEMENTATION ERROR: "
943                                           "non-drawing-command %d\n",
944                                           cmd_header.cmd.cmd);
945                                 return -EINVAL;
946                         }
947
948                         if (ret != 0)
949                                 return ret;
950                 }
951         }
952
953         return 0;
954 }
955
956 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
957 {
958         drm_savage_private_t *dev_priv = dev->dev_private;
959         struct drm_device_dma *dma = dev->dma;
960         struct drm_buf *dmabuf;
961         drm_savage_cmdbuf_t *cmdbuf = data;
962         drm_savage_cmd_header_t *kcmd_addr = NULL;
963         drm_savage_cmd_header_t *first_draw_cmd;
964         unsigned int *kvb_addr = NULL;
965         struct drm_clip_rect *kbox_addr = NULL;
966         unsigned int i, j;
967         int ret = 0;
968
969         DRM_DEBUG("\n");
970
971         LOCK_TEST_WITH_RETURN(dev, file_priv);
972
973         if (dma && dma->buflist) {
974                 if (cmdbuf->dma_idx > dma->buf_count) {
975                         DRM_ERROR
976                             ("vertex buffer index %u out of range (0-%u)\n",
977                              cmdbuf->dma_idx, dma->buf_count - 1);
978                         return -EINVAL;
979                 }
980                 dmabuf = dma->buflist[cmdbuf->dma_idx];
981         } else {
982                 dmabuf = NULL;
983         }
984
985         /* Copy the user buffers into kernel temporary areas.  This hasn't been
986          * a performance loss compared to VERIFYAREA_READ/
987          * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
988          * for locking on FreeBSD.
989          */
990         if (cmdbuf->size) {
991                 kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER);
992                 if (kcmd_addr == NULL)
993                         return -ENOMEM;
994
995                 if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
996                                        cmdbuf->size * 8))
997                 {
998                         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
999                         return -EFAULT;
1000                 }
1001                 cmdbuf->cmd_addr = kcmd_addr;
1002         }
1003         if (cmdbuf->vb_size) {
1004                 kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER);
1005                 if (kvb_addr == NULL) {
1006                         ret = -ENOMEM;
1007                         goto done;
1008                 }
1009
1010                 if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr,
1011                                        cmdbuf->vb_size)) {
1012                         ret = -EFAULT;
1013                         goto done;
1014                 }
1015                 cmdbuf->vb_addr = kvb_addr;
1016         }
1017         if (cmdbuf->nbox) {
1018                 kbox_addr = drm_alloc(cmdbuf->nbox *
1019                                       sizeof(struct drm_clip_rect),
1020                                       DRM_MEM_DRIVER);
1021                 if (kbox_addr == NULL) {
1022                         ret = -ENOMEM;
1023                         goto done;
1024                 }
1025
1026                 if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr,
1027                                        cmdbuf->nbox *
1028                                        sizeof(struct drm_clip_rect))) {
1029                         ret = -EFAULT;
1030                         goto done;
1031                 }
1032                 cmdbuf->box_addr = kbox_addr;
1033         }
1034
1035         /* Make sure writes to DMA buffers are finished before sending
1036          * DMA commands to the graphics hardware. */
1037         DRM_MEMORYBARRIER();
1038
1039         /* Coming from user space. Don't know if the Xserver has
1040          * emitted wait commands. Assuming the worst. */
1041         dev_priv->waiting = 1;
1042
1043         i = 0;
1044         first_draw_cmd = NULL;
1045         while (i < cmdbuf->size) {
1046                 drm_savage_cmd_header_t cmd_header;
1047                 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
1048                 cmdbuf->cmd_addr++;
1049                 i++;
1050
1051                 /* Group drawing commands with same state to minimize
1052                  * iterations over clip rects. */
1053                 j = 0;
1054                 switch (cmd_header.cmd.cmd) {
1055                 case SAVAGE_CMD_DMA_IDX:
1056                 case SAVAGE_CMD_VB_IDX:
1057                         j = (cmd_header.idx.count + 3) / 4;
1058                         if (i + j > cmdbuf->size) {
1059                                 DRM_ERROR("indexed drawing command extends "
1060                                           "beyond end of command buffer\n");
1061                                 DMA_FLUSH();
1062                                 return -EINVAL;
1063                         }
1064                         /* fall through */
1065                 case SAVAGE_CMD_DMA_PRIM:
1066                 case SAVAGE_CMD_VB_PRIM:
1067                         if (!first_draw_cmd)
1068                                 first_draw_cmd = cmdbuf->cmd_addr - 1;
1069                         cmdbuf->cmd_addr += j;
1070                         i += j;
1071                         break;
1072                 default:
1073                         if (first_draw_cmd) {
1074                                 ret = savage_dispatch_draw(
1075                                         dev_priv, first_draw_cmd,
1076                                         cmdbuf->cmd_addr - 1,
1077                                         dmabuf, cmdbuf->vb_addr,
1078                                         cmdbuf->vb_size,
1079                                         cmdbuf->vb_stride,
1080                                         cmdbuf->nbox, cmdbuf->box_addr);
1081                                 if (ret != 0)
1082                                         return ret;
1083                                 first_draw_cmd = NULL;
1084                         }
1085                 }
1086                 if (first_draw_cmd)
1087                         continue;
1088
1089                 switch (cmd_header.cmd.cmd) {
1090                 case SAVAGE_CMD_STATE:
1091                         j = (cmd_header.state.count + 1) / 2;
1092                         if (i + j > cmdbuf->size) {
1093                                 DRM_ERROR("command SAVAGE_CMD_STATE extends "
1094                                           "beyond end of command buffer\n");
1095                                 DMA_FLUSH();
1096                                 ret = -EINVAL;
1097                                 goto done;
1098                         }
1099                         ret = savage_dispatch_state(dev_priv, &cmd_header,
1100                                 (const uint32_t *)cmdbuf->cmd_addr);
1101                         cmdbuf->cmd_addr += j;
1102                         i += j;
1103                         break;
1104                 case SAVAGE_CMD_CLEAR:
1105                         if (i + 1 > cmdbuf->size) {
1106                                 DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
1107                                           "beyond end of command buffer\n");
1108                                 DMA_FLUSH();
1109                                 ret = -EINVAL;
1110                                 goto done;
1111                         }
1112                         ret = savage_dispatch_clear(dev_priv, &cmd_header,
1113                                                     cmdbuf->cmd_addr,
1114                                                     cmdbuf->nbox,
1115                                                     cmdbuf->box_addr);
1116                         cmdbuf->cmd_addr++;
1117                         i++;
1118                         break;
1119                 case SAVAGE_CMD_SWAP:
1120                         ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
1121                                                    cmdbuf->box_addr);
1122                         break;
1123                 default:
1124                         DRM_ERROR("invalid command 0x%x\n",
1125                                   cmd_header.cmd.cmd);
1126                         DMA_FLUSH();
1127                         ret = -EINVAL;
1128                         goto done;
1129                 }
1130
1131                 if (ret != 0) {
1132                         DMA_FLUSH();
1133                         goto done;
1134                 }
1135         }
1136
1137         if (first_draw_cmd) {
1138                 ret = savage_dispatch_draw(
1139                         dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
1140                         cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
1141                         cmdbuf->nbox, cmdbuf->box_addr);
1142                 if (ret != 0) {
1143                         DMA_FLUSH();
1144                         goto done;
1145                 }
1146         }
1147
1148         DMA_FLUSH();
1149
1150         if (dmabuf && cmdbuf->discard) {
1151                 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
1152                 uint16_t event;
1153                 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
1154                 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
1155                 savage_freelist_put(dev, dmabuf);
1156         }
1157
1158 done:
1159         /* If we didn't need to allocate them, these'll be NULL */
1160         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
1161         drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER);
1162         drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect),
1163                  DRM_MEM_DRIVER);
1164
1165         return ret;
1166 }