drm/i915: Sync ringbuffer code with Linux 3.8.13
[dragonfly.git] sys/dev/drm/i915/intel_ringbuffer.c
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao <haihao.xiang@intel.com>
27  *
28  * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
29  */
30
31 #include <drm/drmP.h>
32 #include <drm/i915_drm.h>
33 #include "i915_drv.h"
34 #include "intel_drv.h"
35 #include "intel_ringbuffer.h"
36 #include <sys/sched.h>
37
38 /*
39  * 965+ support PIPE_CONTROL commands, which provide finer grained control
40  * over cache flushing.
41  */
42 struct pipe_control {
43         struct drm_i915_gem_object *obj;
44         volatile u32 *cpu_page;
45         u32 gtt_offset;
46 };
47
48 static inline int ring_space(struct intel_ring_buffer *ring)
49 {
50         int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
51         if (space < 0)
52                 space += ring->size;
53         return space;
54 }
55
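/*
 * Worked example for ring_space() above (illustrative numbers only): with
 * a 32-page ring (size = 0x20000 with 4 KB pages), head = 0x100 and
 * tail = 0x1ff00, and assuming I915_RING_FREE_SPACE is the usual 64-byte
 * reserve, the raw difference 0x100 - (0x1ff00 + 0x40) is negative, so
 * size is added back: 0x100 - 0x1ff40 + 0x20000 = 0x1c0 bytes of usable
 * space.  The reserve keeps the tail from ever catching up with the head
 * exactly.
 */
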
56 static int
57 gen2_render_ring_flush(struct intel_ring_buffer *ring,
58                        u32      invalidate_domains,
59                        u32      flush_domains)
60 {
61         u32 cmd;
62         int ret;
63
64         cmd = MI_FLUSH;
65         if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
66                 cmd |= MI_NO_WRITE_FLUSH;
67
68         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
69                 cmd |= MI_READ_FLUSH;
70
71         ret = intel_ring_begin(ring, 2);
72         if (ret)
73                 return ret;
74
75         intel_ring_emit(ring, cmd);
76         intel_ring_emit(ring, MI_NOOP);
77         intel_ring_advance(ring);
78
79         return 0;
80 }
81
82 static int
83 gen4_render_ring_flush(struct intel_ring_buffer *ring,
84                        u32      invalidate_domains,
85                        u32      flush_domains)
86 {
87         struct drm_device *dev = ring->dev;
88         u32 cmd;
89         int ret;
90
91         /*
92          * read/write caches:
93          *
94          * I915_GEM_DOMAIN_RENDER is always invalidated, but is
95          * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
96          * also flushed at 2d versus 3d pipeline switches.
97          *
98          * read-only caches:
99          *
100          * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
101          * MI_READ_FLUSH is set, and is always flushed on 965.
102          *
103          * I915_GEM_DOMAIN_COMMAND may not exist?
104          *
105          * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
106          * invalidated when MI_EXE_FLUSH is set.
107          *
108          * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
109          * invalidated with every MI_FLUSH.
110          *
111          * TLBs:
112          *
113          * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
114          * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
115          * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
116          * are flushed at any MI_FLUSH.
117          */
118
119         cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
120         if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
121                 cmd &= ~MI_NO_WRITE_FLUSH;
122         if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
123                 cmd |= MI_EXE_FLUSH;
124
125         if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
126             (IS_G4X(dev) || IS_GEN5(dev)))
127                 cmd |= MI_INVALIDATE_ISP;
128
129         ret = intel_ring_begin(ring, 2);
130         if (ret)
131                 return ret;
132
133         intel_ring_emit(ring, cmd);
134         intel_ring_emit(ring, MI_NOOP);
135         intel_ring_advance(ring);
136
137         return 0;
138 }
139
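/*
 * Worked example for the domain handling above (hypothetical caller): with
 *
 *   invalidate_domains = I915_GEM_DOMAIN_INSTRUCTION
 *   flush_domains      = I915_GEM_DOMAIN_RENDER
 *
 * the render domain clears MI_NO_WRITE_FLUSH and the instruction domain
 * adds MI_EXE_FLUSH, so cmd ends up as MI_FLUSH | MI_EXE_FLUSH.
 * MI_INVALIDATE_ISP would only be OR'ed in on G4X/GEN5 when
 * I915_GEM_DOMAIN_COMMAND is being invalidated.
 */
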
140 /**
141  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
142  * implementing two workarounds on gen6.  From section 1.4.7.1
143  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
144  *
145  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
146  * produced by non-pipelined state commands), software needs to first
147  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
148  * 0.
149  *
150  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
151  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
152  *
153  * And the workaround for these two requires this workaround first:
154  *
155  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
156  * BEFORE the pipe-control with a post-sync op and no write-cache
157  * flushes.
158  *
159  * And this last workaround is tricky because of the requirements on
160  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
161  * volume 2 part 1:
162  *
163  *     "1 of the following must also be set:
164  *      - Render Target Cache Flush Enable ([12] of DW1)
165  *      - Depth Cache Flush Enable ([0] of DW1)
166  *      - Stall at Pixel Scoreboard ([1] of DW1)
167  *      - Depth Stall ([13] of DW1)
168  *      - Post-Sync Operation ([13] of DW1)
169  *      - Notify Enable ([8] of DW1)"
170  *
171  * The cache flushes require the workaround flush that triggered this
172  * one, so we can't use it.  Depth stall would trigger the same.
173  * Post-sync nonzero is what triggered this second workaround, so we
174  * can't use that one either.  Notify enable is IRQs, which aren't
175  * really our business.  That leaves only stall at scoreboard.
176  */
177 static int
178 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
179 {
180         struct pipe_control *pc = ring->private;
181         u32 scratch_addr = pc->gtt_offset + 128;
182         int ret;
183
184
185         ret = intel_ring_begin(ring, 6);
186         if (ret)
187                 return ret;
188
189         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
190         intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
191                         PIPE_CONTROL_STALL_AT_SCOREBOARD);
192         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
193         intel_ring_emit(ring, 0); /* low dword */
194         intel_ring_emit(ring, 0); /* high dword */
195         intel_ring_emit(ring, MI_NOOP);
196         intel_ring_advance(ring);
197
198         ret = intel_ring_begin(ring, 6);
199         if (ret)
200                 return ret;
201
202         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
203         intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
204         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
205         intel_ring_emit(ring, 0);
206         intel_ring_emit(ring, 0);
207         intel_ring_emit(ring, MI_NOOP);
208         intel_ring_advance(ring);
209
210         return 0;
211 }
212
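/*
 * Summary of what the helper above actually queues (illustration only):
 * two 6-dword packets, each padded with an MI_NOOP,
 *
 *   PIPE_CONTROL(5), CS_STALL | STALL_AT_SCOREBOARD, scratch, 0, 0, NOOP
 *   PIPE_CONTROL(5), QW_WRITE,                       scratch, 0, 0, NOOP
 *
 * i.e. first the mandated CS stall at the pixel scoreboard, then a
 * post-sync qword write to the per-ring scratch page at gtt_offset + 128,
 * which satisfies the "non-zero post-sync op" requirement before the real
 * flush that gen6_render_ring_flush() emits next.
 */
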
213 static int
214 gen6_render_ring_flush(struct intel_ring_buffer *ring,
215                          u32 invalidate_domains, u32 flush_domains)
216 {
217         u32 flags = 0;
218         struct pipe_control *pc = ring->private;
219         u32 scratch_addr = pc->gtt_offset + 128;
220         int ret;
221
222         /* Force SNB workarounds for PIPE_CONTROL flushes */
223         ret = intel_emit_post_sync_nonzero_flush(ring);
224         if (ret)
225                 return ret;
226
227         /* Just flush everything.  Experiments have shown that reducing the
228          * number of bits based on the write domains has little performance
229          * impact.
230          */
231         flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
232         flags |= PIPE_CONTROL_TLB_INVALIDATE;
233         flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
234         flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
235         flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
236         flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
237         flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
238         flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
239         /*
240          * Ensure that any following seqno writes only happen when the render
241          * cache is indeed flushed (but only if the caller actually wants that).
242          */
243         if (flush_domains)
244                 flags |= PIPE_CONTROL_CS_STALL;
245
246         ret = intel_ring_begin(ring, 6);
247         if (ret)
248                 return ret;
249
250         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
251         intel_ring_emit(ring, flags);
252         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
253         intel_ring_emit(ring, 0); /* lower dword */
254         intel_ring_emit(ring, 0); /* upper dword */
255         intel_ring_emit(ring, MI_NOOP);
256         intel_ring_advance(ring);
257
258         return 0;
259 }
260
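/*
 * Illustration of the flush_domains/CS_STALL interplay above: the caller
 * intel_ring_flush_all_caches() near the end of this file uses
 * ring->flush(ring, 0, I915_GEM_GPU_DOMAINS), so flush_domains is non-zero
 * and PIPE_CONTROL_CS_STALL is added, ordering the following seqno write
 * after the cache flush.  A pure invalidate such as
 * ring->flush(ring, I915_GEM_GPU_DOMAINS, 0) from
 * intel_ring_invalidate_all_caches() (when no caches are dirty) requests
 * no stall, since there is nothing that a later seqno write must wait for.
 */
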
261 static void ring_write_tail(struct intel_ring_buffer *ring,
262                             uint32_t value)
263 {
264         drm_i915_private_t *dev_priv = ring->dev->dev_private;
265         I915_WRITE_TAIL(ring, value);
266 }
267
268 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
269 {
270         drm_i915_private_t *dev_priv = ring->dev->dev_private;
271         uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
272                         RING_ACTHD(ring->mmio_base) : ACTHD;
273
274         return I915_READ(acthd_reg);
275 }
276
277 static int init_ring_common(struct intel_ring_buffer *ring)
278 {
279         struct drm_device *dev = ring->dev;
280         drm_i915_private_t *dev_priv = dev->dev_private;
281         struct drm_i915_gem_object *obj = ring->obj;
282         int ret = 0;
283         uint32_t head;
284
285         if (HAS_FORCE_WAKE(dev))
286                 gen6_gt_force_wake_get(dev_priv);
287
288         /* Stop the ring if it's running. */
289         I915_WRITE_CTL(ring, 0);
290         I915_WRITE_HEAD(ring, 0);
291         ring->write_tail(ring, 0);
292
293         /* Initialize the ring. */
294         I915_WRITE_START(ring, obj->gtt_offset);
295         head = I915_READ_HEAD(ring) & HEAD_ADDR;
296
297         /* G45 ring initialization fails to reset head to zero */
298         if (head != 0) {
299                 DRM_DEBUG("%s head not reset to zero "
300                               "ctl %08x head %08x tail %08x start %08x\n",
301                               ring->name,
302                               I915_READ_CTL(ring),
303                               I915_READ_HEAD(ring),
304                               I915_READ_TAIL(ring),
305                               I915_READ_START(ring));
306
307                 I915_WRITE_HEAD(ring, 0);
308
309                 if (I915_READ_HEAD(ring) & HEAD_ADDR) {
310                         DRM_ERROR("failed to set %s head to zero "
311                                   "ctl %08x head %08x tail %08x start %08x\n",
312                                   ring->name,
313                                   I915_READ_CTL(ring),
314                                   I915_READ_HEAD(ring),
315                                   I915_READ_TAIL(ring),
316                                   I915_READ_START(ring));
317                 }
318         }
319
320         I915_WRITE_CTL(ring,
321                         ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
322                         | RING_VALID);
323
324         /* If the head is still not zero, the ring is dead */
325         if (_intel_wait_for(ring->dev,
326             (I915_READ_CTL(ring) & RING_VALID) != 0 &&
327              I915_READ_START(ring) == obj->gtt_offset &&
328              (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
329             50, 1, "915rii")) {
330                 DRM_ERROR("%s initialization failed "
331                                 "ctl %08x head %08x tail %08x start %08x\n",
332                                 ring->name,
333                                 I915_READ_CTL(ring),
334                                 I915_READ_HEAD(ring),
335                                 I915_READ_TAIL(ring),
336                                 I915_READ_START(ring));
337                 ret = -EIO;
338                 goto out;
339         }
340
341         if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
342                 i915_kernel_lost_context(ring->dev);
343         else {
344                 ring->head = I915_READ_HEAD(ring);
345                 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
346                 ring->space = ring_space(ring);
347                 ring->last_retired_head = -1;
348         }
349
350 out:
351         if (HAS_FORCE_WAKE(dev))
352                 gen6_gt_force_wake_put(dev_priv);
353
354         return ret;
355 }
356
357 static int
358 init_pipe_control(struct intel_ring_buffer *ring)
359 {
360         struct pipe_control *pc;
361         struct drm_i915_gem_object *obj;
362         int ret;
363
364         if (ring->private)
365                 return 0;
366
367         pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
368         if (!pc)
369                 return -ENOMEM;
370
371         obj = i915_gem_alloc_object(ring->dev, 4096);
372         if (obj == NULL) {
373                 DRM_ERROR("Failed to allocate seqno page\n");
374                 ret = -ENOMEM;
375                 goto err;
376         }
377
378         i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
379
380         ret = i915_gem_object_pin(obj, 4096, true);
381         if (ret)
382                 goto err_unref;
383
384         pc->gtt_offset = obj->gtt_offset;
385         pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
386         if (pc->cpu_page == NULL)
387                 goto err_unpin;
388         pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
389         pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
390             (vm_offset_t)pc->cpu_page + PAGE_SIZE);
391
392         pc->obj = obj;
393         ring->private = pc;
394         return 0;
395
396 err_unpin:
397         i915_gem_object_unpin(obj);
398 err_unref:
399         drm_gem_object_unreference(&obj->base);
400 err:
401         drm_free(pc, DRM_I915_GEM);
402         return ret;
403 }
404
405 static void
406 cleanup_pipe_control(struct intel_ring_buffer *ring)
407 {
408         struct pipe_control *pc = ring->private;
409         struct drm_i915_gem_object *obj;
410
411         if (!ring->private)
412                 return;
413
414         obj = pc->obj;
415         pmap_qremove((vm_offset_t)pc->cpu_page, 1);
416         kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
417         i915_gem_object_unpin(obj);
418         drm_gem_object_unreference(&obj->base);
419
420         drm_free(pc, DRM_I915_GEM);
421         ring->private = NULL;
422 }
423
424 static int init_render_ring(struct intel_ring_buffer *ring)
425 {
426         struct drm_device *dev = ring->dev;
427         struct drm_i915_private *dev_priv = dev->dev_private;
428         int ret = init_ring_common(ring);
429
430         if (INTEL_INFO(dev)->gen > 3)
431                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
432
433         /* We need to disable the AsyncFlip performance optimisations in order
434          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
435          * programmed to '1' on all products.
436          */
437         if (INTEL_INFO(dev)->gen >= 6)
438                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
439
440         /* Required for the hardware to program scanline values for waiting */
441         if (INTEL_INFO(dev)->gen == 6)
442                 I915_WRITE(GFX_MODE,
443                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
444
445         if (IS_GEN7(dev))
446                 I915_WRITE(GFX_MODE_GEN7,
447                            _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
448                            _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
449
450         if (INTEL_INFO(dev)->gen >= 5) {
451                 ret = init_pipe_control(ring);
452                 if (ret)
453                         return ret;
454         }
455
456         if (IS_GEN6(dev)) {
457                 /* From the Sandybridge PRM, volume 1 part 3, page 24:
458                  * "If this bit is set, STCunit will have LRA as replacement
459                  *  policy. [...] This bit must be reset.  LRA replacement
460                  *  policy is not supported."
461                  */
462                 I915_WRITE(CACHE_MODE_0,
463                            _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
464
465                 /* This is not explicitly set for GEN6, so read the register.
466                  * see intel_ring_mi_set_context() for why we care.
467                  * TODO: consider explicitly setting the bit for GEN5
468                  */
469                 ring->itlb_before_ctx_switch =
470                         !!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
471         }
472
473         if (INTEL_INFO(dev)->gen >= 6)
474                 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
475
476         if (HAS_L3_GPU_CACHE(dev))
477                 I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
478
479         return ret;
480 }
481
482 static void render_ring_cleanup(struct intel_ring_buffer *ring)
483 {
484         if (!ring->private)
485                 return;
486
487         cleanup_pipe_control(ring);
488 }
489
490 static void
491 update_mboxes(struct intel_ring_buffer *ring,
492             u32 seqno,
493             u32 mmio_offset)
494 {
495         intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
496                               MI_SEMAPHORE_GLOBAL_GTT |
497                               MI_SEMAPHORE_REGISTER |
498                               MI_SEMAPHORE_UPDATE);
499         intel_ring_emit(ring, seqno);
500         intel_ring_emit(ring, mmio_offset);
501 }
502
503 /**
504  * gen6_add_request - Update the semaphore mailbox registers
505  *
506  * @ring - ring that is adding a request
507  * @seqno - returns the seqno written into the ring
508  *
509  * Update the mailbox registers in the *other* rings with the current seqno.
510  * This acts like a signal in the canonical semaphore.
511  */
512 static int
513 gen6_add_request(struct intel_ring_buffer *ring,
514                  u32 *seqno)
515 {
516         u32 mbox1_reg;
517         u32 mbox2_reg;
518         int ret;
519
520         ret = intel_ring_begin(ring, 10);
521         if (ret)
522                 return ret;
523
524         mbox1_reg = ring->signal_mbox[0];
525         mbox2_reg = ring->signal_mbox[1];
526
527         *seqno = i915_gem_next_request_seqno(ring);
528
529         update_mboxes(ring, *seqno, mbox1_reg);
530         update_mboxes(ring, *seqno, mbox2_reg);
531         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
532         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
533         intel_ring_emit(ring, *seqno);
534         intel_ring_emit(ring, MI_USER_INTERRUPT);
535         intel_ring_advance(ring);
536
537         return 0;
538 }
539
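/*
 * Sketch of the 10 dwords reserved by gen6_add_request() above
 * (illustration for the render ring, whose signal mailboxes are
 * GEN6_VRSYNC and GEN6_BRSYNC):
 *
 *   MI_SEMAPHORE_MBOX | GLOBAL_GTT | REGISTER | UPDATE,  seqno,  GEN6_VRSYNC
 *   MI_SEMAPHORE_MBOX | GLOBAL_GTT | REGISTER | UPDATE,  seqno,  GEN6_BRSYNC
 *   MI_STORE_DWORD_INDEX,  I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT,
 *   seqno,  MI_USER_INTERRUPT
 *
 * so the BSD and blitter rings see the new seqno in their semaphore
 * mailboxes while the CPU can later read it back from the status page.
 */
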
540 /**
541  * gen6_ring_sync - sync the waiter to the signaller on seqno
542  *
543  * @waiter - ring that is waiting
544  * @signaller - ring which has signalled, or will signal
545  * @seqno - seqno which the waiter will block on
546  */
547 static int
548 gen6_ring_sync(struct intel_ring_buffer *waiter,
549                struct intel_ring_buffer *signaller,
550                u32 seqno)
551 {
552         int ret;
553         u32 dw1 = MI_SEMAPHORE_MBOX |
554                   MI_SEMAPHORE_COMPARE |
555                   MI_SEMAPHORE_REGISTER;
556
557         /* Throughout all of the GEM code, seqno passed implies our current
558          * seqno is >= the last seqno executed. However for hardware the
559          * comparison is strictly greater than.
560          */
561         seqno -= 1;
562
563         WARN_ON(signaller->semaphore_register[waiter->id] ==
564                 MI_SEMAPHORE_SYNC_INVALID);
565
566         ret = intel_ring_begin(waiter, 4);
567         if (ret)
568                 return ret;
569
570         intel_ring_emit(waiter,
571                         dw1 | signaller->semaphore_register[waiter->id]);
572         intel_ring_emit(waiter, seqno);
573         intel_ring_emit(waiter, 0);
574         intel_ring_emit(waiter, MI_NOOP);
575         intel_ring_advance(waiter);
576
577         return 0;
578 }
579
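/*
 * Worked example for the "seqno - 1" adjustment above: to wait for a
 * request with seqno N, the waiter emits a compare value of N - 1.  The
 * hardware only releases the waiter once the signaller's mailbox is
 * strictly greater than that value, i.e. once it holds N or later, which
 * matches the software convention that a passed seqno means
 * "current seqno >= N".
 */
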
580 int render_ring_sync_to(struct intel_ring_buffer *waiter,
581     struct intel_ring_buffer *signaller, u32 seqno);
582 int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
583     struct intel_ring_buffer *signaller, u32 seqno);
584 int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
585     struct intel_ring_buffer *signaller, u32 seqno);
586
587 #define PIPE_CONTROL_FLUSH(ring__, addr__)                                      \
588 do {                                                                    \
589         intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |                \
590                  PIPE_CONTROL_DEPTH_STALL);                             \
591         intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);                    \
592         intel_ring_emit(ring__, 0);                                                     \
593         intel_ring_emit(ring__, 0);                                                     \
594 } while (0)
595
596 static int
597 pc_render_add_request(struct intel_ring_buffer *ring,
598                       uint32_t *result)
599 {
600         u32 seqno = i915_gem_next_request_seqno(ring);
601         struct pipe_control *pc = ring->private;
602         u32 scratch_addr = pc->gtt_offset + 128;
603         int ret;
604
605         /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
606          * incoherent with writes to memory, i.e. completely fubar,
607          * so we need to use PIPE_NOTIFY instead.
608          *
609          * However, we also need to workaround the qword write
610          * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
611          * memory before requesting an interrupt.
612          */
613         ret = intel_ring_begin(ring, 32);
614         if (ret)
615                 return ret;
616
617         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
618                         PIPE_CONTROL_WRITE_FLUSH |
619                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
620         intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
621         intel_ring_emit(ring, seqno);
622         intel_ring_emit(ring, 0);
623         PIPE_CONTROL_FLUSH(ring, scratch_addr);
624         scratch_addr += 128; /* write to separate cachelines */
625         PIPE_CONTROL_FLUSH(ring, scratch_addr);
626         scratch_addr += 128;
627         PIPE_CONTROL_FLUSH(ring, scratch_addr);
628         scratch_addr += 128;
629         PIPE_CONTROL_FLUSH(ring, scratch_addr);
630         scratch_addr += 128;
631         PIPE_CONTROL_FLUSH(ring, scratch_addr);
632         scratch_addr += 128;
633         PIPE_CONTROL_FLUSH(ring, scratch_addr);
634         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
635                         PIPE_CONTROL_WRITE_FLUSH |
636                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
637                         PIPE_CONTROL_NOTIFY);
638         intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
639         intel_ring_emit(ring, seqno);
640         intel_ring_emit(ring, 0);
641         intel_ring_advance(ring);
642
643         *result = seqno;
644         return 0;
645 }
646
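/*
 * Illustration of the scratch writes above: scratch_addr starts at
 * pc->gtt_offset + 128 and each PIPE_CONTROL_FLUSH() is followed by
 * another 128-byte step, so the six flushes land at offsets 128, 256,
 * 384, 512, 640 and 768 of the 4096-byte pipe_control page.  Each write
 * therefore hits its own cacheline, which is what the qword-write
 * incoherence workaround needs before the final PIPE_CONTROL_NOTIFY.
 */
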
647 static u32
648 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
649 {
650         /* Workaround to force correct ordering between irq and seqno writes on
651          * ivb (and maybe also on snb) by reading from a CS register (like
652          * ACTHD) before reading the status page. */
653         if (!lazy_coherency)
654                 intel_ring_get_active_head(ring);
655         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
656 }
657
658 static u32
659 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
660 {
661         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
662 }
663
664 static u32
665 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
666 {
667         struct pipe_control *pc = ring->private;
668         return pc->cpu_page[0];
669 }
670
671 static bool
672 gen5_ring_get_irq(struct intel_ring_buffer *ring)
673 {
674         struct drm_device *dev = ring->dev;
675         drm_i915_private_t *dev_priv = dev->dev_private;
676
677         if (!dev->irq_enabled)
678                 return false;
679
680         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
681         if (ring->irq_refcount++ == 0) {
682                 dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
683                 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
684                 POSTING_READ(GTIMR);
685         }
686         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
687
688         return true;
689 }
690
691 static void
692 gen5_ring_put_irq(struct intel_ring_buffer *ring)
693 {
694         struct drm_device *dev = ring->dev;
695         drm_i915_private_t *dev_priv = dev->dev_private;
696
697         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
698         if (--ring->irq_refcount == 0) {
699                 dev_priv->gt_irq_mask |= ring->irq_enable_mask;
700                 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
701                 POSTING_READ(GTIMR);
702         }
703         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
704 }
705
706 static bool
707 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
708 {
709         struct drm_device *dev = ring->dev;
710         drm_i915_private_t *dev_priv = dev->dev_private;
711
712         if (!dev->irq_enabled)
713                 return false;
714
715         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
716         if (ring->irq_refcount++ == 0) {
717                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
718                 I915_WRITE(IMR, dev_priv->irq_mask);
719                 POSTING_READ(IMR);
720         }
721         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
722
723         return true;
724 }
725
726 static void
727 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
728 {
729         struct drm_device *dev = ring->dev;
730         drm_i915_private_t *dev_priv = dev->dev_private;
731
732         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
733         if (--ring->irq_refcount == 0) {
734                 dev_priv->irq_mask |= ring->irq_enable_mask;
735                 I915_WRITE(IMR, dev_priv->irq_mask);
736                 POSTING_READ(IMR);
737         }
738         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
739 }
740
741 static bool
742 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
743 {
744         struct drm_device *dev = ring->dev;
745         drm_i915_private_t *dev_priv = dev->dev_private;
746
747         if (!dev->irq_enabled)
748                 return false;
749
750         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
751         if (ring->irq_refcount++ == 0) {
752                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
753                 I915_WRITE16(IMR, dev_priv->irq_mask);
754                 POSTING_READ16(IMR);
755         }
756         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
757
758         return true;
759 }
760
761 static void
762 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
763 {
764         struct drm_device *dev = ring->dev;
765         drm_i915_private_t *dev_priv = dev->dev_private;
766
767         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
768         if (--ring->irq_refcount == 0) {
769                 dev_priv->irq_mask |= ring->irq_enable_mask;
770                 I915_WRITE16(IMR, dev_priv->irq_mask);
771                 POSTING_READ16(IMR);
772         }
773         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
774 }
775
776 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
777 {
778         struct drm_device *dev = ring->dev;
779         drm_i915_private_t *dev_priv = dev->dev_private;
780         uint32_t mmio = 0;
781
782         /* The ring status page addresses are no longer next to the rest of
783          * the ring registers as of gen7.
784          */
785         if (IS_GEN7(dev)) {
786                 switch (ring->id) {
787                 case RCS:
788                         mmio = RENDER_HWS_PGA_GEN7;
789                         break;
790                 case BCS:
791                         mmio = BLT_HWS_PGA_GEN7;
792                         break;
793                 case VCS:
794                         mmio = BSD_HWS_PGA_GEN7;
795                         break;
796                 }
797         } else if (IS_GEN6(dev)) {
798                 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
799         } else {
800                 mmio = RING_HWS_PGA(ring->mmio_base);
801         }
802
803         I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
804         POSTING_READ(mmio);
805 }
806
807 static int
808 bsd_ring_flush(struct intel_ring_buffer *ring,
809                uint32_t     invalidate_domains,
810                uint32_t     flush_domains)
811 {
812         int ret;
813
814         ret = intel_ring_begin(ring, 2);
815         if (ret)
816                 return ret;
817
818         intel_ring_emit(ring, MI_FLUSH);
819         intel_ring_emit(ring, MI_NOOP);
820         intel_ring_advance(ring);
821         return 0;
822 }
823
824 static int
825 i9xx_add_request(struct intel_ring_buffer *ring,
826                  u32 *result)
827 {
828         uint32_t seqno;
829         int ret;
830
831         ret = intel_ring_begin(ring, 4);
832         if (ret)
833                 return ret;
834
835         seqno = i915_gem_next_request_seqno(ring);
836
837         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
838         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
839         intel_ring_emit(ring, seqno);
840         intel_ring_emit(ring, MI_USER_INTERRUPT);
841         intel_ring_advance(ring);
842
843         *result = seqno;
844         return 0;
845 }
846
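/*
 * Note on the MI_STORE_DWORD_INDEX pattern above (illustration): the
 * seqno lands in the hardware status page at dword I915_GEM_HWS_INDEX,
 * and the matching read side is simply ring_get_seqno(), i.e.
 * intel_read_status_page(ring, I915_GEM_HWS_INDEX), so completion can be
 * polled from the CPU without touching any ring registers.
 */
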
847 static bool
848 gen6_ring_get_irq(struct intel_ring_buffer *ring)
849 {
850         struct drm_device *dev = ring->dev;
851         drm_i915_private_t *dev_priv = dev->dev_private;
852
853         if (!dev->irq_enabled)
854                return false;
855
856         /* It looks like we need to prevent the gt from suspending while waiting
857          * for a notify irq, otherwise irqs seem to get lost on at least the
858          * blt/bsd rings on ivb. */
859         gen6_gt_force_wake_get(dev_priv);
860
861         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
862         if (ring->irq_refcount++ == 0) {
863                 if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
864                         I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
865                                                 GEN6_RENDER_L3_PARITY_ERROR));
866                 else
867                         I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
868                 dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
869                 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
870                 POSTING_READ(GTIMR);
871         }
872         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
873
874         return true;
875 }
876
877 static void
878 gen6_ring_put_irq(struct intel_ring_buffer *ring)
879 {
880         struct drm_device *dev = ring->dev;
881         drm_i915_private_t *dev_priv = dev->dev_private;
882
883         lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
884         if (--ring->irq_refcount == 0) {
885                 if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
886                         I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
887                 else
888                         I915_WRITE_IMR(ring, ~0);
889                 dev_priv->gt_irq_mask |= ring->irq_enable_mask;
890                 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
891                 POSTING_READ(GTIMR);
892         }
893         lockmgr(&dev_priv->irq_lock, LK_RELEASE);
894
895         gen6_gt_force_wake_put(dev_priv);
896 }
897
898 static int
899 i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
900 {
901         int ret;
902
903         ret = intel_ring_begin(ring, 2);
904         if (ret)
905                 return ret;
906
907         intel_ring_emit(ring,
908                         MI_BATCH_BUFFER_START |
909                         MI_BATCH_NON_SECURE_I965);
910         intel_ring_emit(ring, offset);
911         intel_ring_advance(ring);
912
913         return 0;
914 }
915
916 static int
917 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
918                                 u32 offset, u32 len)
919 {
920         int ret;
921
922         ret = intel_ring_begin(ring, 4);
923         if (ret)
924                 return ret;
925
926         intel_ring_emit(ring, MI_BATCH_BUFFER);
927         intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
928         intel_ring_emit(ring, offset + len - 8);
929         intel_ring_emit(ring, 0);
930         intel_ring_advance(ring);
931
932         return 0;
933 }
934
935 static int
936 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
937                          u32 offset, u32 len)
938 {
939         int ret;
940         unsigned flags = 0; /* never set here: batches always dispatch with MI_BATCH_NON_SECURE */
941
942         ret = intel_ring_begin(ring, 2);
943         if (ret)
944                 return ret;
945
946         intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
947         intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
948         intel_ring_advance(ring);
949
950         return 0;
951 }
952
953 static void cleanup_status_page(struct intel_ring_buffer *ring)
954 {
955         struct drm_i915_gem_object *obj;
956
957         obj = ring->status_page.obj;
958         if (obj == NULL)
959                 return;
960
961         pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
962         kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
963             PAGE_SIZE);
964         i915_gem_object_unpin(obj);
965         drm_gem_object_unreference(&obj->base);
966         ring->status_page.obj = NULL;
967 }
968
969 static int init_status_page(struct intel_ring_buffer *ring)
970 {
971         struct drm_device *dev = ring->dev;
972         struct drm_i915_gem_object *obj;
973         int ret;
974
975         obj = i915_gem_alloc_object(dev, 4096);
976         if (obj == NULL) {
977                 DRM_ERROR("Failed to allocate status page\n");
978                 ret = -ENOMEM;
979                 goto err;
980         }
981
982         i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
983
984         ret = i915_gem_object_pin(obj, 4096, true);
985         if (ret != 0) {
986                 goto err_unref;
987         }
988
989         ring->status_page.gfx_addr = obj->gtt_offset;
990         ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
991             PAGE_SIZE, PAGE_SIZE);
992         if (ring->status_page.page_addr == NULL) {
993                 ret = -ENOMEM;
994                 goto err_unpin;
995         }
996         pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
997             1);
998         pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
999             (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1000         ring->status_page.obj = obj;
1001         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1002
1003         intel_ring_setup_status_page(ring);
1004         DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1005                         ring->name, ring->status_page.gfx_addr);
1006
1007         return 0;
1008
1009 err_unpin:
1010         i915_gem_object_unpin(obj);
1011 err_unref:
1012         drm_gem_object_unreference(&obj->base);
1013 err:
1014         return ret;
1015 }
1016
1017 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1018 {
1019         struct drm_i915_private *dev_priv = ring->dev->dev_private;
1020         u32 addr;
1021
1022         if (!dev_priv->status_page_dmah) {
1023                 dev_priv->status_page_dmah =
1024                         drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
1025                 if (!dev_priv->status_page_dmah)
1026                         return -ENOMEM;
1027         }
1028
1029         addr = dev_priv->status_page_dmah->busaddr;
1030         if (INTEL_INFO(ring->dev)->gen >= 4)
1031                 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1032         I915_WRITE(HWS_PGA, addr);
1033
1034         ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1035         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1036
1037         return 0;
1038 }
1039
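/*
 * Worked example for the HWS_PGA write above (hypothetical bus address):
 * for a status page at 0x123456000, the low 32 bits give 0x23456000 and
 * (0x123456000 >> 28) & 0xf0 = 0x10, so gen4+ parts fold address bits
 * [35:32] into bits [7:4] of the register value and HWS_PGA is written
 * as 0x23456010.  Pre-gen4 parts just get the (truncated) low 32 bits.
 */
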
1040 static inline void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
1041 {
1042         return pmap_mapdev_attr(phys_addr, size, VM_MEMATTR_WRITE_COMBINING);
1043 }
1044
1045 static int intel_init_ring_buffer(struct drm_device *dev,
1046                                   struct intel_ring_buffer *ring)
1047 {
1048         struct drm_i915_gem_object *obj;
1049         int ret;
1050
1051         ring->dev = dev;
1052         INIT_LIST_HEAD(&ring->active_list);
1053         INIT_LIST_HEAD(&ring->request_list);
1054         ring->size = 32 * PAGE_SIZE;
1055         memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1056
1057         init_waitqueue_head(&ring->irq_queue);
1058
1059         if (I915_NEED_GFX_HWS(dev)) {
1060                 ret = init_status_page(ring);
1061                 if (ret)
1062                         return ret;
1063         } else {
1064                 BUG_ON(ring->id != RCS);
1065                 ret = init_phys_hws_pga(ring);
1066                 if (ret)
1067                         return ret;
1068         }
1069
1070         obj = i915_gem_alloc_object(dev, ring->size);
1071         if (obj == NULL) {
1072                 DRM_ERROR("Failed to allocate ringbuffer\n");
1073                 ret = -ENOMEM;
1074                 goto err_hws;
1075         }
1076
1077         ring->obj = obj;
1078
1079         ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1080         if (ret)
1081                 goto err_unref;
1082
1083         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1084         if (ret)
1085                 goto err_unpin;
1086
1087         ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
1088                                          ring->size);
1089         if (ring->virtual_start == NULL) {
1090                 DRM_ERROR("Failed to map ringbuffer.\n");
1091                 ret = -EINVAL;
1092                 goto err_unpin;
1093         }
1094
1095         ret = ring->init(ring);
1096         if (ret)
1097                 goto err_unmap;
1098
1099         /* Workaround an erratum on the i830 which causes a hang if
1100          * the TAIL pointer points to within the last 2 cachelines
1101          * of the buffer.
1102          */
1103         ring->effective_size = ring->size;
1104         if (IS_I830(ring->dev) || IS_845G(ring->dev))
1105                 ring->effective_size -= 128;
1106
1107         return 0;
1108
1109 err_unmap:
1110         pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1111 err_unpin:
1112         i915_gem_object_unpin(obj);
1113 err_unref:
1114         drm_gem_object_unreference(&obj->base);
1115         ring->obj = NULL;
1116 err_hws:
1117         cleanup_status_page(ring);
1118         return ret;
1119 }
1120
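/*
 * Arithmetic for the i830/845G workaround above: two 64-byte cachelines
 * are kept off limits, so the default 32-page ring (131072 bytes with
 * 4 KB pages) gets effective_size = 131072 - 128 = 130944, and
 * intel_ring_begin() (below) wraps via intel_wrap_ring_buffer() once
 * tail + n would cross that boundary instead of the true end of the
 * buffer.
 */
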
1121 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1122 {
1123         struct drm_i915_private *dev_priv;
1124         int ret;
1125
1126         if (ring->obj == NULL)
1127                 return;
1128
1129         /* Disable the ring buffer. The ring must be idle at this point */
1130         dev_priv = ring->dev->dev_private;
1131         ret = intel_ring_idle(ring);
1132         if (ret)
1133                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1134                           ring->name, ret);
1135
1136         I915_WRITE_CTL(ring, 0);
1137
1138         pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1139
1140         i915_gem_object_unpin(ring->obj);
1141         drm_gem_object_unreference(&ring->obj->base);
1142         ring->obj = NULL;
1143
1144         if (ring->cleanup)
1145                 ring->cleanup(ring);
1146
1147         cleanup_status_page(ring);
1148 }
1149
1150 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1151 {
1152         int ret;
1153
1154         ret = i915_wait_seqno(ring, seqno);
1155         if (!ret)
1156                 i915_gem_retire_requests_ring(ring);
1157
1158         return ret;
1159 }
1160
1161 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1162 {
1163         struct drm_i915_gem_request *request;
1164         u32 seqno = 0;
1165         int ret;
1166
1167         i915_gem_retire_requests_ring(ring);
1168
1169         if (ring->last_retired_head != -1) {
1170                 ring->head = ring->last_retired_head;
1171                 ring->last_retired_head = -1;
1172                 ring->space = ring_space(ring);
1173                 if (ring->space >= n)
1174                         return 0;
1175         }
1176
1177         list_for_each_entry(request, &ring->request_list, list) {
1178                 int space;
1179
1180                 if (request->tail == -1)
1181                         continue;
1182
1183                 space = request->tail - (ring->tail + 8);
1184                 if (space < 0)
1185                         space += ring->size;
1186                 if (space >= n) {
1187                         seqno = request->seqno;
1188                         break;
1189                 }
1190
1191                 /* Consume this request in case we need more space than
1192                  * is available and so need to prevent a race between
1193                  * updating last_retired_head and direct reads of
1194                  * I915_RING_HEAD. It also provides a nice sanity check.
1195                  */
1196                 request->tail = -1;
1197         }
1198
1199         if (seqno == 0)
1200                 return -ENOSPC;
1201
1202         ret = intel_ring_wait_seqno(ring, seqno);
1203         if (ret)
1204                 return ret;
1205
1206         if (ring->last_retired_head == -1)
1207                 return -ENOSPC;
1208
1209         ring->head = ring->last_retired_head;
1210         ring->last_retired_head = -1;
1211         ring->space = ring_space(ring);
1212         if (ring->space < n)
1213                 return -ENOSPC;
1214
1215         return 0;
1216 }
1217
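/*
 * Illustration of the per-request space calculation above (hypothetical
 * numbers): with ring->tail = 0x1f000 in a 0x20000-byte ring and a
 * pending request whose tail was recorded at 0x400, the difference
 * 0x400 - (0x1f000 + 8) is negative and wraps to 0x13f8, so if the caller
 * needs at most 0x13f8 bytes it is enough to wait for that request's
 * seqno and retire it, rather than spinning on the hardware head.
 */
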
1218 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1219 {
1220         struct drm_device *dev = ring->dev;
1221         struct drm_i915_private *dev_priv = dev->dev_private;
1222         unsigned long end;
1223         int ret;
1224
1225         ret = intel_ring_wait_request(ring, n);
1226         if (ret != -ENOSPC)
1227                 return ret;
1228
1229         /* With GEM the hangcheck timer should kick us out of the loop,
1230          * leaving it early runs the risk of corrupting GEM state (due
1231          * to running on almost untested codepaths). But on resume
1232          * timers don't work yet, so prevent a complete hang in that
1233          * case by choosing an insanely large timeout. */
1234         end = ticks + 60 * hz;
1235
1236         do {
1237                 ring->head = I915_READ_HEAD(ring);
1238                 ring->space = ring_space(ring);
1239                 if (ring->space >= n) {
1240                         return 0;
1241                 }
1242
1243 #if 0
1244                 if (dev->primary->master) {
1245                         struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1246                         if (master_priv->sarea_priv)
1247                                 master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1248                 }
1249 #else
1250                 if (dev_priv->sarea_priv)
1251                         dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1252 #endif
1253
1254                 DELAY(1000);
1255
1256                 ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1257                 if (ret)
1258                         return ret;
1259         } while (!time_after(ticks, end));
1260         return -EBUSY;
1261 }
1262
1263 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1264 {
1265         uint32_t __iomem *virt;
1266         int rem = ring->size - ring->tail;
1267
1268         if (ring->space < rem) {
1269                 int ret = ring_wait_for_space(ring, rem);
1270                 if (ret)
1271                         return ret;
1272         }
1273
1274         virt = (uint32_t __iomem *)((char *)ring->virtual_start + ring->tail);
1275         rem /= 4;
1276         while (rem--)
1277                 iowrite32(MI_NOOP, virt++);
1278
1279         ring->tail = 0;
1280         ring->space = ring_space(ring);
1281
1282         return 0;
1283 }
1284
1285 int intel_ring_idle(struct intel_ring_buffer *ring)
1286 {
1287         return ring_wait_for_space(ring, ring->size - 8);
1288 }
1289
1290 int intel_ring_begin(struct intel_ring_buffer *ring,
1291                      int num_dwords)
1292 {
1293         struct drm_i915_private *dev_priv = ring->dev->dev_private;
1294         int n = 4*num_dwords;
1295         int ret;
1296
1297         ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1298         if (ret)
1299                 return ret;
1300
1301         if (unlikely(ring->tail + n > ring->effective_size)) {
1302                 ret = intel_wrap_ring_buffer(ring);
1303                 if (unlikely(ret))
1304                         return ret;
1305         }
1306
1307         if (unlikely(ring->space < n)) {
1308                 ret = ring_wait_for_space(ring, n);
1309                 if (unlikely(ret))
1310                         return ret;
1311         }
1312
1313         ring->space -= n;
1314         return 0;
1315 }
1316
1317 void intel_ring_advance(struct intel_ring_buffer *ring)
1318 {
1319         struct drm_i915_private *dev_priv = ring->dev->dev_private;
1320
1321         ring->tail &= ring->size - 1;
1322         if (dev_priv->stop_rings & intel_ring_flag(ring))
1323                 return;
1324         ring->write_tail(ring, ring->tail);
1325 }
1326
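/*
 * Usage sketch for the intel_ring_begin()/intel_ring_emit()/
 * intel_ring_advance() API above.  Kept under #if 0 because it is purely
 * illustrative and not part of the driver; emit_two_noops() is a made-up
 * name.
 */
#if 0
static int
emit_two_noops(struct intel_ring_buffer *ring)
{
        int ret;

        /* Reserve 2 dwords, waiting for space or wrapping if needed. */
        ret = intel_ring_begin(ring, 2);
        if (ret)
                return ret;

        /* Queue the dwords at the current software tail. */
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_emit(ring, MI_NOOP);

        /* Publish the new tail to the hardware via ring->write_tail(). */
        intel_ring_advance(ring);

        return 0;
}
#endif
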
1327 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1328                                      u32 value)
1329 {
1330         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1331
1332        /* Every tail move must follow the sequence below */
1333
1334         /* Disable notification that the ring is IDLE. The GT
1335          * will then assume that it is busy and bring it out of rc6.
1336          */
1337         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1338                    _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1339
1340         /* Clear the context id. Here be magic! */
1341         I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1342
1343         /* Wait for the ring not to be idle, i.e. for it to wake up. */
1344         if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1345                       GEN6_BSD_SLEEP_INDICATOR) == 0,
1346                      50))
1347                 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1348
1349         /* Now that the ring is fully powered up, update the tail */
1350         I915_WRITE_TAIL(ring, value);
1351         POSTING_READ(RING_TAIL(ring->mmio_base));
1352
1353         /* Let the ring send IDLE messages to the GT again,
1354          * and so let it sleep to conserve power when idle.
1355          */
1356         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1357                    _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1358 }
1359
1360 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1361                            uint32_t invalidate, uint32_t flush)
1362 {
1363         uint32_t cmd;
1364         int ret;
1365
1366         ret = intel_ring_begin(ring, 4);
1367         if (ret)
1368                 return ret;
1369
1370         cmd = MI_FLUSH_DW;
1371         if (invalidate & I915_GEM_GPU_DOMAINS)
1372                 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1373         intel_ring_emit(ring, cmd);
1374         intel_ring_emit(ring, 0);
1375         intel_ring_emit(ring, 0);
1376         intel_ring_emit(ring, MI_NOOP);
1377         intel_ring_advance(ring);
1378         return 0;
1379 }
1380
1381 static int
1382 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1383                               uint32_t offset, uint32_t len)
1384 {
1385         int ret;
1386
1387         ret = intel_ring_begin(ring, 2);
1388         if (ret)
1389                 return ret;
1390
1391         intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1392         /* bit0-7 is the length on GEN6+ */
1393         intel_ring_emit(ring, offset);
1394         intel_ring_advance(ring);
1395
1396         return 0;
1397 }
1398
1399 /* Blitter support (SandyBridge+) */
1400
1401 static int blt_ring_flush(struct intel_ring_buffer *ring,
1402                           uint32_t invalidate, uint32_t flush)
1403 {
1404         uint32_t cmd;
1405         int ret;
1406
1407         ret = intel_ring_begin(ring, 4);
1408         if (ret)
1409                 return ret;
1410
1411         cmd = MI_FLUSH_DW;
1412         if (invalidate & I915_GEM_DOMAIN_RENDER)
1413                 cmd |= MI_INVALIDATE_TLB;
1414         intel_ring_emit(ring, cmd);
1415         intel_ring_emit(ring, 0);
1416         intel_ring_emit(ring, 0);
1417         intel_ring_emit(ring, MI_NOOP);
1418         intel_ring_advance(ring);
1419         return 0;
1420 }
1421
1422 int intel_init_render_ring_buffer(struct drm_device *dev)
1423 {
1424         drm_i915_private_t *dev_priv = dev->dev_private;
1425         struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1426
1427         ring->name = "render ring";
1428         ring->id = RCS;
1429         ring->mmio_base = RENDER_RING_BASE;
1430
1431         if (INTEL_INFO(dev)->gen >= 6) {
1432                 ring->add_request = gen6_add_request;
1433                 ring->flush = gen6_render_ring_flush;
1434                 ring->irq_get = gen6_ring_get_irq;
1435                 ring->irq_put = gen6_ring_put_irq;
1436                 ring->irq_enable_mask = GT_USER_INTERRUPT;
1437                 ring->get_seqno = gen6_ring_get_seqno;
1438                 ring->sync_to = gen6_ring_sync;
1439                 ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1440                 ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1441                 ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1442                 ring->signal_mbox[0] = GEN6_VRSYNC;
1443                 ring->signal_mbox[1] = GEN6_BRSYNC;
1444         } else if (IS_GEN5(dev)) {
1445                 ring->add_request = pc_render_add_request;
1446                 ring->flush = gen4_render_ring_flush;
1447                 ring->get_seqno = pc_render_get_seqno;
1448                 ring->irq_get = gen5_ring_get_irq;
1449                 ring->irq_put = gen5_ring_put_irq;
1450                 ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1451         } else {
1452                 ring->add_request = i9xx_add_request;
1453                 if (INTEL_INFO(dev)->gen < 4)
1454                         ring->flush = gen2_render_ring_flush;
1455                 else
1456                         ring->flush = gen4_render_ring_flush;
1457                 ring->get_seqno = ring_get_seqno;
1458                 if (IS_GEN2(dev)) {
1459                         ring->irq_get = i8xx_ring_get_irq;
1460                         ring->irq_put = i8xx_ring_put_irq;
1461                 } else {
1462                         ring->irq_get = i9xx_ring_get_irq;
1463                         ring->irq_put = i9xx_ring_put_irq;
1464                 }
1465                 ring->irq_enable_mask = I915_USER_INTERRUPT;
1466         }
1467         ring->write_tail = ring_write_tail;
1468         if (INTEL_INFO(dev)->gen >= 6)
1469                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1470         else if (INTEL_INFO(dev)->gen >= 4)
1471                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1472         else if (IS_I830(dev) || IS_845G(dev))
1473                 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1474         else
1475                 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1476         ring->init = init_render_ring;
1477         ring->cleanup = render_ring_cleanup;
1478
1479         if (!I915_NEED_GFX_HWS(dev)) {
1480                 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1481                 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1482         }
1483
1484         return intel_init_ring_buffer(dev, ring);
1485 }
1486
1487 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1488 {
1489         drm_i915_private_t *dev_priv = dev->dev_private;
1490         struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1491
1492         ring->name = "render ring";
1493         ring->id = RCS;
1494         ring->mmio_base = RENDER_RING_BASE;
1495
1496         if (INTEL_INFO(dev)->gen >= 6) {
1497                 /* non-kms not supported on gen6+ */
1498                 return -ENODEV;
1499         }
1500
1501         /* Note: gem is not supported on gen5/ilk without kms (the corresponding
1502          * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1503          * the special gen5 functions. */
1504         ring->add_request = i9xx_add_request;
1505         if (INTEL_INFO(dev)->gen < 4)
1506                 ring->flush = gen2_render_ring_flush;
1507         else
1508                 ring->flush = gen4_render_ring_flush;
1509         ring->get_seqno = ring_get_seqno;
1510         if (IS_GEN2(dev)) {
1511                 ring->irq_get = i8xx_ring_get_irq;
1512                 ring->irq_put = i8xx_ring_put_irq;
1513         } else {
1514                 ring->irq_get = i9xx_ring_get_irq;
1515                 ring->irq_put = i9xx_ring_put_irq;
1516         }
1517         ring->irq_enable_mask = I915_USER_INTERRUPT;
1518         ring->write_tail = ring_write_tail;
1519         if (INTEL_INFO(dev)->gen >= 4)
1520                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1521         else if (IS_I830(dev) || IS_845G(dev))
1522                 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1523         else
1524                 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1525         ring->init = init_render_ring;
1526         ring->cleanup = render_ring_cleanup;
1527
1528         if (!I915_NEED_GFX_HWS(dev))
1529                 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1530
1531         ring->dev = dev;
1532         INIT_LIST_HEAD(&ring->active_list);
1533         INIT_LIST_HEAD(&ring->request_list);
1534         INIT_LIST_HEAD(&ring->gpu_write_list);
1535
1536         ring->size = size;
1537         ring->effective_size = ring->size;
1538         if (IS_I830(ring->dev))
1539                 ring->effective_size -= 128;
1540
1541         ring->virtual_start = ioremap_wc(start, size);
1542         if (ring->virtual_start == NULL) {
1543                 DRM_ERROR("can not ioremap virtual address for"
1544                           " ring buffer\n");
1545                 return -ENOMEM;
1546         }
1547
1548         return 0;
1549 }
1550
1551 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1552 {
1553         drm_i915_private_t *dev_priv = dev->dev_private;
1554         struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1555
1556         ring->name = "bsd ring";
1557         ring->id = VCS;
1558
1559         ring->write_tail = ring_write_tail;
1560         if (IS_GEN6(dev) || IS_GEN7(dev)) {
1561                 ring->mmio_base = GEN6_BSD_RING_BASE;
1562                 /* gen6 bsd needs a special wa for tail updates */
1563                 if (IS_GEN6(dev))
1564                         ring->write_tail = gen6_bsd_ring_write_tail;
1565                 ring->flush = gen6_ring_flush;
1566                 ring->add_request = gen6_add_request;
1567                 ring->get_seqno = gen6_ring_get_seqno;
1568                 ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1569                 ring->irq_get = gen6_ring_get_irq;
1570                 ring->irq_put = gen6_ring_put_irq;
1571                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1572                 ring->sync_to = gen6_ring_sync;
1573                 ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1574                 ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1575                 ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1576                 ring->signal_mbox[0] = GEN6_RVSYNC;
1577                 ring->signal_mbox[1] = GEN6_BVSYNC;
1578         } else {
1579                 ring->mmio_base = BSD_RING_BASE;
1580                 ring->flush = bsd_ring_flush;
1581                 ring->add_request = i9xx_add_request;
1582                 ring->get_seqno = ring_get_seqno;
1583                 if (IS_GEN5(dev)) {
1584                         ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1585                         ring->irq_get = gen5_ring_get_irq;
1586                         ring->irq_put = gen5_ring_put_irq;
1587                 } else {
1588                         ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1589                         ring->irq_get = i9xx_ring_get_irq;
1590                         ring->irq_put = i9xx_ring_put_irq;
1591                 }
1592                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1593         }
1594         ring->init = init_ring_common;
1595
1596         return intel_init_ring_buffer(dev, ring);
1597 }
1598
1599 int intel_init_blt_ring_buffer(struct drm_device *dev)
1600 {
1601         drm_i915_private_t *dev_priv = dev->dev_private;
1602         struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1603
1604         ring->name = "blitter ring";
1605         ring->id = BCS;
1606
1607         ring->mmio_base = BLT_RING_BASE;
1608         ring->write_tail = ring_write_tail;
1609         ring->flush = blt_ring_flush;
1610         ring->add_request = gen6_add_request;
1611         ring->get_seqno = gen6_ring_get_seqno;
1612         ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1613         ring->irq_get = gen6_ring_get_irq;
1614         ring->irq_put = gen6_ring_put_irq;
1615         ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1616         ring->sync_to = gen6_ring_sync;
1617         ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1618         ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1619         ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1620         ring->signal_mbox[0] = GEN6_RBSYNC;
1621         ring->signal_mbox[1] = GEN6_VBSYNC;
1622         ring->init = init_ring_common;
1623
1624         return intel_init_ring_buffer(dev, ring);
1625 }
1626
1627 int
1628 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1629 {
1630         int ret;
1631
1632         if (!ring->gpu_caches_dirty)
1633                 return 0;
1634
1635         ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1636         if (ret)
1637                 return ret;
1638
1639         ring->gpu_caches_dirty = false;
1640         return 0;
1641 }
1642
1643 int
1644 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1645 {
1646         uint32_t flush_domains;
1647         int ret;
1648
1649         flush_domains = 0;
1650         if (ring->gpu_caches_dirty)
1651                 flush_domains = I915_GEM_GPU_DOMAINS;
1652
1653         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1654         if (ret)
1655                 return ret;
1656
1657         ring->gpu_caches_dirty = false;
1658         return 0;
1659 }
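
/*
 * Sketch of how the two helpers above are meant to bracket a batchbuffer
 * submission.  Kept under #if 0 because it is illustrative only;
 * submit_batch() and its batch_offset/batch_len arguments are made-up
 * names and error handling is trimmed.
 */
#if 0
static int
submit_batch(struct intel_ring_buffer *ring, u32 batch_offset, u32 batch_len)
{
        int ret;

        /* Invalidate read caches before the batch samples anything,
         * flushing first only if an earlier batch left caches dirty. */
        ret = intel_ring_invalidate_all_caches(ring);
        if (ret)
                return ret;

        ret = ring->dispatch_execbuffer(ring, batch_offset, batch_len);
        if (ret)
                return ret;

        /* The batch may have written through the GPU caches; make sure
         * they are flushed before the request's seqno write. */
        ring->gpu_caches_dirty = true;
        return intel_ring_flush_all_caches(ring);
}
#endif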