/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID                  (1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID                  (1 << 0)
#define GEN6_PTE_UNCACHED               (1 << 1)
#define HSW_PTE_UNCACHED                (0)
#define GEN6_PTE_CACHE_LLC              (2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC          (3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

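/*
 * A GEN6-style GTT/PPGTT entry is a single 32-bit word: bit 0 is the valid
 * bit, bits 2:1 select the cacheability, bits 31:12 hold the low part of the
 * page frame, and GEN6_GTT_ADDR_ENCODE() folds physical address bits 39:32
 * into PTE bits 11:4.  As an illustrative example only: an uncached,
 * non-Haswell mapping of physical page 0x123456000 encodes to
 * 0x23456010 | GEN6_PTE_VALID | GEN6_PTE_UNCACHED == 0x23456013.
 */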
static inline gtt_pte_t pte_encode(struct drm_device *dev,
                                   dma_addr_t addr,
                                   enum i915_cache_level level)
{
        gtt_pte_t pte = GEN6_PTE_VALID;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_LLC_MLC:
                /* Haswell doesn't set L3 this way */
                if (IS_HASWELL(dev))
                        pte |= GEN6_PTE_CACHE_LLC;
                else
                        pte |= GEN6_PTE_CACHE_LLC_MLC;
                break;
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                if (IS_HASWELL(dev))
                        pte |= HSW_PTE_UNCACHED;
                else
                        pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                BUG();
        }

        return pte;
}

/* PPGTT support for Sandybridge/Gen6 and later */
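/*
 * Point every PTE in [first_entry, first_entry + num_entries) at the scratch
 * page so that stale translations never reference freed memory; the walk is
 * split at page-table (I915_PPGTT_PT_ENTRIES) boundaries.
 */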
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
                                   unsigned first_entry,
                                   unsigned num_entries)
{
        gtt_pte_t *pt_vaddr;
        gtt_pte_t scratch_pte;
        unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;

        scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
                                 I915_CACHE_LLC);

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

                for (i = first_pte; i < last_pte; i++)
                        pt_vaddr[i] = scratch_pte;

                kunmap_atomic(pt_vaddr);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

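/*
 * Set up the aliasing PPGTT: one page of PTEs per page-directory entry, so
 * the address space covers I915_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES
 * pages.  The page-directory entries themselves live in a region stolen from
 * the top of the global GTT (see i915_gem_init_ppgtt() below).
 */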
int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_hw_ppgtt *ppgtt;
        u_int first_pd_entry_in_global_pt, i;

        /*
         * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
         * entries. For aliasing ppgtt support we just steal them at the end for
         * now.
         */
        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

        ppgtt = kmalloc(sizeof(*ppgtt), M_DRM, M_WAITOK | M_ZERO);

        ppgtt->dev = dev;
        ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
        ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
            M_DRM, M_WAITOK | M_ZERO);

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
                    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
                if (ppgtt->pt_pages[i] == NULL) {
                        dev_priv->mm.aliasing_ppgtt = ppgtt;
                        i915_gem_cleanup_aliasing_ppgtt(dev);
                        return (-ENOMEM);
                }
        }

        ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

        i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries *
            I915_PPGTT_PT_ENTRIES);
        ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t);
        dev_priv->mm.aliasing_ppgtt = ppgtt;
        return (0);
}

void
i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv;
        struct i915_hw_ppgtt *ppgtt;
        vm_page_t m;
        int i;

        dev_priv = dev->dev_private;
        ppgtt = dev_priv->mm.aliasing_ppgtt;
        if (ppgtt == NULL)
                return;
        dev_priv->mm.aliasing_ppgtt = NULL;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                m = ppgtt->pt_pages[i];
                if (m != NULL) {
                        vm_page_busy_wait(m, FALSE, "i915gem");
                        vm_page_unwire(m, 0);
                        vm_page_free(m);
                }
        }
        drm_free(ppgtt->pt_pages, M_DRM);
        drm_free(ppgtt, M_DRM);
}

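/*
 * Write one encoded PTE per backing page for the range starting at
 * first_entry, mapping one page table at a time with kmap_atomic() and
 * advancing across page-table boundaries just like the clear path above.
 */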
static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
{
        uint32_t *pt_vaddr;
        unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;
        dma_addr_t page_addr;

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

                for (i = first_pte; i < last_pte; i++) {
                        page_addr = VM_PAGE_TO_PHYS(*pages);
                        pt_vaddr[i] = pte_encode(ppgtt->dev, page_addr,
                                                 cache_level);

                        pages++;
                }

                kunmap_atomic(pt_vaddr);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
                            struct drm_i915_gem_object *obj,
                            enum i915_cache_level cache_level)
{
        i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
                              struct drm_i915_gem_object *obj)
{
        i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT);
}

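/*
 * Hook the aliasing PPGTT up to the hardware: write the page-directory
 * entries into the slots stolen at the top of the global GTT, convert
 * pd_offset into the cacheline-granular value expected by PP_DIR_BASE, set
 * the platform-specific enable bits, and program every ring's PP_DIR
 * registers.
 */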
void i915_gem_init_ppgtt(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t pd_offset;
        struct intel_ring_buffer *ring;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
        uint32_t pd_entry, first_pd_entry_in_global_pt;
        int i;

        if (!dev_priv->mm.aliasing_ppgtt)
                return;

        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                vm_paddr_t pt_addr;

                pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
                pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
                pd_entry |= GEN6_PDE_VALID;

                intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
        }
        intel_gtt_read_pte(first_pd_entry_in_global_pt);

        pd_offset = ppgtt->pd_offset;
        pd_offset /= 64; /* in cachelines, */
        pd_offset <<= 16;

        if (INTEL_INFO(dev)->gen == 6) {
                uint32_t ecochk, gab_ctl, ecobits;

                ecobits = I915_READ(GAC_ECO_BITS);
                I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

                gab_ctl = I915_READ(GAB_CTL);
                I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

                ecochk = I915_READ(GAM_ECOCHK);
                I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
                                       ECOCHK_PPGTT_CACHE64B);
                I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
        } else if (INTEL_INFO(dev)->gen >= 7) {
                I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
                /* GFX_MODE is per-ring on gen7+ */
        }

        for_each_ring(ring, dev_priv, i) {
                if (INTEL_INFO(dev)->gen >= 7)
                        I915_WRITE(RING_MODE_GEN7(ring),
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
                I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
        }
}

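/*
 * On configurations where mm.gtt->do_idle_maps is set (presumably platforms
 * whose IOMMU/GTT interaction makes remapping under load unsafe), idle the
 * GPU non-interruptibly before touching GTT mappings; undo_idling() restores
 * the previous interruptible state afterwards.
 */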
static bool do_idling(struct drm_i915_private *dev_priv)
{
        bool ret = dev_priv->mm.interruptible;

        if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
                dev_priv->mm.interruptible = false;
                if (i915_gpu_idle(dev_priv->dev)) {
                        DRM_ERROR("Couldn't idle GPU\n");
                        /* Wait a bit, in hopes it avoids the hang */
                        DELAY(10);
                }
        }

        return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{

        if (unlikely(dev_priv->mm.gtt->do_idle_maps))
                dev_priv->mm.interruptible = interruptible;
}

#if 0
static void i915_ggtt_clear_range(struct drm_device *dev,
                                 unsigned first_entry,
                                 unsigned num_entries)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        gtt_pte_t scratch_pte;
        gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
        const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
        int i;

        if (INTEL_INFO(dev)->gen < 6) {
                intel_gtt_clear_range(first_entry, num_entries);
                return;
        }

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
        readl(gtt_base);
}
#endif

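/*
 * Rebuild the global GTT (typically after a suspend/resume cycle, when its
 * contents may be stale): scrub our portion of the table, then rebind every
 * object on the GTT list with its current cache level and flush the chipset
 * write buffers.
 */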
void
i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv;
        struct drm_i915_gem_object *obj;

        dev_priv = dev->dev_private;

        /* First fill our portion of the GTT with scratch pages */
        intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
            (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

        list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
                i915_gem_clflush_object(obj);
                i915_gem_gtt_bind_object(obj, obj->cache_level);
        }

        intel_gtt_chipset_flush();
}

#if 0
/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
                                  enum i915_cache_level level)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct sg_table *st = obj->pages;
        struct scatterlist *sg = st->sgl;
        const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
        const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
        gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
        int unused, i = 0;
        unsigned int len, m = 0;
        dma_addr_t addr;

        for_each_sg(st->sgl, sg, st->nents, unused) {
                len = sg_dma_len(sg) >> PAGE_SHIFT;
                for (m = 0; m < len; m++) {
                        addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
                        iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
                        i++;
                }
        }

        BUG_ON(i > max_entries);
        BUG_ON(i != obj->base.size / PAGE_SIZE);

        /* XXX: This serves as a posting read to make sure that the PTE has
         * actually been updated. There is some concern that even though
         * registers and PTEs are within the same BAR that they are potentially
         * of NUMA access patterns. Therefore, even with the way we assume
         * hardware should work, we must keep this posting read for paranoia.
         */
        if (i != 0)
                WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));

        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
         * have finished.
         */
        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
        POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

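/*
 * Global-GTT binding goes through the shared intel_gtt layer here; the
 * requested cache level is reduced to the AGP flag pair (cached vs. uncached)
 * that intel_gtt_insert_pages() understands.
 */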
void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
                              enum i915_cache_level cache_level)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                        AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
        intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, flags);

        obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

        intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT);

        undo_idling(dev_priv, interruptible);
        obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

#if 0
        if (!obj->has_dma_mapping)
                dma_unmap_sg(&dev->pdev->dev,
                             obj->pages->sgl, obj->pages->nents,
                             PCI_DMA_BIDIRECTIONAL);
#endif

        undo_idling(dev_priv, interruptible);
}

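/*
 * The drm_mm "color" here is the object's cache level.  When a neighbouring
 * node has a different color, shrink the candidate hole by one page on that
 * side so that objects with different cache attributes are kept a page apart
 * (only installed on non-LLC platforms, see i915_gem_init_global_gtt()).
 */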
static void i915_gtt_color_adjust(struct drm_mm_node *node,
                                  unsigned long color,
                                  unsigned long *start,
                                  unsigned long *end)
{
        if (node->color != color)
                *start += 4096;

        if (!list_empty(&node->node_list)) {
                node = list_entry(node->node_list.next,
                                  struct drm_mm_node,
                                  node_list);
                if (node->allocated && node->color != color)
                        *end -= 4096;
        }
}

void i915_gem_init_global_gtt(struct drm_device *dev,
                              unsigned long start,
                              unsigned long mappable_end,
                              unsigned long end)
{
        drm_i915_private_t *dev_priv = dev->dev_private;

        /* Subtract the guard page ... */
        drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
        if (!HAS_LLC(dev))
                dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;

        dev_priv->mm.gtt_start = start;
        dev_priv->mm.gtt_mappable_end = mappable_end;
        dev_priv->mm.gtt_end = end;
        dev_priv->mm.gtt_total = end - start;
        dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

        /* ... but ensure that we clear the entire range. */
        intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
}