sys/dev/drm/i915/i915_gem_gtt.c (dragonfly.git, commit 2b447c1e109856c95d539d4cf8aa80ce4abc6fd9)
/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID                  (1 << 0)
/* gen6+ has bits 11:4 for physical addr bits 39:32 */
#define GEN6_PDE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID                  (1 << 0)
#define GEN6_PTE_UNCACHED               (1 << 1)
#define HSW_PTE_UNCACHED                (0)
#define GEN6_PTE_CACHE_LLC              (2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC          (3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

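/*
 * Build a gen6+ GTT/PPGTT page table entry for the given DMA address and
 * cache level: the valid bit is always set, the address is packed with
 * GEN6_PTE_ADDR_ENCODE(), and the cache-control bits depend on the level
 * (Haswell uses different encodings for LLC_MLC and uncached).
 */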
static inline gtt_pte_t pte_encode(struct drm_device *dev,
                                   dma_addr_t addr,
                                   enum i915_cache_level level)
{
        gtt_pte_t pte = GEN6_PTE_VALID;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_LLC_MLC:
                /* Haswell doesn't set L3 this way */
                if (IS_HASWELL(dev))
                        pte |= GEN6_PTE_CACHE_LLC;
                else
                        pte |= GEN6_PTE_CACHE_LLC_MLC;
                break;
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                if (IS_HASWELL(dev))
                        pte |= HSW_PTE_UNCACHED;
                else
                        pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                BUG();
        }

        return pte;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
                                   unsigned first_entry,
                                   unsigned num_entries)
{
        gtt_pte_t *pt_vaddr;
        gtt_pte_t scratch_pte;
        unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;

        scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
                                 I915_CACHE_LLC);

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

                for (i = first_pte; i < last_pte; i++)
                        pt_vaddr[i] = scratch_pte;

                kunmap_atomic(pt_vaddr);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

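/*
 * Set up the aliasing PPGTT: allocate one page-table page per page
 * directory entry, point every PTE at the scratch page, and record the
 * offset (at the top of the global GTT) where the page directory entries
 * will later be written by i915_gem_init_ppgtt().
 */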
int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_hw_ppgtt *ppgtt;
        unsigned first_pd_entry_in_global_pt;
        int i;
        int ret = -ENOMEM;

        /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
         * entries. For aliasing ppgtt support we just steal them at the end for
         * now.
         */
        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

        ppgtt = kmalloc(sizeof(*ppgtt), M_DRM, M_WAITOK | M_ZERO);
        if (!ppgtt)
                return ret;

        ppgtt->dev = dev;
        ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
        ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
            M_DRM, M_WAITOK | M_ZERO);
        if (!ppgtt->pt_pages)
                goto err_ppgtt;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
                    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
                if (!ppgtt->pt_pages[i])
                        goto err_pt_alloc;
        }

        ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

        i915_ppgtt_clear_range(ppgtt, 0,
                               ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);

        ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);

        dev_priv->mm.aliasing_ppgtt = ppgtt;

        return 0;

err_pt_alloc:
        dev_priv->mm.aliasing_ppgtt = ppgtt;
        i915_gem_cleanup_aliasing_ppgtt(dev);
        return (-ENOMEM);
err_ppgtt:
        kfree(ppgtt, M_DRM);

        return ret;
}

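/*
 * Tear down the aliasing PPGTT: detach it from dev_priv and release every
 * page-table page that was allocated, along with the supporting arrays.
 */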
void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
        vm_page_t m;
        int i;

        if (!ppgtt)
                return;
        dev_priv->mm.aliasing_ppgtt = NULL;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                m = ppgtt->pt_pages[i];
                if (m != NULL) {
                        vm_page_busy_wait(m, FALSE, "i915gem");
                        vm_page_unwire(m, 0);
                        vm_page_free(m);
                }
        }
        drm_free(ppgtt->pt_pages, M_DRM);
        drm_free(ppgtt, M_DRM);
}

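/*
 * Write PTEs for a contiguous range of PPGTT entries, mapping one
 * page-table page at a time with kmap_atomic() and encoding each backing
 * page with the requested cache level.
 */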
static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
{
        uint32_t *pt_vaddr;
        unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;
        dma_addr_t page_addr;

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

                for (i = first_pte; i < last_pte; i++) {
                        page_addr = VM_PAGE_TO_PHYS(*pages);
                        pt_vaddr[i] = pte_encode(ppgtt->dev, page_addr,
                                                 cache_level);

                        pages++;
                }

                kunmap_atomic(pt_vaddr);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

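/*
 * Bind/unbind an object in the aliasing PPGTT: the object's GTT offset and
 * size are converted into a PTE range that is either filled with its
 * backing pages or reset to the scratch page.
 */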
void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
                            struct drm_i915_gem_object *obj,
                            enum i915_cache_level cache_level)
{
        i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
                              struct drm_i915_gem_object *obj)
{
        i915_ppgtt_clear_range(ppgtt,
                               obj->gtt_space->start >> PAGE_SHIFT,
                               obj->base.size >> PAGE_SHIFT);
}

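/*
 * Enable the aliasing PPGTT: write the page directory entries into the
 * stolen slots at the top of the global GTT, set the required ECOCHK /
 * GAB_CTL / GFX_MODE bits (global on gen6, per-ring on gen7+), and point
 * each ring's PP_DIR_BASE at the page directory.
 */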
void i915_gem_init_ppgtt(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t pd_offset;
        struct intel_ring_buffer *ring;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
        uint32_t pd_entry, first_pd_entry_in_global_pt;
        int i;

        if (!dev_priv->mm.aliasing_ppgtt)
                return;

        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                vm_paddr_t pt_addr;

                pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
                pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
                pd_entry |= GEN6_PDE_VALID;

                intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
        }
        intel_gtt_read_pte(first_pd_entry_in_global_pt);

        pd_offset = ppgtt->pd_offset;
        pd_offset /= 64; /* in cachelines, */
        pd_offset <<= 16;

        if (INTEL_INFO(dev)->gen == 6) {
                uint32_t ecochk, gab_ctl, ecobits;

                ecobits = I915_READ(GAC_ECO_BITS);
                I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

                gab_ctl = I915_READ(GAB_CTL);
                I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

                ecochk = I915_READ(GAM_ECOCHK);
                I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
                                       ECOCHK_PPGTT_CACHE64B);
                I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
        } else if (INTEL_INFO(dev)->gen >= 7) {
                I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
                /* GFX_MODE is per-ring on gen7+ */
        }

        for_each_ring(ring, dev_priv, i) {
                if (INTEL_INFO(dev)->gen >= 7)
                        I915_WRITE(RING_MODE_GEN7(ring),
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
                I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
        }
}

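/*
 * Some chipsets (those with do_idle_maps set) require the GPU to be idle
 * while GTT mappings change.  do_idling() idles the GPU when necessary and
 * returns the previous mm.interruptible value so undo_idling() can restore
 * it afterwards.
 */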
static bool do_idling(struct drm_i915_private *dev_priv)
{
        bool ret = dev_priv->mm.interruptible;

        if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
                dev_priv->mm.interruptible = false;
                if (i915_gpu_idle(dev_priv->dev)) {
                        DRM_ERROR("Couldn't idle GPU\n");
                        /* Wait a bit, in hopes it avoids the hang */
                        udelay(10);
                }
        }

        return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
        if (unlikely(dev_priv->mm.gtt->do_idle_maps))
                dev_priv->mm.interruptible = interruptible;
}

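/*
 * Point a range of global GTT entries back at the scratch page.  Pre-gen6
 * hardware goes through intel_gtt_clear_range(); gen6+ writes the PTEs
 * directly through the mapped GTT.
 */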
static void i915_ggtt_clear_range(struct drm_device *dev,
                                 unsigned first_entry,
                                 unsigned num_entries)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        gtt_pte_t scratch_pte;
        gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
        const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
        int i;

        if (INTEL_INFO(dev)->gen < 6) {
                intel_gtt_clear_range(first_entry, num_entries);
                return;
        }

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
        readl(gtt_base);
}

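/*
 * Rebuild the global GTT mappings (typically after resume): scrub our
 * portion of the GTT with scratch pages, then flush and rebind every
 * object on the bound list.
 */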
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj;

        /* First fill our portion of the GTT with scratch pages */
        i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE,
                              (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

        list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
                i915_gem_clflush_object(obj);
                i915_gem_gtt_bind_object(obj, obj->cache_level);
        }

        i915_gem_chipset_flush(dev);
}

#if 0
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
        if (obj->has_dma_mapping)
                return 0;

        if (!dma_map_sg(&obj->base.dev->pdev->dev,
                        obj->pages->sgl, obj->pages->nents,
                        PCI_DMA_BIDIRECTIONAL))
                return -ENOSPC;

        return 0;
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
                                  enum i915_cache_level level)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct sg_table *st = obj->pages;
        struct scatterlist *sg = st->sgl;
        const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
        const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
        gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
        int unused, i = 0;
        unsigned int len, m = 0;
        dma_addr_t addr;

        for_each_sg(st->sgl, sg, st->nents, unused) {
                len = sg_dma_len(sg) >> PAGE_SHIFT;
                for (m = 0; m < len; m++) {
                        addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
                        iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
                        i++;
                }
        }

        BUG_ON(i > max_entries);
        BUG_ON(i != obj->base.size / PAGE_SIZE);

        /* XXX: This serves as a posting read to make sure that the PTE has
         * actually been updated. There is some concern that even though
         * registers and PTEs are within the same BAR that they are potentially
         * of NUMA access patterns. Therefore, even with the way we assume
         * hardware should work, we must keep this posting read for paranoia.
         */
        if (i != 0)
                WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));

        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
         * have finished.
         */
        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
        POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

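/*
 * Bind an object into the global GTT via the intel_gtt API, using the
 * uncached AGP flag only for I915_CACHE_NONE, and mark the object as
 * having a global GTT mapping.
 */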
void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
                              enum i915_cache_level cache_level)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                        AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
        intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, flags);

        obj->has_global_gtt_mapping = 1;
}

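/*
 * Remove an object's global GTT mapping, idling the GPU first on chipsets
 * that require it.
 */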
void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

        intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT);

        undo_idling(dev_priv, interruptible);
        obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

#if 0
        if (!obj->has_dma_mapping)
                dma_unmap_sg(&dev->pdev->dev,
                             obj->pages->sgl, obj->pages->nents,
                             PCI_DMA_BIDIRECTIONAL);
#endif

        undo_idling(dev_priv, interruptible);
}

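/*
 * drm_mm color-adjust callback used on non-LLC platforms: when a node's
 * cache "color" differs from its neighbour's, shrink the usable range by a
 * page on that side so a guard page separates objects with different cache
 * attributes.
 */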
static void i915_gtt_color_adjust(struct drm_mm_node *node,
                                  unsigned long color,
                                  unsigned long *start,
                                  unsigned long *end)
{
        if (node->color != color)
                *start += 4096;

        if (!list_empty(&node->node_list)) {
                node = list_entry(node->node_list.next,
                                  struct drm_mm_node,
                                  node_list);
                if (node->allocated && node->color != color)
                        *end -= 4096;
        }
}

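/*
 * Initialize the global GTT address space: set up the drm_mm range manager
 * (with cache coloring on non-LLC parts), record the start/end/mappable
 * sizes, clear the whole range to scratch pages, and register the mappable
 * aperture as a fictitious write-combined physical range.
 */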
void i915_gem_init_global_gtt(struct drm_device *dev,
                              unsigned long start,
                              unsigned long mappable_end,
                              unsigned long end)
{
        drm_i915_private_t *dev_priv = dev->dev_private;

        unsigned long mappable;
        int error;

        mappable = min(end, mappable_end) - start;

        /* Subtract the guard page ... */
        drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
        if (!HAS_LLC(dev))
                dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;

        dev_priv->mm.gtt_start = start;
        dev_priv->mm.gtt_mappable_end = mappable_end;
        dev_priv->mm.gtt_end = end;
        dev_priv->mm.gtt_total = end - start;
        dev_priv->mm.mappable_gtt_total = mappable;

        /* ... but ensure that we clear the entire range. */
        intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
        device_printf(dev->dev,
            "taking over the fictitious range 0x%lx-0x%lx\n",
            dev->agp->base + start, dev->agp->base + start + mappable);
        error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
            dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
}

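/*
 * Obtain the GTT description used by the rest of the driver.  As written
 * (the "|| 1" in the condition), the legacy intel_gtt_get() path is always
 * taken and the standalone allocation below is unreachable.
 */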
int i915_gem_gtt_init(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* On modern platforms we need not worry ourselves with the legacy
         * hostbridge query stuff. Skip it entirely.
         */
        if (INTEL_INFO(dev)->gen < 6 || 1) {
                dev_priv->mm.gtt = intel_gtt_get();
                if (!dev_priv->mm.gtt) {
                        DRM_ERROR("Failed to initialize GTT\n");
                        return -ENODEV;
                }
                return 0;
        }

        dev_priv->mm.gtt = kmalloc(sizeof(*dev_priv->mm.gtt), M_DRM, M_WAITOK | M_ZERO);
        if (!dev_priv->mm.gtt)
                return -ENOMEM;

#ifdef CONFIG_INTEL_IOMMU
        dev_priv->mm.gtt->needs_dmar = 1;
#endif

        /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
        DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
        DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
        DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);

        return 0;
}