[dragonfly.git] sys/dev/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <drm/drmP.h>
26 #include <drm/i915_drm.h>
27 #include "i915_drv.h"
28 #include "intel_drv.h"
29
30 #include <linux/highmem.h>
31
32 #define GEN6_PPGTT_PD_ENTRIES 512
33 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
34 typedef uint64_t gen8_gtt_pte_t;
35 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
36
37 /* PPGTT stuff */
38 #define GEN6_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0xff0))
39 #define HSW_GTT_ADDR_ENCODE(addr)       ((addr) | (((addr) >> 28) & 0x7f0))
40
41 #define GEN6_PDE_VALID                  (1 << 0)
42 /* gen6+ has bits 11:4 for physical address bits 39:32 */
43 #define GEN6_PDE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)
44
45 #define GEN6_PTE_VALID                  (1 << 0)
46 #define GEN6_PTE_UNCACHED               (1 << 1)
47 #define HSW_PTE_UNCACHED                (0)
48 #define GEN6_PTE_CACHE_LLC              (2 << 1)
49 #define GEN7_PTE_CACHE_L3_LLC           (3 << 1)
50 #define GEN6_PTE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)
51 #define HSW_PTE_ADDR_ENCODE(addr)       HSW_GTT_ADDR_ENCODE(addr)
52
53 /* Cacheability Control is a 4-bit value. The low three bits are stored in
54  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
55  */
56 #define HSW_CACHEABILITY_CONTROL(bits)  ((((bits) & 0x7) << 1) | \
57                                          (((bits) & 0x8) << (11 - 3)))
58 #define HSW_WB_LLC_AGE3                 HSW_CACHEABILITY_CONTROL(0x2)
59 #define HSW_WB_LLC_AGE0                 HSW_CACHEABILITY_CONTROL(0x3)
60 #define HSW_WB_ELLC_LLC_AGE0            HSW_CACHEABILITY_CONTROL(0xb)
61 #define HSW_WB_ELLC_LLC_AGE3            HSW_CACHEABILITY_CONTROL(0x8)
62 #define HSW_WT_ELLC_LLC_AGE0            HSW_CACHEABILITY_CONTROL(0x6)
63 #define HSW_WT_ELLC_LLC_AGE3            HSW_CACHEABILITY_CONTROL(0x7)
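/*
 * Worked example (editorial note, not in the original source): per the split
 * described above, HSW_WB_ELLC_LLC_AGE0 is HSW_CACHEABILITY_CONTROL(0xb); the
 * low three bits (0b011) land in PTE bits 3:1 as 0x6 and the fourth bit lands
 * in PTE bit 11 as 0x800, so the macro evaluates to 0x806.
 */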
64
65 #define GEN8_PTES_PER_PAGE              (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
66 #define GEN8_PDES_PER_PAGE              (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
67 #define GEN8_LEGACY_PDPS                4
68
69 #define PPAT_UNCACHED_INDEX             (_PAGE_PWT | _PAGE_PCD)
70 #define PPAT_CACHED_PDE_INDEX           0 /* WB LLC */
71 #define PPAT_CACHED_INDEX               _PAGE_PAT /* WB LLCeLLC */
72 #define PPAT_DISPLAY_ELLC_INDEX         _PAGE_PCD /* WT eLLC */
73
74 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
75                                              enum i915_cache_level level,
76                                              bool valid)
77 {
78         gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
79         pte |= addr;
80         if (level != I915_CACHE_NONE)
81                 pte |= PPAT_CACHED_INDEX;
82         else
83                 pte |= PPAT_UNCACHED_INDEX;
84         return pte;
85 }
86
87 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
88                                              dma_addr_t addr,
89                                              enum i915_cache_level level)
90 {
91         gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
92         pde |= addr;
93         if (level != I915_CACHE_NONE)
94                 pde |= PPAT_CACHED_PDE_INDEX;
95         else
96                 pde |= PPAT_UNCACHED_INDEX;
97         return pde;
98 }
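/*
 * Illustrative sketch (editorial addition, not in the original source): how
 * the two encoders above compose address and PPAT bits.  The sample addresses
 * and the gen8_encode_example() helper are hypothetical, and the block is
 * kept disabled, matching the #if 0 convention used elsewhere in this file.
 */
#if 0
static inline void gen8_encode_example(void)
{
        /* Page-aligned addresses leave bits 11:0 free for the control bits. */
        gen8_gtt_pte_t pte = gen8_pte_encode(0x1000, I915_CACHE_LLC, true);
        gen8_ppgtt_pde_t pde = gen8_pde_encode(NULL, 0x2000, I915_CACHE_LLC);

        /* Cached PTE: present + writable + PPAT index with the PAT bit set. */
        WARN_ON(pte != (0x1000 | _PAGE_PRESENT | _PAGE_RW | PPAT_CACHED_INDEX));
        /* Cached PDE: present + writable + PPAT index 0 (WB LLC). */
        WARN_ON(pde != (0x2000 | _PAGE_PRESENT | _PAGE_RW | PPAT_CACHED_PDE_INDEX));
}
#endif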
99
100 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
101                                      enum i915_cache_level level,
102                                      bool valid)
103 {
104         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
105         pte |= GEN6_PTE_ADDR_ENCODE(addr);
106
107         switch (level) {
108         case I915_CACHE_L3_LLC:
109         case I915_CACHE_LLC:
110                 pte |= GEN6_PTE_CACHE_LLC;
111                 break;
112         case I915_CACHE_NONE:
113                 pte |= GEN6_PTE_UNCACHED;
114                 break;
115         default:
116                 WARN_ON(1);
117         }
118
119         return pte;
120 }
121
122 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
123                                      enum i915_cache_level level,
124                                      bool valid)
125 {
126         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
127         pte |= GEN6_PTE_ADDR_ENCODE(addr);
128
129         switch (level) {
130         case I915_CACHE_L3_LLC:
131                 pte |= GEN7_PTE_CACHE_L3_LLC;
132                 break;
133         case I915_CACHE_LLC:
134                 pte |= GEN6_PTE_CACHE_LLC;
135                 break;
136         case I915_CACHE_NONE:
137                 pte |= GEN6_PTE_UNCACHED;
138                 break;
139         default:
140                 WARN_ON(1);
141         }
142
143         return pte;
144 }
145
146 #define BYT_PTE_WRITEABLE               (1 << 1)
147 #define BYT_PTE_SNOOPED_BY_CPU_CACHES   (1 << 2)
148
149 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
150                                      enum i915_cache_level level,
151                                      bool valid)
152 {
153         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
154         pte |= GEN6_PTE_ADDR_ENCODE(addr);
155
156         /* Mark the page as writeable.  Other platforms don't have a
157          * setting for read-only/writeable, so this matches that behavior.
158          */
159         pte |= BYT_PTE_WRITEABLE;
160
161         if (level != I915_CACHE_NONE)
162                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
163
164         return pte;
165 }
166
167 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
168                                      enum i915_cache_level level,
169                                      bool valid)
170 {
171         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
172         pte |= HSW_PTE_ADDR_ENCODE(addr);
173
174         if (level != I915_CACHE_NONE)
175                 pte |= HSW_WB_LLC_AGE3;
176
177         return pte;
178 }
179
180 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
181                                       enum i915_cache_level level,
182                                       bool valid)
183 {
184         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
185         pte |= HSW_PTE_ADDR_ENCODE(addr);
186
187         switch (level) {
188         case I915_CACHE_NONE:
189                 break;
190         case I915_CACHE_WT:
191                 pte |= HSW_WT_ELLC_LLC_AGE3;
192                 break;
193         default:
194                 pte |= HSW_WB_ELLC_LLC_AGE3;
195                 break;
196         }
197
198         return pte;
199 }
200
201 /* Broadwell Page Directory Pointer Descriptors */
202 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
203                            uint64_t val)
204 {
205         int ret;
206
207         BUG_ON(entry >= 4);
208
209         ret = intel_ring_begin(ring, 6);
210         if (ret)
211                 return ret;
212
213         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
214         intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
215         intel_ring_emit(ring, (u32)(val >> 32));
216         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
217         intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
218         intel_ring_emit(ring, (u32)(val));
219         intel_ring_advance(ring);
220
221         return 0;
222 }
223
224 static int gen8_ppgtt_enable(struct drm_device *dev)
225 {
226         struct drm_i915_private *dev_priv = dev->dev_private;
227         struct intel_ring_buffer *ring;
228         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
229         int i, j, ret;
230
231         /* bit of a hack to find the actual last used pd */
232         int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
233
234         for_each_ring(ring, dev_priv, j) {
235                 I915_WRITE(RING_MODE_GEN7(ring),
236                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
237         }
238
239         for (i = used_pd - 1; i >= 0; i--) {
240                 dma_addr_t addr = ppgtt->pd_dma_addr[i];
241                 for_each_ring(ring, dev_priv, j) {
242                         ret = gen8_write_pdp(ring, i, addr);
243                         if (ret)
244                                 goto err_out;
245                 }
246         }
247         return 0;
248
249 err_out:
250         for_each_ring(ring, dev_priv, j)
251                 I915_WRITE(RING_MODE_GEN7(ring),
252                            _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
253         return ret;
254 }
255
256 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
257                                    unsigned first_entry,
258                                    unsigned num_entries,
259                                    bool use_scratch)
260 {
261         struct i915_hw_ppgtt *ppgtt =
262                 container_of(vm, struct i915_hw_ppgtt, base);
263         gen8_gtt_pte_t *pt_vaddr, scratch_pte;
264         unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
265         unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
266         unsigned last_pte, i;
267
268         scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
269                                       I915_CACHE_LLC, use_scratch);
270
271         while (num_entries) {
272                 struct vm_page *page_table = &ppgtt->gen8_pt_pages[act_pt];
273
274                 last_pte = first_pte + num_entries;
275                 if (last_pte > GEN8_PTES_PER_PAGE)
276                         last_pte = GEN8_PTES_PER_PAGE;
277
278                 pt_vaddr = kmap_atomic(page_table);
279
280                 for (i = first_pte; i < last_pte; i++)
281                         pt_vaddr[i] = scratch_pte;
282
283                 kunmap_atomic(pt_vaddr);
284
285                 num_entries -= last_pte - first_pte;
286                 first_pte = 0;
287                 act_pt++;
288         }
289 }
290
291 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
292                                       vm_page_t *pages,
293                                       unsigned int first_entry,
294                                       unsigned int num_entries,
295                                       enum i915_cache_level cache_level)
296 {
297         struct i915_hw_ppgtt *ppgtt =
298                 container_of(vm, struct i915_hw_ppgtt, base);
299         gen8_gtt_pte_t *pt_vaddr;
300         unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
301         unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
302         int i;
303
304         pt_vaddr = NULL;
305         for (i = 0; i < num_entries; i++) {
306                 if (pt_vaddr == NULL)
307                         pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);
308
309                 pt_vaddr[act_pte] =
310                         gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
311                                         cache_level, true);
312                 if (++act_pte == GEN8_PTES_PER_PAGE) {
313                         kunmap_atomic(pt_vaddr);
314                         pt_vaddr = NULL;
315                         act_pt++;
316                         act_pte = 0;
317                 }
318         }
319         if (pt_vaddr)
320                 kunmap_atomic(pt_vaddr);
321 }
322
323 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
324 {
325         struct i915_hw_ppgtt *ppgtt =
326                 container_of(vm, struct i915_hw_ppgtt, base);
327         int i, j;
328
329         drm_mm_takedown(&vm->mm);
330
331         for (i = 0; i < ppgtt->num_pd_pages ; i++) {
332                 if (ppgtt->pd_dma_addr[i]) {
333                         pci_unmap_page(ppgtt->base.dev->pdev,
334                                        ppgtt->pd_dma_addr[i],
335                                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
336
337                         for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
338                                 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
339                                 if (addr)
340                                         pci_unmap_page(ppgtt->base.dev->pdev,
341                                                        addr,
342                                                        PAGE_SIZE,
343                                                        PCI_DMA_BIDIRECTIONAL);
344
345                         }
346                 }
347                 kfree(ppgtt->gen8_pt_dma_addr[i]);
348         }
349
350         __free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
351         __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
352 }
353
354 /**
355  * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
356  * net effect resembling a 2-level page table in normal x86 terms. Each PDP
357  * represents 1GB of memory:
358  * 4 * 512 * 512 * 4096 = 4GB of legacy 32-bit address space.
359  *
360  * TODO: Do something with the size parameter
361  **/
362 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
363 {
364         struct vm_page *pt_pages;
365         int i, j, ret = -ENOMEM;
366         const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
367         const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
368
369         if (size % (1<<30))
370                 DRM_INFO("Pages will be wasted unless GTT size (%lu) is divisible by 1GB\n", size);
371
372         /* FIXME: split allocation into smaller pieces. For now we only ever do
373          * this once, but with full PPGTT, the multiple contiguous allocations
374          * will be bad.
375          */
376         ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
377         if (!ppgtt->pd_pages)
378                 return -ENOMEM;
379
380         pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
381         if (!pt_pages) {
382                 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
383                 return -ENOMEM;
384         }
385
386         ppgtt->gen8_pt_pages = pt_pages;
387         ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
388         ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
389         ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
390         ppgtt->enable = gen8_ppgtt_enable;
391         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
392         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
393         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
394         ppgtt->base.start = 0;
395         ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
396
397         BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
398
399         /*
400          * - Create a mapping for the page directories.
401          * - For each page directory:
402          *      allocate space for page table mappings.
403          *      map each page table
404          */
405         for (i = 0; i < max_pdp; i++) {
406                 dma_addr_t temp;
407                 temp = pci_map_page(ppgtt->base.dev->pdev,
408                                     &ppgtt->pd_pages[i], 0,
409                                     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
410                 if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
411                         goto err_out;
412
413                 ppgtt->pd_dma_addr[i] = temp;
414
415                 ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, M_DRM, M_WAITOK);
416                 if (!ppgtt->gen8_pt_dma_addr[i])
417                         goto err_out;
418
419                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
420                         struct vm_page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
421                         temp = pci_map_page(ppgtt->base.dev->pdev,
422                                             p, 0, PAGE_SIZE,
423                                             PCI_DMA_BIDIRECTIONAL);
424
425                         if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
426                                 goto err_out;
427
428                         ppgtt->gen8_pt_dma_addr[i][j] = temp;
429                 }
430         }
431
432         /* For now, the PPGTT helper functions all require that the PDEs are
433          * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
434          * will never need to touch the PDEs again */
435         for (i = 0; i < max_pdp; i++) {
436                 gen8_ppgtt_pde_t *pd_vaddr;
437                 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
438                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
439                         dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
440                         pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
441                                                       I915_CACHE_LLC);
442                 }
443                 kunmap_atomic(pd_vaddr);
444         }
445
446         ppgtt->base.clear_range(&ppgtt->base, 0,
447                                 ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
448                                 true);
449
450         DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
451                          ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
452         DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%ld wasted)\n",
453                          ppgtt->num_pt_pages,
454                          (ppgtt->num_pt_pages - num_pt_pages) +
455                          size % (1<<30));
456         return 0;
457
458 err_out:
459         ppgtt->base.cleanup(&ppgtt->base);
460         return ret;
461 }
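/*
 * Worked example (editorial note): for a 2GB GTT passed to gen8_ppgtt_init(),
 * max_pdp = DIV_ROUND_UP(2GB, 1GB) = 2, giving num_pd_entries = 2 * 512 =
 * 1024 PDEs backed by 1024 page-table pages, and base.total works out to
 * 1024 * 512 * 4096 bytes = 2GB, matching the requested size.
 */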
462
463 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
464 {
465         struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
466         gen6_gtt_pte_t __iomem *pd_addr;
467         uint32_t pd_entry;
468         int i;
469
470         WARN_ON(ppgtt->pd_offset & 0x3f);
471         pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
472                 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
473         for (i = 0; i < ppgtt->num_pd_entries; i++) {
474                 dma_addr_t pt_addr;
475
476                 pt_addr = ppgtt->pt_dma_addr[i];
477                 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
478                 pd_entry |= GEN6_PDE_VALID;
479
480                 writel(pd_entry, pd_addr + i);
481         }
482         readl(pd_addr);
483 }
484
485 static int gen6_ppgtt_enable(struct drm_device *dev)
486 {
487         drm_i915_private_t *dev_priv = dev->dev_private;
488         uint32_t pd_offset;
489         struct intel_ring_buffer *ring;
490         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
491         int i;
492
493         BUG_ON(ppgtt->pd_offset & 0x3f);
494
495         gen6_write_pdes(ppgtt);
496
497         pd_offset = ppgtt->pd_offset;
498         pd_offset /= 64; /* in cachelines, */
499         pd_offset <<= 16;
500
501         if (INTEL_INFO(dev)->gen == 6) {
502                 uint32_t ecochk, gab_ctl, ecobits;
503
504                 ecobits = I915_READ(GAC_ECO_BITS);
505                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
506                                          ECOBITS_PPGTT_CACHE64B);
507
508                 gab_ctl = I915_READ(GAB_CTL);
509                 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
510
511                 ecochk = I915_READ(GAM_ECOCHK);
512                 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
513                                        ECOCHK_PPGTT_CACHE64B);
514                 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
515         } else if (INTEL_INFO(dev)->gen >= 7) {
516                 uint32_t ecochk, ecobits;
517
518                 ecobits = I915_READ(GAC_ECO_BITS);
519                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
520
521                 ecochk = I915_READ(GAM_ECOCHK);
522                 if (IS_HASWELL(dev)) {
523                         ecochk |= ECOCHK_PPGTT_WB_HSW;
524                 } else {
525                         ecochk |= ECOCHK_PPGTT_LLC_IVB;
526                         ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
527                 }
528                 I915_WRITE(GAM_ECOCHK, ecochk);
529                 /* GFX_MODE is per-ring on gen7+ */
530         }
531
532         for_each_ring(ring, dev_priv, i) {
533                 if (INTEL_INFO(dev)->gen >= 7)
534                         I915_WRITE(RING_MODE_GEN7(ring),
535                                    _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
536
537                 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
538                 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
539         }
540         return 0;
541 }
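/*
 * Worked example (editorial note): RING_PP_DIR_BASE takes the page directory
 * offset in GGTT cachelines placed in bits 31:16, which is what the pd_offset
 * arithmetic above produces.  A directory starting 0x3fe000 bytes into the
 * GGTT becomes 0x3fe000 / 64 = 0xff80 cachelines, i.e. a register value of
 * 0xff800000 after the << 16.
 */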
542
543 /* PPGTT support for Sandybridge/Gen6 and later */
544 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
545                                    unsigned first_entry,
546                                    unsigned num_entries,
547                                    bool use_scratch)
548 {
549         struct i915_hw_ppgtt *ppgtt =
550                 container_of(vm, struct i915_hw_ppgtt, base);
551         gen6_gtt_pte_t *pt_vaddr, scratch_pte;
552         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
553         unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
554         unsigned last_pte, i;
555
556         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
557
558         while (num_entries) {
559                 last_pte = first_pte + num_entries;
560                 if (last_pte > I915_PPGTT_PT_ENTRIES)
561                         last_pte = I915_PPGTT_PT_ENTRIES;
562
563                 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
564
565                 for (i = first_pte; i < last_pte; i++)
566                         pt_vaddr[i] = scratch_pte;
567
568                 kunmap_atomic(pt_vaddr);
569
570                 num_entries -= last_pte - first_pte;
571                 first_pte = 0;
572                 act_pt++;
573         }
574 }
575
576 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
577                                       vm_page_t *pages,
578                                       unsigned first_entry,
579                                       unsigned num_entries,
580                                       enum i915_cache_level cache_level)
581 {
582         struct i915_hw_ppgtt *ppgtt =
583                 container_of(vm, struct i915_hw_ppgtt, base);
584         gen6_gtt_pte_t *pt_vaddr;
585         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
586         unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
587
588         pt_vaddr = NULL;
589         for (int i = 0; i < num_entries; i++) {
590                 if (pt_vaddr == NULL)
591                         pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
592
593                 pt_vaddr[act_pte] =
594                         vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
595                                        cache_level, true);
596                 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
597                         kunmap_atomic(pt_vaddr);
598                         pt_vaddr = NULL;
599                         act_pt++;
600                         act_pte = 0;
601                 }
602         }
603         if (pt_vaddr)
604                 kunmap_atomic(pt_vaddr);
605 }
606
607 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
608 {
609         struct i915_hw_ppgtt *ppgtt =
610                 container_of(vm, struct i915_hw_ppgtt, base);
611         int i;
612
613         drm_mm_takedown(&ppgtt->base.mm);
614
615         if (ppgtt->pt_dma_addr) {
616                 for (i = 0; i < ppgtt->num_pd_entries; i++)
617                         pci_unmap_page(ppgtt->base.dev->pdev,
618                                        ppgtt->pt_dma_addr[i],
619                                        4096, PCI_DMA_BIDIRECTIONAL);
620         }
621
622         kfree(ppgtt->pt_dma_addr);
623         for (i = 0; i < ppgtt->num_pd_entries; i++)
624                 __free_page(ppgtt->pt_pages[i]);
625         kfree(ppgtt->pt_pages);
626         kfree(ppgtt);
627 }
628
629 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
630 {
631         struct drm_device *dev = ppgtt->base.dev;
632         struct drm_i915_private *dev_priv = dev->dev_private;
633         unsigned first_pd_entry_in_global_pt;
634         int i;
635         int ret = -ENOMEM;
636
637         /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
638          * entries. For aliasing ppgtt support we just steal them at the end for
639          * now. */
640         first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
641
642         ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
643         ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
644         ppgtt->enable = gen6_ppgtt_enable;
645         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
646         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
647         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
648         ppgtt->base.scratch = dev_priv->gtt.base.scratch;
649         ppgtt->base.start = 0;
650         ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
651         ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
652                                   GFP_KERNEL);
653         if (!ppgtt->pt_pages)
654                 return -ENOMEM;
655
656         for (i = 0; i < ppgtt->num_pd_entries; i++) {
657                 ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
658                     VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
659                 if (!ppgtt->pt_pages[i])
660                         goto err_pt_alloc;
661         }
662
663         ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
664                                      GFP_KERNEL);
665         if (!ppgtt->pt_dma_addr)
666                 goto err_pt_alloc;
667
668         for (i = 0; i < ppgtt->num_pd_entries; i++) {
669                 dma_addr_t pt_addr;
670
671                 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
672                                        PCI_DMA_BIDIRECTIONAL);
673
674 #if 0
675                 if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
676                         ret = -EIO;
677                         goto err_pd_pin;        /* XXX where is label? */
678
679                 }
680 #endif
681                 ppgtt->pt_dma_addr[i] = pt_addr;
682         }
683
684         ppgtt->base.clear_range(&ppgtt->base, 0,
685                                 ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
686
687         ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
688
689         return 0;
690
691 err_pt_alloc:
692         kfree(ppgtt->pt_dma_addr);
693         for (i = 0; i < ppgtt->num_pd_entries; i++) {
694                 if (ppgtt->pt_pages[i])
695                         __free_page(ppgtt->pt_pages[i]);
696
697         }
698         kfree(ppgtt->pt_pages);
699
700         return ret;
701 }
702
703 static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
704 {
705         struct drm_i915_private *dev_priv = dev->dev_private;
706         struct i915_hw_ppgtt *ppgtt;
707         int ret;
708
709         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
710         if (!ppgtt)
711                 return -ENOMEM;
712
713         ppgtt->base.dev = dev;
714
715         if (INTEL_INFO(dev)->gen < 8)
716                 ret = gen6_ppgtt_init(ppgtt);
717         else if (IS_GEN8(dev))
718                 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
719         else
720                 BUG();
721
722         if (ret)
723                 kfree(ppgtt);
724         else {
725                 dev_priv->mm.aliasing_ppgtt = ppgtt;
726                 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
727                             ppgtt->base.total);
728         }
729
730         return ret;
731 }
732
733 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
734 {
735         struct drm_i915_private *dev_priv = dev->dev_private;
736         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
737
738         if (!ppgtt)
739                 return;
740
741         ppgtt->base.cleanup(&ppgtt->base);
742         dev_priv->mm.aliasing_ppgtt = NULL;
743 }
744
745 #if 0
746 static void
747 i915_ppgtt_insert_pages(struct i915_address_space *vm, unsigned first_entry,
748     unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
749 {
750         struct i915_hw_ppgtt *ppgtt =
751                container_of(vm, struct i915_hw_ppgtt, base);
752         uint32_t *pt_vaddr4;
753         uint64_t *pt_vaddr8;
754         unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
755         unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
756         unsigned last_pte, i;
757         dma_addr_t page_addr;
758
759         while (num_entries) {
760                 last_pte = first_pte + num_entries;
761                 if (last_pte > I915_PPGTT_PT_ENTRIES)
762                         last_pte = I915_PPGTT_PT_ENTRIES;
763
764                 /*
765                  * XXX severe hack.  The insert_entries() function pointer
766                  * needs to be setup and used instead of this mess.
767                  */
768                 if (IS_GEN8(ppgtt->base.dev)) {
769                         pt_vaddr4 = NULL;
770                         pt_vaddr8 = kmap_atomic(&ppgtt->gen8_pt_pages[act_pd]);
771                 } else {
772                         pt_vaddr4 = kmap_atomic(ppgtt->pt_pages[act_pd]);
773                         pt_vaddr8 = NULL;
774                 }
775
776                 for (i = first_pte; i < last_pte; i++) {
777                         page_addr = VM_PAGE_TO_PHYS(*pages);
778                         if (IS_GEN8(ppgtt->base.dev)) {
779                                 pt_vaddr8[i] = gen8_pte_encode(page_addr, cache_level, true);
780                         } else {
781                                 pt_vaddr4[i] = vm->pte_encode(page_addr, cache_level, true);
782                         }
783
784                         pages++;
785                 }
786
787                 if (IS_GEN8(ppgtt->base.dev))
788                         kunmap_atomic(pt_vaddr8);
789                 else
790                         kunmap_atomic(pt_vaddr4);
791
792                 num_entries -= last_pte - first_pte;
793                 first_pte = 0;
794                 act_pd++;
795         }
796 }
797 #endif
798
799 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
800                             struct drm_i915_gem_object *obj,
801                             enum i915_cache_level cache_level)
802 {
803 #if 0
804         i915_ppgtt_insert_pages(&ppgtt->base,
805                               i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
806             obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
807 #endif
808         ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
809                                    i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
810                                    obj->base.size >> PAGE_SHIFT,
811                                    cache_level);
812 }
813
814 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
815                               struct drm_i915_gem_object *obj)
816 {
817         ppgtt->base.clear_range(&ppgtt->base,
818                                 i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
819                                 obj->base.size >> PAGE_SHIFT,
820                                 true);
821 }
822
823 extern int intel_iommu_gfx_mapped;
824 /* Certain Gen5 chipsets require idling the GPU before
825  * unmapping anything from the GTT when VT-d is enabled.
826  */
827 static inline bool needs_idle_maps(struct drm_device *dev)
828 {
829 #ifdef CONFIG_INTEL_IOMMU
830         /* Query intel_iommu to see if we need the workaround. Presumably that
831          * was loaded first.
832          */
833         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
834                 return true;
835 #endif
836         return false;
837 }
838
839 static bool do_idling(struct drm_i915_private *dev_priv)
840 {
841         bool ret = dev_priv->mm.interruptible;
842
843         if (unlikely(dev_priv->gtt.do_idle_maps)) {
844                 dev_priv->mm.interruptible = false;
845                 if (i915_gpu_idle(dev_priv->dev)) {
846                         DRM_ERROR("Couldn't idle GPU\n");
847                         /* Wait a bit, in hopes it avoids the hang */
848                         udelay(10);
849                 }
850         }
851
852         return ret;
853 }
854
855 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
856 {
857         if (unlikely(dev_priv->gtt.do_idle_maps))
858                 dev_priv->mm.interruptible = interruptible;
859 }
860
861 void i915_check_and_clear_faults(struct drm_device *dev)
862 {
863         struct drm_i915_private *dev_priv = dev->dev_private;
864         struct intel_ring_buffer *ring;
865         int i;
866
867         if (INTEL_INFO(dev)->gen < 6)
868                 return;
869
870         for_each_ring(ring, dev_priv, i) {
871                 u32 fault_reg;
872                 fault_reg = I915_READ(RING_FAULT_REG(ring));
873                 if (fault_reg & RING_FAULT_VALID) {
874 #if 0
875                         DRM_DEBUG_DRIVER("Unexpected fault\n"
876                                          "\tAddr: 0x%08lx\n"
877                                          "\tAddress space: %s\n"
878                                          "\tSource ID: %d\n"
879                                          "\tType: %d\n",
880                                          fault_reg & PAGE_MASK,
881                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
882                                          RING_FAULT_SRCID(fault_reg),
883                                          RING_FAULT_FAULT_TYPE(fault_reg));
884 #endif
885                         I915_WRITE(RING_FAULT_REG(ring),
886                                    fault_reg & ~RING_FAULT_VALID);
887                 }
888         }
889         POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
890 }
891
892 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
893 {
894         struct drm_i915_private *dev_priv = dev->dev_private;
895
896         /* Don't bother messing with faults pre GEN6 as we have little
897          * documentation supporting that it's a good idea.
898          */
899         if (INTEL_INFO(dev)->gen < 6)
900                 return;
901
902         i915_check_and_clear_faults(dev);
903
904         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
905                                        dev_priv->gtt.base.start / PAGE_SIZE,
906                                        dev_priv->gtt.base.total / PAGE_SIZE,
907                                        true);
908 }
909
910 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
911 {
912         struct drm_i915_private *dev_priv = dev->dev_private;
913         struct drm_i915_gem_object *obj;
914
915         i915_check_and_clear_faults(dev);
916
917         /* First fill our portion of the GTT with scratch pages */
918         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
919                                        dev_priv->gtt.base.start / PAGE_SIZE,
920                                        dev_priv->gtt.base.total / PAGE_SIZE,
921                                        true);
922
923         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
924                 i915_gem_clflush_object(obj, obj->pin_display);
925                 i915_gem_gtt_bind_object(obj, obj->cache_level);
926         }
927
928         i915_gem_chipset_flush(dev);
929 }
930
931 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
932 {
933         if (obj->has_dma_mapping)
934                 return 0;
935
936 #if 0
937         if (!dma_map_sg(&obj->base.dev->pdev->dev,
938                         obj->pages->sgl, obj->pages->nents,
939                         PCI_DMA_BIDIRECTIONAL))
940                 return -ENOSPC;
941 #endif
942
943         return 0;
944 }
945
946 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
947 {
948 #if 0
949         writeq(pte, addr);
950 #else
951         iowrite32((u32)pte, addr);
952         iowrite32(pte >> 32, addr + 4);
953 #endif
954 }
955
956 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
957                                      vm_page_t *pages,
958                                      unsigned int first_entry,
959                                      unsigned int num_entries,
960                                      enum i915_cache_level level)
961 {
962         struct drm_i915_private *dev_priv = vm->dev->dev_private;
963         gen8_gtt_pte_t __iomem *gtt_entries =
964                 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
965         int i = 0;
966         dma_addr_t addr;
967
968         for (i = 0; i < num_entries; i++) {
969                 addr = VM_PAGE_TO_PHYS(pages[i]);
970                 gen8_set_pte(&gtt_entries[i],
971                              gen8_pte_encode(addr, level, true));
972         }
973
974         /*
975          * XXX: This serves as a posting read to make sure that the PTE has
976          * actually been updated. There is some concern that even though
977          * registers and PTEs are within the same BAR that they are potentially
978          * of NUMA access patterns. Therefore, even with the way we assume
979          * hardware should work, we must keep this posting read for paranoia.
980          */
981         if (i != 0)
982                 WARN_ON(readq(&gtt_entries[i-1])
983                         != gen8_pte_encode(addr, level, true));
984
985         /* This next bit makes the above posting read even more important. We
986          * want to flush the TLBs only after we're certain all the PTE updates
987          * have finished.
988          */
989         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
990         POSTING_READ(GFX_FLSH_CNTL_GEN6);
991 }
992
993 /*
994  * Binds an object into the global gtt with the specified cache level. The object
995  * will be accessible to the GPU via commands whose operands reference offsets
996  * within the global GTT as well as accessible by the GPU through the GMADR
997  * mapped BAR (dev_priv->mm.gtt->gtt).
998  */
999 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
1000                                      vm_page_t *pages,
1001                                      unsigned int first_entry,
1002                                      unsigned int num_entries,
1003                                      enum i915_cache_level level)
1004 {
1005         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1006         gen6_gtt_pte_t __iomem *gtt_entries =
1007                 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1008         int i = 0;
1009         dma_addr_t addr;
1010
1011         for (i = 0; i < num_entries; i++) {
1012                 addr = VM_PAGE_TO_PHYS(pages[i]);
1013                 iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
1014         }
1015
1016         /* XXX: This serves as a posting read to make sure that the PTE has
1017          * actually been updated. There is some concern that even though
1018          * registers and PTEs are within the same BAR that they are potentially
1019          * of NUMA access patterns. Therefore, even with the way we assume
1020          * hardware should work, we must keep this posting read for paranoia.
1021          */
1022         if (i != 0)
1023                 WARN_ON(readl(&gtt_entries[i-1]) !=
1024                         vm->pte_encode(addr, level, true));
1025
1026         /* This next bit makes the above posting read even more important. We
1027          * want to flush the TLBs only after we're certain all the PTE updates
1028          * have finished.
1029          */
1030         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1031         POSTING_READ(GFX_FLSH_CNTL_GEN6);
1032 }
1033
1034 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
1035                                   unsigned int first_entry,
1036                                   unsigned int num_entries,
1037                                   bool use_scratch)
1038 {
1039         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1040         gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
1041                 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1042         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1043         int i;
1044
1045         if (WARN(num_entries > max_entries,
1046                  "First entry = %d; Num entries = %d (max=%d)\n",
1047                  first_entry, num_entries, max_entries))
1048                 num_entries = max_entries;
1049
1050         scratch_pte = gen8_pte_encode(vm->scratch.addr,
1051                                       I915_CACHE_LLC,
1052                                       use_scratch);
1053         for (i = 0; i < num_entries; i++)
1054                 gen8_set_pte(&gtt_base[i], scratch_pte);
1055         readl(gtt_base);
1056 }
1057
1058 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
1059                                   unsigned int first_entry,
1060                                   unsigned int num_entries,
1061                                   bool use_scratch)
1062 {
1063         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1064         gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
1065                 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1066         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1067         int i;
1068
1069         if (WARN(num_entries > max_entries,
1070                  "First entry = %d; Num entries = %d (max=%d)\n",
1071                  first_entry, num_entries, max_entries))
1072                 num_entries = max_entries;
1073
1074         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
1075
1076         for (i = 0; i < num_entries; i++)
1077                 iowrite32(scratch_pte, &gtt_base[i]);
1078         readl(gtt_base);
1079 }
1080
1081 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
1082                                      vm_page_t *pages,
1083                                      unsigned int pg_start,
1084                                      unsigned int num_entries,
1085                                      enum i915_cache_level cache_level)
1086 {
1087         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
1088                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
1089
1090         intel_gtt_insert_pages(pg_start, num_entries, pages, flags);
1091 }
1092
1093 static void i915_ggtt_clear_range(struct i915_address_space *vm,
1094                                   unsigned int first_entry,
1095                                   unsigned int num_entries,
1096                                   bool unused)
1097 {
1098         intel_gtt_clear_range(first_entry, num_entries);
1099 }
1100
1101 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
1102                               enum i915_cache_level cache_level)
1103 {
1104         struct drm_device *dev = obj->base.dev;
1105         struct drm_i915_private *dev_priv = dev->dev_private;
1106         const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
1107
1108         dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
1109                                           entry,
1110                                           obj->base.size >> PAGE_SHIFT,
1111                                           cache_level);
1112
1113         obj->has_global_gtt_mapping = 1;
1114 }
1115
1116 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
1117 {
1118         struct drm_device *dev = obj->base.dev;
1119         struct drm_i915_private *dev_priv = dev->dev_private;
1120         const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
1121
1122         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1123                                        entry,
1124                                        obj->base.size >> PAGE_SHIFT,
1125                                        true);
1126
1127         obj->has_global_gtt_mapping = 0;
1128 }
1129
1130 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
1131 {
1132         struct drm_device *dev = obj->base.dev;
1133         struct drm_i915_private *dev_priv = dev->dev_private;
1134         bool interruptible;
1135
1136         interruptible = do_idling(dev_priv);
1137
1138 #if 0
1139         if (!obj->has_dma_mapping)
1140                 dma_unmap_sg(&dev->pdev->dev,
1141                              obj->pages->sgl, obj->pages->nents,
1142                              PCI_DMA_BIDIRECTIONAL);
1143 #endif
1144
1145         undo_idling(dev_priv, interruptible);
1146 }
1147
1148 static void i915_gtt_color_adjust(struct drm_mm_node *node,
1149                                   unsigned long color,
1150                                   unsigned long *start,
1151                                   unsigned long *end)
1152 {
1153         if (node->color != color)
1154                 *start += 4096;
1155
1156         if (!list_empty(&node->node_list)) {
1157                 node = list_entry(node->node_list.next,
1158                                   struct drm_mm_node,
1159                                   node_list);
1160                 if (node->allocated && node->color != color)
1161                         *end -= 4096;
1162         }
1163 }
1164
1165 void i915_gem_setup_global_gtt(struct drm_device *dev,
1166                                unsigned long start,
1167                                unsigned long mappable_end,
1168                                unsigned long end)
1169 {
1170         /* Let GEM Manage all of the aperture.
1171          *
1172          * However, leave one page at the end still bound to the scratch page.
1173          * There are a number of places where the hardware apparently prefetches
1174          * past the end of the object, and we've seen multiple hangs with the
1175          * GPU head pointer stuck in a batchbuffer bound at the last page of the
1176          * aperture.  One page should be enough to keep any prefetching inside
1177          * of the aperture.
1178          */
1179         struct drm_i915_private *dev_priv = dev->dev_private;
1180         struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
1181         unsigned long mappable;
1182         int error;
1183         struct drm_mm_node *entry;
1184         struct drm_i915_gem_object *obj;
1185         unsigned long hole_start, hole_end;
1186
1187         kprintf("MAPPABLE_END VS END %016jx %016jx\n", mappable_end, end);
1188         tsleep(&mappable_end, 0, "DELAY", hz); /* for kprintf */
1189         /*BUG_ON(mappable_end > end);*/
1190
1191         mappable = min(end, mappable_end) - start;
1192
1193         /* Subtract the guard page ... */
1194         drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
1195         if (!HAS_LLC(dev))
1196                 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
1197
1198         /* Mark any preallocated objects as occupied */
1199         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1200                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
1201                 int ret;
1202                 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
1203                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
1204
1205                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
1206                 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
1207                 if (ret)
1208                         DRM_DEBUG_KMS("Reservation failed\n");
1209                 obj->has_global_gtt_mapping = 1;
1210         }
1211
1212         dev_priv->gtt.base.start = start;
1213         dev_priv->gtt.base.total = end - start;
1214
1215         /* Clear any non-preallocated blocks */
1216         drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
1217                 const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
1218                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
1219                               hole_start, hole_end);
1220                 ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
1221         }
1222         /* ... but ensure that we clear the entire range. */
1223         intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
1224         device_printf(dev->dev,
1225             "taking over the fictitious range 0x%lx-0x%lx\n",
1226             dev->agp->base + start, dev->agp->base + start + mappable);
1227         error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
1228             dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
1229
1230         /* And finally clear the reserved guard page */
1231         ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
1232 }
1233
1234 static bool
1235 intel_enable_ppgtt(struct drm_device *dev)
1236 {
1237         if (i915_enable_ppgtt >= 0)
1238                 return i915_enable_ppgtt;
1239
1240 #ifdef CONFIG_INTEL_IOMMU
1241         /* Disable ppgtt on SNB if VT-d is on. */
1242         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
1243                 return false;
1244 #endif
1245
1246         return true;
1247 }
1248
1249 void i915_gem_init_global_gtt(struct drm_device *dev)
1250 {
1251         struct drm_i915_private *dev_priv = dev->dev_private;
1252         unsigned long gtt_size, mappable_size;
1253
1254         gtt_size = dev_priv->gtt.base.total;
1255         mappable_size = dev_priv->gtt.mappable_end;
1256
1257         if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
1258                 int ret;
1259
1260                 if (INTEL_INFO(dev)->gen <= 7) {
1261                         /* PPGTT pdes are stolen from global gtt ptes, so shrink the
1262                          * aperture accordingly when using aliasing ppgtt. */
1263                         gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
1264                 }
1265
1266                 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
1267
1268                 ret = i915_gem_init_aliasing_ppgtt(dev);
1269                 if (!ret)
1270                         return;
1271
1272                 DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
1273                 drm_mm_takedown(&dev_priv->gtt.base.mm);
1274                 if (INTEL_INFO(dev)->gen < 8)
1275                         gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE;
1276         }
1277         i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
1278 }
1279
1280 static int setup_scratch_page(struct drm_device *dev)
1281 {
1282         struct drm_i915_private *dev_priv = dev->dev_private;
1283         struct vm_page *page;
1284         dma_addr_t dma_addr;
1285
1286         page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
1287         if (page == NULL)
1288                 return -ENOMEM;
1289         get_page(page);
1290         set_pages_uc(page, 1);
1291
1292 #ifdef CONFIG_INTEL_IOMMU
1293         dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
1294                                 PCI_DMA_BIDIRECTIONAL);
1295         if (pci_dma_mapping_error(dev->pdev, dma_addr))
1296                 return -EINVAL;
1297 #else
1298         dma_addr = page_to_phys(page);
1299 #endif
1300         dev_priv->gtt.base.scratch.page = page;
1301         dev_priv->gtt.base.scratch.addr = dma_addr;
1302
1303         return 0;
1304 }
1305
1306 #if 0
1307 static void teardown_scratch_page(struct drm_device *dev)
1308 {
1309         struct drm_i915_private *dev_priv = dev->dev_private;
1310         struct page *page = dev_priv->gtt.base.scratch.page;
1311
1312         set_pages_wb(page, 1);
1313         pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
1314                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
1315         put_page(page);
1316         __free_page(page);
1317 }
1318 #endif
1319
1320 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1321 {
1322         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1323         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1324         return snb_gmch_ctl << 20;
1325 }
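/*
 * Worked example (editorial note): the GGMS field is treated here as the GTT
 * size in MB, so a raw field value of 2 yields 2 << 20 = 2MB of PTE space;
 * at 4 bytes per gen6 PTE that is 512K entries, i.e. 2GB of GGTT address
 * space at 4KB per page.
 */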
1326
1327 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1328 {
1329         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1330         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1331         if (bdw_gmch_ctl)
1332                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1333         if (bdw_gmch_ctl > 4) {
1334                 WARN_ON(!i915_preliminary_hw_support);
1335                 return 4<<20;
1336         }
1337
1338         return bdw_gmch_ctl << 20;
1339 }
1340
1341 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
1342 {
1343         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
1344         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
1345         return snb_gmch_ctl << 25; /* 32 MB units */
1346 }
1347
1348 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
1349 {
1350         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
1351         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
1352         return bdw_gmch_ctl << 25; /* 32 MB units */
1353 }
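/*
 * Worked example (editorial note): both stolen-size helpers scale the GMS
 * field by 32MB (<< 25), so a field value of 8 corresponds to 8 * 32MB =
 * 256MB of stolen graphics memory.
 */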
1354
1355 static int ggtt_probe_common(struct drm_device *dev,
1356                              size_t gtt_size)
1357 {
1358         struct drm_i915_private *dev_priv = dev->dev_private;
1359         phys_addr_t gtt_phys_addr;
1360         int ret;
1361
1362         /* For Modern GENs the PTEs and register space are split in the BAR */
1363         gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
1364                 (pci_resource_len(dev->pdev, 0) / 2);
1365
1366         kprintf("gtt_probe_common: gtt_phys_addr=0x%lx\n", gtt_phys_addr);
1367         dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
1368         if (!dev_priv->gtt.gsm) {
1369                 DRM_ERROR("Failed to map the gtt page table\n");
1370                 return -ENOMEM;
1371         }
1372
1373         ret = setup_scratch_page(dev);
1374         if (ret) {
1375                 DRM_ERROR("Scratch setup failed\n");
1376                 /* iounmap will also get called at remove, but meh */
1377 #if 0
1378                 iounmap(dev_priv->gtt.gsm);
1379 #endif
1380         }
1381
1382         return ret;
1383 }
1384
1385 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
1386  * bits. When using advanced contexts each context stores its own PAT, but
1387  * writing this data shouldn't be harmful even in those cases. */
1388 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
1389 {
1390 #define GEN8_PPAT_UC            (0<<0)
1391 #define GEN8_PPAT_WC            (1<<0)
1392 #define GEN8_PPAT_WT            (2<<0)
1393 #define GEN8_PPAT_WB            (3<<0)
1394 #define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
1395 /* FIXME(BDW): Bspec is completely confused about cache control bits. */
1396 #define GEN8_PPAT_LLC           (1<<2)
1397 #define GEN8_PPAT_LLCELLC       (2<<2)
1398 #define GEN8_PPAT_LLCeLLC       (3<<2)
1399 #define GEN8_PPAT_AGE(x)        (x<<4)
1400 #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
1401         uint64_t pat;
1402
1403         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
1404               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
1405               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
1406               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
1407               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
1408               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
1409               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
1410               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
1411
1412         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
1413          * write would work. */
1414         I915_WRITE(GEN8_PRIVATE_PAT, pat);
1415         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1416 }
1417
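/* Probe the BDW global GTT: read the GMCH control word to size the stolen
 * region and the GTT itself, program the private PPAT, map the GTT and
 * install the gen8 clear/insert hooks.
 */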
1418 static int gen8_gmch_probe(struct drm_device *dev,
1419                            size_t *gtt_total,
1420                            size_t *stolen,
1421                            phys_addr_t *mappable_base,
1422                            unsigned long *mappable_end)
1423 {
1424         struct drm_i915_private *dev_priv = dev->dev_private;
1425         unsigned int gtt_size;
1426         u16 snb_gmch_ctl;
1427         int ret;
1428
1429         /* TODO: We're not aware of mappable constraints on gen8 yet */
1430         *mappable_base = pci_resource_start(dev->pdev, 2);
1431         *mappable_end = pci_resource_len(dev->pdev, 2);
1432
1433 #if 0
1434         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
1435                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
1436 #endif
1437
1438         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1439
1440         *stolen = gen8_get_stolen_size(snb_gmch_ctl);
1441
1442         gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
1443         *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
1444
1445         gen8_setup_private_ppat(dev_priv);
1446
1447         ret = ggtt_probe_common(dev, gtt_size);
1448
1449         dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
1450         dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
1451
1452         return ret;
1453 }
1454
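/* Probe the gen6/7 global GTT: validate the GMADR aperture, size stolen
 * memory and the GTT from the GMCH control word, then map the GTT and
 * install the gen6 clear/insert hooks.
 */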
1455 static int gen6_gmch_probe(struct drm_device *dev,
1456                            size_t *gtt_total,
1457                            size_t *stolen,
1458                            phys_addr_t *mappable_base,
1459                            unsigned long *mappable_end)
1460 {
1461         struct drm_i915_private *dev_priv = dev->dev_private;
1462         unsigned int gtt_size;
1463         u16 snb_gmch_ctl;
1464         int ret;
1465
1466         *mappable_base = pci_resource_start(dev->pdev, 2);
1467         *mappable_end = pci_resource_len(dev->pdev, 2);
1468
1469         /* 64/512MB is the current min/max we actually know of, but this is just
1470          * a coarse sanity check.
1471          */
1472         if (*mappable_end < (64 << 20) || *mappable_end > (512 << 20)) {
1473                 DRM_ERROR("Unknown GMADR size (%lx)\n",
1474                           dev_priv->gtt.mappable_end);
1475                 return -ENXIO;
1476         }
1477
1478 #if 0
1479         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
1480                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
1481 #endif
1482         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1483
1484         *stolen = gen6_get_stolen_size(snb_gmch_ctl);
1485
1486         gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
1487         *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
1488
1489         ret = ggtt_probe_common(dev, gtt_size);
1490
1491         dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
1492         dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
1493
1494         return ret;
1495 }
1496
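/* Teardown is stubbed out in this port; upstream unmaps the GSM and frees
 * the scratch page here.
 */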
1497 static void gen6_gmch_remove(struct i915_address_space *vm)
1498 {
1499 #if 0
1500         struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
1501
1502         drm_mm_takedown(&vm->mm);
1503         iounmap(gtt->gsm);
1504         teardown_scratch_page(vm->dev);
1505 #endif
1506 }
1507
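/* Gen5 and earlier go through the GMCH/intel-gtt interface instead of
 * mapping the GTT directly.
 */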
1508 static int i915_gmch_probe(struct drm_device *dev,
1509                            size_t *gtt_total,
1510                            size_t *stolen,
1511                            phys_addr_t *mappable_base,
1512                            unsigned long *mappable_end)
1513 {
1514         struct drm_i915_private *dev_priv = dev->dev_private;
1515 #if 0
1516         int ret;
1517
1518         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
1519         if (!ret) {
1520                 DRM_ERROR("failed to set up gmch\n");
1521                 return -EIO;
1522         }
1523 #endif
1524
1525         intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
1526
1527         dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
1528         dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
1529         dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
1530
1531         if (unlikely(dev_priv->gtt.do_idle_maps))
1532                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
1533
1534         return 0;
1535 }
1536
1537 static void i915_gmch_remove(struct i915_address_space *vm)
1538 {
1539 }
1540
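/* Pick the probe/cleanup and PTE encoding callbacks for this generation,
 * then probe the GTT and report the resulting sizes.
 */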
1541 int i915_gem_gtt_init(struct drm_device *dev)
1542 {
1543         struct drm_i915_private *dev_priv = dev->dev_private;
1544         struct i915_gtt *gtt = &dev_priv->gtt;
1545         int ret;
1546
1547         if (INTEL_INFO(dev)->gen <= 5) {
1548                 gtt->gtt_probe = i915_gmch_probe;
1549                 gtt->base.cleanup = i915_gmch_remove;
1550         } else if (INTEL_INFO(dev)->gen < 8) {
1551                 gtt->gtt_probe = gen6_gmch_probe;
1552                 gtt->base.cleanup = gen6_gmch_remove;
1553                 if (IS_HASWELL(dev) && dev_priv->ellc_size)
1554                         gtt->base.pte_encode = iris_pte_encode;
1555                 else if (IS_HASWELL(dev))
1556                         gtt->base.pte_encode = hsw_pte_encode;
1557                 else if (IS_VALLEYVIEW(dev))
1558                         gtt->base.pte_encode = byt_pte_encode;
1559                 else if (INTEL_INFO(dev)->gen >= 7)
1560                         gtt->base.pte_encode = ivb_pte_encode;
1561                 else
1562                         gtt->base.pte_encode = snb_pte_encode;
1563         } else {
1564                 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
1565                 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
1566         }
1567
1568         ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
1569                              &gtt->mappable_base, &gtt->mappable_end);
1570         if (ret)
1571                 return ret;
1572
1573         gtt->base.dev = dev;
1574
1575         /* GMADR is the PCI mmio aperture into the global GTT. */
1576         DRM_INFO("Memory usable by graphics device = %zdM\n",
1577                  gtt->base.total >> 20);
1578         DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
1579         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
1580
1581         return 0;
1582 }