sys/dev/drm/i915/i915_gem_gtt.c (dragonfly.git)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <drm/drmP.h>
28 #include <drm/i915_drm.h>
29 #include "i915_drv.h"
30 #include "intel_drv.h"
31
32 #include <linux/highmem.h>
33
34 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
35 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
36
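/*
 * Sanitize the enable_ppgtt module parameter against what the hardware and
 * IOMMU configuration can actually support: 0 disables PPGTT, 1 selects
 * aliasing PPGTT, 2 selects full PPGTT.
 */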
37 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
38 {
39         if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
40                 return 0;
41
42         if (enable_ppgtt == 1)
43                 return 1;
44
45         if (enable_ppgtt == 2 && HAS_PPGTT(dev))
46                 return 2;
47
48 #ifdef CONFIG_INTEL_IOMMU
49         /* Disable ppgtt on SNB if VT-d is on. */
50         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
51                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
52                 return 0;
53         }
54 #endif
55
56         /* Early VLV steppings don't have working PPGTT */
57         int revision = pci_read_config(dev->dev, PCIR_REVID, 1);
58         if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
59             revision < 0xb) {
60                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
61                 return 0;
62         }
63
64         return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
65 }
66
67 static void ppgtt_bind_vma(struct i915_vma *vma,
68                            enum i915_cache_level cache_level,
69                            u32 flags);
70 static void ppgtt_unbind_vma(struct i915_vma *vma);
71
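/*
 * Encode a gen8 PTE: physical address plus present/RW bits, with the cache
 * level translated into the corresponding PPAT index.
 */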
72 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
73                                              enum i915_cache_level level,
74                                              bool valid)
75 {
76         gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
77         pte |= addr;
78
79         switch (level) {
80         case I915_CACHE_NONE:
81                 pte |= PPAT_UNCACHED_INDEX;
82                 break;
83         case I915_CACHE_WT:
84                 pte |= PPAT_DISPLAY_ELLC_INDEX;
85                 break;
86         default:
87                 pte |= PPAT_CACHED_INDEX;
88                 break;
89         }
90
91         return pte;
92 }
93
94 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
95                                              dma_addr_t addr,
96                                              enum i915_cache_level level)
97 {
98         gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
99         pde |= addr;
100         if (level != I915_CACHE_NONE)
101                 pde |= PPAT_CACHED_PDE_INDEX;
102         else
103                 pde |= PPAT_UNCACHED_INDEX;
104         return pde;
105 }
106
107 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
108                                      enum i915_cache_level level,
109                                      bool valid, u32 unused)
110 {
111         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
112         pte |= GEN6_PTE_ADDR_ENCODE(addr);
113
114         switch (level) {
115         case I915_CACHE_L3_LLC:
116         case I915_CACHE_LLC:
117                 pte |= GEN6_PTE_CACHE_LLC;
118                 break;
119         case I915_CACHE_NONE:
120                 pte |= GEN6_PTE_UNCACHED;
121                 break;
122         default:
123                 WARN_ON(1);
124         }
125
126         return pte;
127 }
128
129 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
130                                      enum i915_cache_level level,
131                                      bool valid, u32 unused)
132 {
133         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
134         pte |= GEN6_PTE_ADDR_ENCODE(addr);
135
136         switch (level) {
137         case I915_CACHE_L3_LLC:
138                 pte |= GEN7_PTE_CACHE_L3_LLC;
139                 break;
140         case I915_CACHE_LLC:
141                 pte |= GEN6_PTE_CACHE_LLC;
142                 break;
143         case I915_CACHE_NONE:
144                 pte |= GEN6_PTE_UNCACHED;
145                 break;
146         default:
147                 WARN_ON(1);
148         }
149
150         return pte;
151 }
152
153 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
154                                      enum i915_cache_level level,
155                                      bool valid, u32 flags)
156 {
157         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
158         pte |= GEN6_PTE_ADDR_ENCODE(addr);
159
160         /* Mark the page as writable.  Other platforms don't have a
161          * setting for read-only/writable, so this matches that behavior.
162          */
163         if (!(flags & PTE_READ_ONLY))
164                 pte |= BYT_PTE_WRITEABLE;
165
166         if (level != I915_CACHE_NONE)
167                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
168
169         return pte;
170 }
171
172 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
173                                      enum i915_cache_level level,
174                                      bool valid, u32 unused)
175 {
176         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
177         pte |= HSW_PTE_ADDR_ENCODE(addr);
178
179         if (level != I915_CACHE_NONE)
180                 pte |= HSW_WB_LLC_AGE3;
181
182         return pte;
183 }
184
185 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
186                                       enum i915_cache_level level,
187                                       bool valid, u32 unused)
188 {
189         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
190         pte |= HSW_PTE_ADDR_ENCODE(addr);
191
192         switch (level) {
193         case I915_CACHE_NONE:
194                 break;
195         case I915_CACHE_WT:
196                 pte |= HSW_WT_ELLC_LLC_AGE3;
197                 break;
198         default:
199                 pte |= HSW_WB_ELLC_LLC_AGE3;
200                 break;
201         }
202
203         return pte;
204 }
205
206 /* Broadwell Page Directory Pointer Descriptors */
207 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
208                            uint64_t val)
209 {
210         int ret;
211
212         BUG_ON(entry >= 4);
213
214         ret = intel_ring_begin(ring, 6);
215         if (ret)
216                 return ret;
217
218         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
219         intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
220         intel_ring_emit(ring, (u32)(val >> 32));
221         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
222         intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
223         intel_ring_emit(ring, (u32)(val));
224         intel_ring_advance(ring);
225
226         return 0;
227 }
228
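/*
 * Load this PPGTT's page-directory addresses into the ring's PDP registers
 * so that subsequent GPU accesses walk this address space.
 */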
229 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
230                           struct intel_engine_cs *ring)
231 {
232         int i, ret;
233
234         /* bit of a hack to find the actual last used pd */
235         int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
236
237         for (i = used_pd - 1; i >= 0; i--) {
238                 dma_addr_t addr = ppgtt->pd_dma_addr[i];
239                 ret = gen8_write_pdp(ring, i, addr);
240                 if (ret)
241                         return ret;
242         }
243
244         return 0;
245 }
246
247 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
248                                    uint64_t start,
249                                    uint64_t length,
250                                    bool use_scratch)
251 {
252         struct i915_hw_ppgtt *ppgtt =
253                 container_of(vm, struct i915_hw_ppgtt, base);
254         gen8_gtt_pte_t *pt_vaddr, scratch_pte;
255         unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
256         unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
257         unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
258         unsigned num_entries = length >> PAGE_SHIFT;
259         unsigned last_pte, i;
260
261         scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
262                                       I915_CACHE_LLC, use_scratch);
263
264         while (num_entries) {
265                 struct vm_page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
266
267                 last_pte = pte + num_entries;
268                 if (last_pte > GEN8_PTES_PER_PAGE)
269                         last_pte = GEN8_PTES_PER_PAGE;
270
271                 pt_vaddr = kmap_atomic(page_table);
272
273                 for (i = pte; i < last_pte; i++) {
274                         pt_vaddr[i] = scratch_pte;
275                         num_entries--;
276                 }
277
278                 if (!HAS_LLC(ppgtt->base.dev))
279                         drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
280                 kunmap_atomic(pt_vaddr);
281
282                 pte = 0;
283                 if (++pde == GEN8_PDES_PER_PAGE) {
284                         pdpe++;
285                         pde = 0;
286                 }
287         }
288 }
289
290 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
291                                       vm_page_t *pages,
292                                       uint64_t start,
293                                       unsigned int num_entries,
294                                       enum i915_cache_level cache_level, u32 unused)
295 {
296         struct i915_hw_ppgtt *ppgtt =
297                 container_of(vm, struct i915_hw_ppgtt, base);
298         gen8_gtt_pte_t *pt_vaddr;
299         unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
300         unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
301         unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
302         int i;
303
304         pt_vaddr = NULL;
305
306         for (i = 0; i < num_entries; i++) {
307                 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
308                         break;
309
310                 if (pt_vaddr == NULL)
311                         pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
312
313                 pt_vaddr[pte] =
314                         gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
315                                         cache_level, true);
316                 if (++pte == GEN8_PTES_PER_PAGE) {
317                         if (!HAS_LLC(ppgtt->base.dev))
318                                 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
319                         kunmap_atomic(pt_vaddr);
320                         pt_vaddr = NULL;
321                         if (++pde == GEN8_PDES_PER_PAGE) {
322                                 pdpe++;
323                                 pde = 0;
324                         }
325                         pte = 0;
326                 }
327         }
328         if (pt_vaddr) {
329                 if (!HAS_LLC(ppgtt->base.dev))
330                         drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
331                 kunmap_atomic(pt_vaddr);
332         }
333 }
334
335 static void gen8_free_page_tables(struct vm_page **pt_pages)
336 {
337         int i;
338
339         if (pt_pages == NULL)
340                 return;
341
342         for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
343                 if (pt_pages[i])
344                         __free_pages(pt_pages[i], 0);
345 }
346
347 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
348 {
349         int i;
350
351         for (i = 0; i < ppgtt->num_pd_pages; i++) {
352                 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
353                 kfree(ppgtt->gen8_pt_pages[i]);
354                 kfree(ppgtt->gen8_pt_dma_addr[i]);
355         }
356
357         __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
358 }
359
360 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
361 {
362         struct pci_dev *hwdev = ppgtt->base.dev->pdev;
363         int i, j;
364
365         for (i = 0; i < ppgtt->num_pd_pages; i++) {
366                 /* TODO: In the future we'll support sparse mappings, so this
367                  * will have to change. */
368                 if (!ppgtt->pd_dma_addr[i])
369                         continue;
370
371                 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
372                                PCI_DMA_BIDIRECTIONAL);
373
374                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
375                         dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
376                         if (addr)
377                                 pci_unmap_page(hwdev, addr, PAGE_SIZE,
378                                                PCI_DMA_BIDIRECTIONAL);
379                 }
380         }
381 }
382
383 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
384 {
385         struct i915_hw_ppgtt *ppgtt =
386                 container_of(vm, struct i915_hw_ppgtt, base);
387
388         gen8_ppgtt_unmap_pages(ppgtt);
389         gen8_ppgtt_free(ppgtt);
390 }
391
392 static struct vm_page **__gen8_alloc_page_tables(void)
393 {
394         struct vm_page **pt_pages;
395         int i;
396
397         pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct vm_page *), GFP_KERNEL);
398         if (!pt_pages)
399                 return ERR_PTR(-ENOMEM);
400
401         for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
402                 pt_pages[i] = alloc_page(GFP_KERNEL);
403                 if (!pt_pages[i])
404                         goto bail;
405         }
406
407         return pt_pages;
408
409 bail:
410         gen8_free_page_tables(pt_pages);
411         kfree(pt_pages);
412         return ERR_PTR(-ENOMEM);
413 }
414
415 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
416                                            const int max_pdp)
417 {
418         struct vm_page **pt_pages[GEN8_LEGACY_PDPS];
419         int i, ret;
420
421         for (i = 0; i < max_pdp; i++) {
422                 pt_pages[i] = __gen8_alloc_page_tables();
423                 if (IS_ERR(pt_pages[i])) {
424                         ret = PTR_ERR(pt_pages[i]);
425                         goto unwind_out;
426                 }
427         }
428
429         /* NB: Avoid touching gen8_pt_pages until the end so the allocation
430          * stays "atomic" for cleanup purposes.
431          */
432         for (i = 0; i < max_pdp; i++)
433                 ppgtt->gen8_pt_pages[i] = pt_pages[i];
434
435         return 0;
436
437 unwind_out:
438         while (i--) {
439                 gen8_free_page_tables(pt_pages[i]);
440                 kfree(pt_pages[i]);
441         }
442
443         return ret;
444 }
445
446 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
447 {
448         int i;
449
450         for (i = 0; i < ppgtt->num_pd_pages; i++) {
451                 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
452                                                      sizeof(dma_addr_t),
453                                                      GFP_KERNEL);
454                 if (!ppgtt->gen8_pt_dma_addr[i])
455                         return -ENOMEM;
456         }
457
458         return 0;
459 }
460
461 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
462                                                 const int max_pdp)
463 {
464         ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
465         if (!ppgtt->pd_pages)
466                 return -ENOMEM;
467
468         ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
469         BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
470
471         return 0;
472 }
473
474 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
475                             const int max_pdp)
476 {
477         int ret;
478
479         ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
480         if (ret)
481                 return ret;
482
483         ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
484         if (ret) {
485                 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
486                 return ret;
487         }
488
489         ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
490
491         ret = gen8_ppgtt_allocate_dma(ppgtt);
492         if (ret)
493                 gen8_ppgtt_free(ppgtt);
494
495         return ret;
496 }
497
498 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
499                                              const int pd)
500 {
501         dma_addr_t pd_addr;
502         int ret;
503
504         pd_addr = pci_map_page(ppgtt->base.dev->pdev,
505                                &ppgtt->pd_pages[pd], 0,
506                                PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
507
508         ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
509         if (ret)
510                 return ret;
511
512         ppgtt->pd_dma_addr[pd] = pd_addr;
513
514         return 0;
515 }
516
517 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
518                                         const int pd,
519                                         const int pt)
520 {
521         dma_addr_t pt_addr;
522         struct vm_page *p;
523         int ret;
524
525         p = ppgtt->gen8_pt_pages[pd][pt];
526         pt_addr = pci_map_page(ppgtt->base.dev->pdev,
527                                p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
528         ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
529         if (ret)
530                 return ret;
531
532         ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
533
534         return 0;
535 }
536
537 /**
538  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
539  * registers, with a net effect resembling a 2-level page table in normal x86
540  * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
541  * legacy 32b address space.
542  *
543  * FIXME: split allocation into smaller pieces. For now we only ever do this
544  * once, but with full PPGTT, the multiple contiguous allocations will be bad.
545  * TODO: Do something with the size parameter
546  */
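/*
 * With the 4 * 512 * 512 * 4096 layout above, a legacy 32b GPU address
 * decomposes as: bits [31:30] select the PDP entry, bits [29:21] the PDE
 * within that page directory, bits [20:12] the PTE within the page table,
 * and bits [11:0] the byte offset into the 4KB page.
 */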
547 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
548 {
549         const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
550         const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
551         int i, j, ret;
552
553         if (size % (1<<30))
554                 DRM_INFO("Pages will be wasted unless GTT size (%lu) is divisible by 1GB\n", size);
555
556         /* 1. Do all our allocations for page directories and page tables. */
557         ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
558         if (ret)
559                 return ret;
560
561         /*
562          * 2. Create DMA mappings for the page directories and page tables.
563          */
564         for (i = 0; i < max_pdp; i++) {
565                 ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
566                 if (ret)
567                         goto bail;
568
569                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
570                         ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
571                         if (ret)
572                                 goto bail;
573                 }
574         }
575
576         /*
577          * 3. Map all the page directory entries to point to the page tables
578          * we've allocated.
579          *
580          * For now, the PPGTT helper functions all require that the PDEs are
581          * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
582          * will never need to touch the PDEs again.
583          */
584         for (i = 0; i < max_pdp; i++) {
585                 gen8_ppgtt_pde_t *pd_vaddr;
586                 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
587                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
588                         dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
589                         pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
590                                                       I915_CACHE_LLC);
591                 }
592                 if (!HAS_LLC(ppgtt->base.dev))
593                         drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
594                 kunmap_atomic(pd_vaddr);
595         }
596
597         ppgtt->switch_mm = gen8_mm_switch;
598         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
599         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
600         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
601         ppgtt->base.start = 0;
602         ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
603
604         ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
605
606         DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
607                          ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
608         DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%ld wasted)\n",
609                          ppgtt->num_pd_entries,
610                          (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
611         return 0;
612
613 bail:
614         gen8_ppgtt_unmap_pages(ppgtt);
615         gen8_ppgtt_free(ppgtt);
616         return ret;
617 }
618
619 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
620 {
621         struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
622         struct i915_address_space *vm = &ppgtt->base;
623         gen6_gtt_pte_t __iomem *pd_addr;
624         gen6_gtt_pte_t scratch_pte;
625         uint32_t pd_entry;
626         int pte, pde;
627
628         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
629
630         pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
631                 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
632
633         seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
634                    ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
635         for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
636                 u32 expected;
637                 gen6_gtt_pte_t *pt_vaddr;
638                 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
639                 pd_entry = readl(pd_addr + pde);
640                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
641
642                 if (pd_entry != expected)
643                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
644                                    pde,
645                                    pd_entry,
646                                    expected);
647                 seq_printf(m, "\tPDE: %x\n", pd_entry);
648
649                 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
650                 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
651                         unsigned long va =
652                                 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
653                                 (pte * PAGE_SIZE);
654                         int i;
655                         bool found = false;
656                         for (i = 0; i < 4; i++)
657                                 if (pt_vaddr[pte + i] != scratch_pte)
658                                         found = true;
659                         if (!found)
660                                 continue;
661
662                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
663                         for (i = 0; i < 4; i++) {
664                                 if (pt_vaddr[pte + i] != scratch_pte)
665                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
666                                 else
667                                         seq_printf(m, "  SCRATCH ");
668                         }
669                         seq_printf(m, "\n");
670                 }
671                 kunmap_atomic(pt_vaddr);
672         }
673 }
674
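/*
 * Write this PPGTT's page-directory entries into its reserved slot in the
 * global GTT; on gen6/7 the page directory lives in GGTT space at pd_offset.
 */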
675 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
676 {
677         struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
678         gen6_gtt_pte_t __iomem *pd_addr;
679         uint32_t pd_entry;
680         int i;
681
682         WARN_ON(ppgtt->pd_offset & 0x3f);
683         pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
684                 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
685         for (i = 0; i < ppgtt->num_pd_entries; i++) {
686                 dma_addr_t pt_addr;
687
688                 pt_addr = ppgtt->pt_dma_addr[i];
689                 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
690                 pd_entry |= GEN6_PDE_VALID;
691
692                 writel(pd_entry, pd_addr + i);
693         }
694         readl(pd_addr);
695 }
696
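/* Encode pd_offset in the form expected by the RING_PP_DIR_BASE register. */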
697 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
698 {
699         BUG_ON(ppgtt->pd_offset & 0x3f);
700
701         return (ppgtt->pd_offset / 64) << 16;
702 }
703
704 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
705                          struct intel_engine_cs *ring)
706 {
707         int ret;
708
709         /* NB: TLBs must be flushed and invalidated before a switch */
710         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
711         if (ret)
712                 return ret;
713
714         ret = intel_ring_begin(ring, 6);
715         if (ret)
716                 return ret;
717
718         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
719         intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
720         intel_ring_emit(ring, PP_DIR_DCLV_2G);
721         intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
722         intel_ring_emit(ring, get_pd_offset(ppgtt));
723         intel_ring_emit(ring, MI_NOOP);
724         intel_ring_advance(ring);
725
726         return 0;
727 }
728
729 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
730                           struct intel_engine_cs *ring)
731 {
732         int ret;
733
734         /* NB: TLBs must be flushed and invalidated before a switch */
735         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
736         if (ret)
737                 return ret;
738
739         ret = intel_ring_begin(ring, 6);
740         if (ret)
741                 return ret;
742
743         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
744         intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
745         intel_ring_emit(ring, PP_DIR_DCLV_2G);
746         intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
747         intel_ring_emit(ring, get_pd_offset(ppgtt));
748         intel_ring_emit(ring, MI_NOOP);
749         intel_ring_advance(ring);
750
751         /* XXX: RCS is the only one to auto invalidate the TLBs? */
752         if (ring->id != RCS) {
753                 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
754                 if (ret)
755                         return ret;
756         }
757
758         return 0;
759 }
760
761 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
762                           struct intel_engine_cs *ring)
763 {
764         struct drm_device *dev = ppgtt->base.dev;
765         struct drm_i915_private *dev_priv = dev->dev_private;
766
767
768         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
769         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
770
771         POSTING_READ(RING_PP_DIR_DCLV(ring));
772
773         return 0;
774 }
775
776 static void gen8_ppgtt_enable(struct drm_device *dev)
777 {
778         struct drm_i915_private *dev_priv = dev->dev_private;
779         struct intel_engine_cs *ring;
780         int j;
781
782         for_each_ring(ring, dev_priv, j) {
783                 I915_WRITE(RING_MODE_GEN7(ring),
784                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
785         }
786 }
787
788 static void gen7_ppgtt_enable(struct drm_device *dev)
789 {
790         struct drm_i915_private *dev_priv = dev->dev_private;
791         struct intel_engine_cs *ring;
792         uint32_t ecochk, ecobits;
793         int i;
794
795         ecobits = I915_READ(GAC_ECO_BITS);
796         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
797
798         ecochk = I915_READ(GAM_ECOCHK);
799         if (IS_HASWELL(dev)) {
800                 ecochk |= ECOCHK_PPGTT_WB_HSW;
801         } else {
802                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
803                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
804         }
805         I915_WRITE(GAM_ECOCHK, ecochk);
806
807         for_each_ring(ring, dev_priv, i) {
808                 /* GFX_MODE is per-ring on gen7+ */
809                 I915_WRITE(RING_MODE_GEN7(ring),
810                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
811         }
812 }
813
814 static void gen6_ppgtt_enable(struct drm_device *dev)
815 {
816         struct drm_i915_private *dev_priv = dev->dev_private;
817         uint32_t ecochk, gab_ctl, ecobits;
818
819         ecobits = I915_READ(GAC_ECO_BITS);
820         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
821                    ECOBITS_PPGTT_CACHE64B);
822
823         gab_ctl = I915_READ(GAB_CTL);
824         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
825
826         ecochk = I915_READ(GAM_ECOCHK);
827         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
828
829         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
830 }
831
832 /* PPGTT support for Sandybridge/Gen6 and later */
833 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
834                                    uint64_t start,
835                                    uint64_t length,
836                                    bool use_scratch)
837 {
838         struct i915_hw_ppgtt *ppgtt =
839                 container_of(vm, struct i915_hw_ppgtt, base);
840         gen6_gtt_pte_t *pt_vaddr, scratch_pte;
841         unsigned first_entry = start >> PAGE_SHIFT;
842         unsigned num_entries = length >> PAGE_SHIFT;
843         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
844         unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
845         unsigned last_pte, i;
846
847         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
848
849         while (num_entries) {
850                 last_pte = first_pte + num_entries;
851                 if (last_pte > I915_PPGTT_PT_ENTRIES)
852                         last_pte = I915_PPGTT_PT_ENTRIES;
853
854                 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
855
856                 for (i = first_pte; i < last_pte; i++)
857                         pt_vaddr[i] = scratch_pte;
858
859                 kunmap_atomic(pt_vaddr);
860
861                 num_entries -= last_pte - first_pte;
862                 first_pte = 0;
863                 act_pt++;
864         }
865 }
866
867 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
868                                       vm_page_t *pages,
869                                       uint64_t start,
870                                       unsigned num_entries,
871                                       enum i915_cache_level cache_level, u32 flags)
872 {
873         struct i915_hw_ppgtt *ppgtt =
874                 container_of(vm, struct i915_hw_ppgtt, base);
875         gen6_gtt_pte_t *pt_vaddr;
876         unsigned first_entry = start >> PAGE_SHIFT;
877         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
878         unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
879
880         pt_vaddr = NULL;
881         for (int i = 0; i < num_entries; i++) {
882                 if (pt_vaddr == NULL)
883                         pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
884
885                 pt_vaddr[act_pte] =
886                         vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
887                                        cache_level, true, flags);
888                 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
889                         kunmap_atomic(pt_vaddr);
890                         pt_vaddr = NULL;
891                         act_pt++;
892                         act_pte = 0;
893                 }
894         }
895         if (pt_vaddr)
896                 kunmap_atomic(pt_vaddr);
897 }
898
899 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
900 {
901         int i;
902
903         if (ppgtt->pt_dma_addr) {
904                 for (i = 0; i < ppgtt->num_pd_entries; i++)
905                         pci_unmap_page(ppgtt->base.dev->pdev,
906                                        ppgtt->pt_dma_addr[i],
907                                        4096, PCI_DMA_BIDIRECTIONAL);
908         }
909 }
910
911 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
912 {
913         int i;
914
915         kfree(ppgtt->pt_dma_addr);
916         for (i = 0; i < ppgtt->num_pd_entries; i++)
917                 __free_page(ppgtt->pt_pages[i]);
918         kfree(ppgtt->pt_pages);
919 }
920
921 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
922 {
923         struct i915_hw_ppgtt *ppgtt =
924                 container_of(vm, struct i915_hw_ppgtt, base);
925
926         drm_mm_remove_node(&ppgtt->node);
927
928         gen6_ppgtt_unmap_pages(ppgtt);
929         gen6_ppgtt_free(ppgtt);
930 }
931
932 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
933 {
934         struct drm_device *dev = ppgtt->base.dev;
935         struct drm_i915_private *dev_priv = dev->dev_private;
936         bool retried = false;
937         int ret;
938
939         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
940          * allocator works in address space sizes, so it's multiplied by page
941          * size. We allocate at the top of the GTT to avoid fragmentation.
942          */
943         BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
944 alloc:
945         ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
946                                                   &ppgtt->node, GEN6_PD_SIZE,
947                                                   GEN6_PD_ALIGN, 0,
948                                                   0, dev_priv->gtt.base.total,
949                                                   DRM_MM_TOPDOWN);
950         if (ret == -ENOSPC && !retried) {
951                 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
952                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
953                                                I915_CACHE_NONE,
954                                                0, dev_priv->gtt.base.total,
955                                                0);
956                 if (ret)
957                         return ret;
958
959                 retried = true;
960                 goto alloc;
961         }
962
963         if (ppgtt->node.start < dev_priv->gtt.mappable_end)
964                 DRM_DEBUG("Forced to use aperture for PDEs\n");
965
966         ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
967         return ret;
968 }
969
970 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
971 {
972         int i;
973
974         ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct vm_page *),
975                                   GFP_KERNEL);
976
977         if (!ppgtt->pt_pages)
978                 return -ENOMEM;
979
980         for (i = 0; i < ppgtt->num_pd_entries; i++) {
981                 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
982                 if (!ppgtt->pt_pages[i]) {
983                         gen6_ppgtt_free(ppgtt);
984                         return -ENOMEM;
985                 }
986         }
987
988         return 0;
989 }
990
991 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
992 {
993         int ret;
994
995         ret = gen6_ppgtt_allocate_page_directories(ppgtt);
996         if (ret)
997                 return ret;
998
999         ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1000         if (ret) {
1001                 drm_mm_remove_node(&ppgtt->node);
1002                 return ret;
1003         }
1004
1005         ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
1006                                      GFP_KERNEL);
1007         if (!ppgtt->pt_dma_addr) {
1008                 drm_mm_remove_node(&ppgtt->node);
1009                 gen6_ppgtt_free(ppgtt);
1010                 return -ENOMEM;
1011         }
1012
1013         return 0;
1014 }
1015
1016 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
1017 {
1018         struct drm_device *dev = ppgtt->base.dev;
1019         int i;
1020
1021         for (i = 0; i < ppgtt->num_pd_entries; i++) {
1022                 dma_addr_t pt_addr;
1023
1024                 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
1025                                        PCI_DMA_BIDIRECTIONAL);
1026
1027                 if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
1028                         gen6_ppgtt_unmap_pages(ppgtt);
1029                         return -EIO;
1030                 }
1031
1032                 ppgtt->pt_dma_addr[i] = pt_addr;
1033         }
1034
1035         return 0;
1036 }
1037
1038 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1039 {
1040         struct drm_device *dev = ppgtt->base.dev;
1041         struct drm_i915_private *dev_priv = dev->dev_private;
1042         int ret;
1043
1044         ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1045         if (IS_GEN6(dev)) {
1046                 ppgtt->switch_mm = gen6_mm_switch;
1047         } else if (IS_HASWELL(dev)) {
1048                 ppgtt->switch_mm = hsw_mm_switch;
1049         } else if (IS_GEN7(dev)) {
1050                 ppgtt->switch_mm = gen7_mm_switch;
1051         } else
1052                 BUG();
1053
1054         ret = gen6_ppgtt_alloc(ppgtt);
1055         if (ret)
1056                 return ret;
1057
1058         ret = gen6_ppgtt_setup_page_tables(ppgtt);
1059         if (ret) {
1060                 gen6_ppgtt_free(ppgtt);
1061                 return ret;
1062         }
1063
1064         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1065         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1066         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1067         ppgtt->base.start = 0;
1068         ppgtt->base.total =  ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
1069         ppgtt->debug_dump = gen6_dump_ppgtt;
1070
1071         ppgtt->pd_offset =
1072                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
1073
1074         ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1075
1076         DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1077                          ppgtt->node.size >> 20,
1078                          ppgtt->node.start / PAGE_SIZE);
1079
1080         gen6_write_pdes(ppgtt);
1081         DRM_DEBUG("Adding PPGTT at offset %x\n",
1082                   ppgtt->pd_offset << 10);
1083
1084         return 0;
1085 }
1086
1087 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1088 {
1089         struct drm_i915_private *dev_priv = dev->dev_private;
1090
1091         ppgtt->base.dev = dev;
1092         ppgtt->base.scratch = dev_priv->gtt.base.scratch;
1093
1094         if (INTEL_INFO(dev)->gen < 8)
1095                 return gen6_ppgtt_init(ppgtt);
1096         else if (IS_GEN8(dev))
1097                 return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
1098         else
1099                 BUG();
1100 }
1101 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1102 {
1103         struct drm_i915_private *dev_priv = dev->dev_private;
1104         int ret = 0;
1105
1106         ret = __hw_ppgtt_init(dev, ppgtt);
1107         if (ret == 0) {
1108                 kref_init(&ppgtt->ref);
1109                 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1110                             ppgtt->base.total);
1111                 i915_init_vm(dev_priv, &ppgtt->base);
1112         }
1113
1114         return ret;
1115 }
1116
1117 int i915_ppgtt_init_hw(struct drm_device *dev)
1118 {
1119         struct drm_i915_private *dev_priv = dev->dev_private;
1120         struct intel_engine_cs *ring;
1121         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1122         int i, ret = 0;
1123
1124         /* In the case of execlists, PPGTT is enabled by the context descriptor
1125          * and the PDPs are contained within the context itself.  We don't
1126          * need to do anything here. */
1127         if (i915.enable_execlists)
1128                 return 0;
1129
1130         if (!USES_PPGTT(dev))
1131                 return 0;
1132
1133         if (IS_GEN6(dev))
1134                 gen6_ppgtt_enable(dev);
1135         else if (IS_GEN7(dev))
1136                 gen7_ppgtt_enable(dev);
1137         else if (INTEL_INFO(dev)->gen >= 8)
1138                 gen8_ppgtt_enable(dev);
1139         else
1140                 WARN_ON(1);
1141
1142         if (ppgtt) {
1143                 for_each_ring(ring, dev_priv, i) {
1144                         ret = ppgtt->switch_mm(ppgtt, ring);
1145                         if (ret != 0)
1146                                 return ret;
1147                 }
1148         }
1149
1150         return ret;
1151 }
1152 struct i915_hw_ppgtt *
1153 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
1154 {
1155         struct i915_hw_ppgtt *ppgtt;
1156         int ret;
1157
1158         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1159         if (!ppgtt)
1160                 return ERR_PTR(-ENOMEM);
1161
1162         ret = i915_ppgtt_init(dev, ppgtt);
1163         if (ret) {
1164                 kfree(ppgtt);
1165                 return ERR_PTR(ret);
1166         }
1167
1168         ppgtt->file_priv = fpriv;
1169
1170         return ppgtt;
1171 }
1172
1173 void i915_ppgtt_release(struct kref *kref)
1174 {
1175         struct i915_hw_ppgtt *ppgtt =
1176                 container_of(kref, struct i915_hw_ppgtt, ref);
1177
1178         /* vmas should already be unbound */
1179         WARN_ON(!list_empty(&ppgtt->base.active_list));
1180         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
1181
1182         list_del(&ppgtt->base.global_link);
1183         drm_mm_takedown(&ppgtt->base.mm);
1184
1185         ppgtt->base.cleanup(&ppgtt->base);
1186         kfree(ppgtt);
1187 }
1188
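/*
 * Bind a VMA into its PPGTT by writing a PTE for each page of the backing
 * object (read-only for VLV objects marked gt_ro).
 */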
1189 static void
1190 ppgtt_bind_vma(struct i915_vma *vma,
1191                enum i915_cache_level cache_level,
1192                u32 flags)
1193 {
1194         const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;
1195
1196         /* Currently applicable only to VLV */
1197         if (vma->obj->gt_ro)
1198                 flags |= PTE_READ_ONLY;
1199
1200         vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
1201                                 num_entries,
1202                                 cache_level, flags);
1203 }
1204
1205 static void ppgtt_unbind_vma(struct i915_vma *vma)
1206 {
1207         vma->vm->clear_range(vma->vm,
1208                              vma->node.start,
1209                              vma->obj->base.size,
1210                              true);
1211 }
1212
1213 extern int intel_iommu_gfx_mapped;
1214 /* Certain Gen5 chipsets require idling the GPU before
1215  * unmapping anything from the GTT when VT-d is enabled.
1216  */
1217 static inline bool needs_idle_maps(struct drm_device *dev)
1218 {
1219 #ifdef CONFIG_INTEL_IOMMU
1220         /* Query intel_iommu to see if we need the workaround. Presumably that
1221          * was loaded first.
1222          */
1223         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1224                 return true;
1225 #endif
1226         return false;
1227 }
1228
1229 static bool do_idling(struct drm_i915_private *dev_priv)
1230 {
1231         bool ret = dev_priv->mm.interruptible;
1232
1233         if (unlikely(dev_priv->gtt.do_idle_maps)) {
1234                 dev_priv->mm.interruptible = false;
1235                 if (i915_gpu_idle(dev_priv->dev)) {
1236                         DRM_ERROR("Couldn't idle GPU\n");
1237                         /* Wait a bit, in hopes it avoids the hang */
1238                         udelay(10);
1239                 }
1240         }
1241
1242         return ret;
1243 }
1244
1245 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1246 {
1247         if (unlikely(dev_priv->gtt.do_idle_maps))
1248                 dev_priv->mm.interruptible = interruptible;
1249 }
1250
1251 void i915_check_and_clear_faults(struct drm_device *dev)
1252 {
1253         struct drm_i915_private *dev_priv = dev->dev_private;
1254         struct intel_engine_cs *ring;
1255         int i;
1256
1257         if (INTEL_INFO(dev)->gen < 6)
1258                 return;
1259
1260         for_each_ring(ring, dev_priv, i) {
1261                 u32 fault_reg;
1262                 fault_reg = I915_READ(RING_FAULT_REG(ring));
1263                 if (fault_reg & RING_FAULT_VALID) {
1264 #if 0
1265                         DRM_DEBUG_DRIVER("Unexpected fault\n"
1266                          "\tAddr: 0x%08lx\n"
1267                                          "\tAddress space: %s\n"
1268                                          "\tSource ID: %d\n"
1269                                          "\tType: %d\n",
1270                                          fault_reg & PAGE_MASK,
1271                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1272                                          RING_FAULT_SRCID(fault_reg),
1273                                          RING_FAULT_FAULT_TYPE(fault_reg));
1274 #endif
1275                         I915_WRITE(RING_FAULT_REG(ring),
1276                                    fault_reg & ~RING_FAULT_VALID);
1277                 }
1278         }
1279         POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1280 }
1281
1282 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
1283 {
1284         if (INTEL_INFO(dev_priv->dev)->gen < 6) {
1285                 intel_gtt_chipset_flush();
1286         } else {
1287                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1288                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
1289         }
1290 }
1291
1292 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1293 {
1294         struct drm_i915_private *dev_priv = dev->dev_private;
1295
1296         /* Don't bother messing with faults pre GEN6 as we have little
1297          * documentation supporting that it's a good idea.
1298          */
1299         if (INTEL_INFO(dev)->gen < 6)
1300                 return;
1301
1302         i915_check_and_clear_faults(dev);
1303
1304         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1305                                        dev_priv->gtt.base.start,
1306                                        dev_priv->gtt.base.total,
1307                                        true);
1308
1309         i915_ggtt_flush(dev_priv);
1310 }
1311
1312 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
1313 {
1314         struct drm_i915_private *dev_priv = dev->dev_private;
1315         struct drm_i915_gem_object *obj;
1316         struct i915_address_space *vm;
1317
1318         i915_check_and_clear_faults(dev);
1319
1320         /* First fill our portion of the GTT with scratch pages */
1321         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1322                                        dev_priv->gtt.base.start,
1323                                        dev_priv->gtt.base.total,
1324                                        true);
1325
1326         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1327                 struct i915_vma *vma = i915_gem_obj_to_vma(obj,
1328                                                            &dev_priv->gtt.base);
1329                 if (!vma)
1330                         continue;
1331
1332                 i915_gem_clflush_object(obj, obj->pin_display);
1333                 /* The bind_vma code tries to be smart about tracking mappings.
1334                  * Unfortunately above, we've just wiped out the mappings
1335                  * without telling our object about it. So we need to fake it.
1336                  */
1337                 obj->has_global_gtt_mapping = 0;
1338                 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
1339         }
1340
1341
1342         if (INTEL_INFO(dev)->gen >= 8) {
1343                 if (IS_CHERRYVIEW(dev))
1344                         chv_setup_private_ppat(dev_priv);
1345                 else
1346                         bdw_setup_private_ppat(dev_priv);
1347
1348                 return;
1349         }
1350
1351         list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
1352                 /* TODO: Perhaps it shouldn't be gen6 specific */
1353                 if (i915_is_ggtt(vm)) {
1354                         if (dev_priv->mm.aliasing_ppgtt)
1355                                 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
1356                         continue;
1357                 }
1358
1359                 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
1360         }
1361
1362         i915_ggtt_flush(dev_priv);
1363 }
1364
1365 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
1366 {
1367         if (obj->has_dma_mapping)
1368                 return 0;
1369
1370 #if 0
1371         if (!dma_map_sg(&obj->base.dev->pdev->dev,
1372                         obj->pages->sgl, obj->pages->nents,
1373                         PCI_DMA_BIDIRECTIONAL))
1374                 return -ENOSPC;
1375 #endif
1376
1377         return 0;
1378 }
1379
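/*
 * Write a 64-bit gen8 GGTT PTE; without a usable writeq() this is done as
 * two 32-bit writes, low dword first.
 */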
1380 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
1381 {
1382 #if 0
1383         writeq(pte, addr);
1384 #else
1385         iowrite32((u32)pte, addr);
1386         iowrite32(pte >> 32, addr + 4);
1387 #endif
1388 }
1389
1390 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
1391                                      vm_page_t *pages,
1392                                      uint64_t start,
1393                                      unsigned int num_entries,
1394                                      enum i915_cache_level level, u32 unused)
1395 {
1396         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1397         unsigned first_entry = start >> PAGE_SHIFT;
1398         gen8_gtt_pte_t __iomem *gtt_entries =
1399                 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1400         int i = 0;
1401         dma_addr_t addr = 0;
1402
1403         for (i = 0; i < num_entries; i++) {
1404                 addr = VM_PAGE_TO_PHYS(pages[i]);
1405                 gen8_set_pte(&gtt_entries[i],
1406                              gen8_pte_encode(addr, level, true));
1407         }
1408
1409         /*
1410          * XXX: This serves as a posting read to make sure that the PTE has
1411          * actually been updated. There is some concern that, even though the
1412          * registers and PTEs are within the same BAR, they may be subject to
1413          * NUMA-like access patterns. Therefore, even with the way we assume
1414          * hardware should work, we must keep this posting read for paranoia.
1415          */
1416         if (i != 0)
1417                 WARN_ON(readq(&gtt_entries[i-1])
1418                         != gen8_pte_encode(addr, level, true));
1419
1420         /* This next bit makes the above posting read even more important. We
1421          * want to flush the TLBs only after we're certain all the PTE updates
1422          * have finished.
1423          */
1424         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1425         POSTING_READ(GFX_FLSH_CNTL_GEN6);
1426 }
1427
1428 /*
1429  * Binds an object into the global gtt with the specified cache level. The object
1430  * will be accessible to the GPU via commands whose operands reference offsets
1431  * within the global GTT as well as accessible by the GPU through the GMADR
1432  * mapped BAR (dev_priv->mm.gtt->gtt).
1433  */
1434 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
1435                                      vm_page_t *pages,
1436                                      uint64_t start,
1437                                      unsigned int num_entries,
1438                                      enum i915_cache_level level, u32 flags)
1439 {
1440         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1441         unsigned first_entry = start >> PAGE_SHIFT;
1442         gen6_gtt_pte_t __iomem *gtt_entries =
1443                 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1444         int i = 0;
1445         dma_addr_t addr = 0; /* shut up gcc */
1446
1447         for (i = 0; i < num_entries; i++) {
1448                 addr = VM_PAGE_TO_PHYS(pages[i]);
1449                 iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
1450         }
1451
1452         /* XXX: This serves as a posting read to make sure that the PTE has
1453          * actually been updated. There is some concern that, even though the
1454          * registers and PTEs are within the same BAR, they may be subject to
1455          * NUMA-like access patterns. Therefore, even with the way we assume
1456          * hardware should work, we must keep this posting read for paranoia.
1457          */
1458         if (i != 0) {
1459                 unsigned long gtt = readl(&gtt_entries[i-1]);
1460                 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
1461         }
1462
1463         /* This next bit makes the above posting read even more important. We
1464          * want to flush the TLBs only after we're certain all the PTE updates
1465          * have finished.
1466          */
1467         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1468         POSTING_READ(GFX_FLSH_CNTL_GEN6);
1469 }
1470
1471 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
1472                                   uint64_t start,
1473                                   uint64_t length,
1474                                   bool use_scratch)
1475 {
1476         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1477         unsigned first_entry = start >> PAGE_SHIFT;
1478         unsigned num_entries = length >> PAGE_SHIFT;
1479         gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
1480                 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1481         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1482         int i;
1483
1484         if (WARN(num_entries > max_entries,
1485                  "First entry = %d; Num entries = %d (max=%d)\n",
1486                  first_entry, num_entries, max_entries))
1487                 num_entries = max_entries;
1488
1489         scratch_pte = gen8_pte_encode(vm->scratch.addr,
1490                                       I915_CACHE_LLC,
1491                                       use_scratch);
1492         for (i = 0; i < num_entries; i++)
1493                 gen8_set_pte(&gtt_base[i], scratch_pte);
1494         readl(gtt_base);
1495 }
1496
1497 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
1498                                   uint64_t start,
1499                                   uint64_t length,
1500                                   bool use_scratch)
1501 {
1502         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1503         unsigned first_entry = start >> PAGE_SHIFT;
1504         unsigned num_entries = length >> PAGE_SHIFT;
1505         gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
1506                 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1507         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1508         int i;
1509
1510         if (WARN(num_entries > max_entries,
1511                  "First entry = %d; Num entries = %d (max=%d)\n",
1512                  first_entry, num_entries, max_entries))
1513                 num_entries = max_entries;
1514
1515         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
1516
1517         for (i = 0; i < num_entries; i++)
1518                 iowrite32(scratch_pte, &gtt_base[i]);
1519         readl(gtt_base);
1520 }
1521
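/*
 * Legacy (gen2-5) GGTT binding: goes through the intel-gtt layer rather
 * than writing PTEs directly, mapping the cache level onto the cached vs.
 * uncached AGP memory types.
 */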
1522 static void i915_ggtt_bind_vma(struct i915_vma *vma,
1523                                enum i915_cache_level cache_level,
1524                                u32 unused)
1525 {
1526         const unsigned long entry = vma->node.start >> PAGE_SHIFT;
1527         const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;
1528         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
1529                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
1530
1531         BUG_ON(!i915_is_ggtt(vma->vm));
1532         intel_gtt_insert_pages(entry, num_entries, vma->obj->pages, flags);
1533         vma->obj->has_global_gtt_mapping = 1;
1534 }
1535
1536 static void i915_ggtt_clear_range(struct i915_address_space *vm,
1537                                   uint64_t start,
1538                                   uint64_t length,
1539                                   bool unused)
1540 {
1541         unsigned first_entry = start >> PAGE_SHIFT;
1542         unsigned num_entries = length >> PAGE_SHIFT;
1543         intel_gtt_clear_range(first_entry, num_entries);
1544 }
1545
1546 static void i915_ggtt_unbind_vma(struct i915_vma *vma)
1547 {
1548         const unsigned int first = vma->node.start >> PAGE_SHIFT;
1549         const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
1550
1551         BUG_ON(!i915_is_ggtt(vma->vm));
1552         vma->obj->has_global_gtt_mapping = 0;
1553         intel_gtt_clear_range(first, size);
1554 }
1555
1556 static void ggtt_bind_vma(struct i915_vma *vma,
1557                           enum i915_cache_level cache_level,
1558                           u32 flags)
1559 {
1560         struct drm_device *dev = vma->vm->dev;
1561         struct drm_i915_private *dev_priv = dev->dev_private;
1562         struct drm_i915_gem_object *obj = vma->obj;
1563
1564         /* Currently applicable only to VLV */
1565         if (obj->gt_ro)
1566                 flags |= PTE_READ_ONLY;
1567
1568         /* If there is no aliasing PPGTT, or the caller needs a global mapping,
1569          * or we have a global mapping already but the cacheability flags have
1570          * changed, set the global PTEs.
1571          *
1572          * If there is an aliasing PPGTT it is anecdotally faster, so use that
1573          * instead if none of the above hold true.
1574          *
1575          * NB: A global mapping should only be needed for special regions like
1576          * "gtt mappable", SNB errata, or if specified via special execbuf
1577          * flags. At all other times, the GPU will use the aliasing PPGTT.
1578          */
1579         if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
1580                 if (!obj->has_global_gtt_mapping ||
1581                     (cache_level != obj->cache_level)) {
1582                         vma->vm->insert_entries(vma->vm, obj->pages,
1583                                                 vma->node.start,
1584                                                 obj->base.size >> PAGE_SHIFT,
1585                                                 cache_level, flags);
1586                         obj->has_global_gtt_mapping = 1;
1587                 }
1588         }
1589
1590         if (dev_priv->mm.aliasing_ppgtt &&
1591             (!obj->has_aliasing_ppgtt_mapping ||
1592              (cache_level != obj->cache_level))) {
1593                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1594                 appgtt->base.insert_entries(&appgtt->base,
1595                                             vma->obj->pages,
1596                                             vma->node.start,
1597                                             obj->base.size >> PAGE_SHIFT,
1598                                             cache_level, flags);
1599                 vma->obj->has_aliasing_ppgtt_mapping = 1;
1600         }
1601 }
1602
1603 static void ggtt_unbind_vma(struct i915_vma *vma)
1604 {
1605         struct drm_device *dev = vma->vm->dev;
1606         struct drm_i915_private *dev_priv = dev->dev_private;
1607         struct drm_i915_gem_object *obj = vma->obj;
1608
1609         if (obj->has_global_gtt_mapping) {
1610                 vma->vm->clear_range(vma->vm,
1611                                      vma->node.start,
1612                                      obj->base.size,
1613                                      true);
1614                 obj->has_global_gtt_mapping = 0;
1615         }
1616
1617         if (obj->has_aliasing_ppgtt_mapping) {
1618                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1619                 appgtt->base.clear_range(&appgtt->base,
1620                                          vma->node.start,
1621                                          obj->base.size,
1622                                          true);
1623                 obj->has_aliasing_ppgtt_mapping = 0;
1624         }
1625 }
1626
1627 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
1628 {
1629         struct drm_device *dev = obj->base.dev;
1630         struct drm_i915_private *dev_priv = dev->dev_private;
1631         bool interruptible;
1632
1633         interruptible = do_idling(dev_priv);
1634
1635 #if 0
1636         if (!obj->has_dma_mapping)
1637                 dma_unmap_sg(&dev->pdev->dev,
1638                              obj->pages->sgl, obj->pages->nents,
1639                              PCI_DMA_BIDIRECTIONAL);
1640 #endif
1641
1642         undo_idling(dev_priv, interruptible);
1643 }
1644
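/*
 * drm_mm color-adjust callback installed on !HAS_LLC platforms (see
 * i915_gem_setup_global_gtt): shrinks a hole so that one 4096-byte guard
 * page remains between neighbouring nodes of different cache "color".
 */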
1645 static void i915_gtt_color_adjust(struct drm_mm_node *node,
1646                                   unsigned long color,
1647                                   unsigned long *start,
1648                                   unsigned long *end)
1649 {
1650         if (node->color != color)
1651                 *start += 4096;
1652
1653         if (!list_empty(&node->node_list)) {
1654                 node = list_entry(node->node_list.next,
1655                                   struct drm_mm_node,
1656                                   node_list);
1657                 if (node->allocated && node->color != color)
1658                         *end -= 4096;
1659         }
1660 }
1661
1662 int i915_gem_setup_global_gtt(struct drm_device *dev,
1663                               unsigned long start,
1664                               unsigned long mappable_end,
1665                               unsigned long end)
1666 {
1667         /* Let GEM Manage all of the aperture.
1668          *
1669          * However, leave one page at the end still bound to the scratch page.
1670          * There are a number of places where the hardware apparently prefetches
1671          * past the end of the object, and we've seen multiple hangs with the
1672          * GPU head pointer stuck in a batchbuffer bound at the last page of the
1673          * aperture.  One page should be enough to keep any prefetching inside
1674          * of the aperture.
1675          */
1676         struct drm_i915_private *dev_priv = dev->dev_private;
1677         struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
1678         unsigned long mappable;
1679         int error;
1680         struct drm_mm_node *entry;
1681         struct drm_i915_gem_object *obj;
1682         unsigned long hole_start, hole_end;
1683         int ret;
1684
1685         kprintf("MAPPABLE_END VS END %016jx %016jx\n", mappable_end, end);
1686         tsleep(&mappable_end, 0, "DELAY", hz); /* for kprintf */
1687         /*BUG_ON(mappable_end > end);*/
1688
1689         mappable = min(end, mappable_end) - start;
1690
1691         /* Subtract the guard page ... */
1692         drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
1693         if (!HAS_LLC(dev))
1694                 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
1695
1696         /* Mark any preallocated objects as occupied */
1697         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1698                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
1699
1700                 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
1701                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
1702
1703                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
1704                 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
1705                 if (ret) {
1706                         DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
1707                         return ret;
1708                 }
1709                 obj->has_global_gtt_mapping = 1;
1710         }
1711
1712         dev_priv->gtt.base.start = start;
1713         dev_priv->gtt.base.total = end - start;
1714
1715         /* Clear any non-preallocated blocks */
1716         drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
1717                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
1718                               hole_start, hole_end);
1719                 ggtt_vm->clear_range(ggtt_vm, hole_start,
1720                                      hole_end - hole_start, true);
1721         }
1722
1723 #ifdef __DragonFly__
1724         intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
1725         device_printf(dev->dev,
1726             "taking over the fictitious range 0x%lx-0x%lx\n",
1727             dev_priv->gtt.mappable_base + start, dev_priv->gtt.mappable_base + start + mappable);
1728         error = -vm_phys_fictitious_reg_range(dev_priv->gtt.mappable_base + start,
1729             dev_priv->gtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
1730 #endif
1731
1732         /* And finally clear the reserved guard page */
1733         ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
1734
1735         if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
1736                 struct i915_hw_ppgtt *ppgtt;
1737
1738                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1739                 if (!ppgtt)
1740                         return -ENOMEM;
1741
1742                 ret = __hw_ppgtt_init(dev, ppgtt);
1743                 if (ret != 0)
1744                         return ret;
1745
1746                 dev_priv->mm.aliasing_ppgtt = ppgtt;
1747         }
1748
1749         return 0;
1750 }
1751
1752 void i915_gem_init_global_gtt(struct drm_device *dev)
1753 {
1754         struct drm_i915_private *dev_priv = dev->dev_private;
1755         unsigned long gtt_size, mappable_size;
1756
1757         gtt_size = dev_priv->gtt.base.total;
1758         mappable_size = dev_priv->gtt.mappable_end;
1759
1760         i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
1761 }
1762
1763 void i915_global_gtt_cleanup(struct drm_device *dev)
1764 {
1765         struct drm_i915_private *dev_priv = dev->dev_private;
1766         struct i915_address_space *vm = &dev_priv->gtt.base;
1767
1768         if (dev_priv->mm.aliasing_ppgtt) {
1769                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1770
1771                 ppgtt->base.cleanup(&ppgtt->base);
1772         }
1773
1774         if (drm_mm_initialized(&vm->mm)) {
1775                 drm_mm_takedown(&vm->mm);
1776                 list_del(&vm->global_link);
1777         }
1778
1779         vm->cleanup(vm);
1780 }
1781
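/*
 * Allocates the single zeroed, uncached page that backs unused or cleared
 * GTT entries and records it in dev_priv->gtt.base.scratch.
 */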
1782 static int setup_scratch_page(struct drm_device *dev)
1783 {
1784         struct drm_i915_private *dev_priv = dev->dev_private;
1785         struct vm_page *page;
1786         dma_addr_t dma_addr;
1787
1788         page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
1789         if (page == NULL)
1790                 return -ENOMEM;
1791         get_page(page);
1792         set_pages_uc(page, 1);
1793
1794 #ifdef CONFIG_INTEL_IOMMU
1795         dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
1796                                 PCI_DMA_BIDIRECTIONAL);
1797         if (pci_dma_mapping_error(dev->pdev, dma_addr))
1798                 return -EINVAL;
1799 #else
1800         dma_addr = page_to_phys(page);
1801 #endif
1802         dev_priv->gtt.base.scratch.page = page;
1803         dev_priv->gtt.base.scratch.addr = dma_addr;
1804
1805         return 0;
1806 }
1807
1808 #if 0
1809 static void teardown_scratch_page(struct drm_device *dev)
1810 {
1811         struct drm_i915_private *dev_priv = dev->dev_private;
1812         struct vm_page *page = dev_priv->gtt.base.scratch.page;
1813
1814         set_pages_wb(page, 1);
1815         pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
1816                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
1817         put_page(page);
1818         __free_page(page);
1819 }
1820 #endif
1821
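/*
 * The GGMS field of SNB_GMCH_CTRL gives the GTT size in MB, hence the
 * shift by 20.  For example GGMS == 2 selects 2MB of PTE space, which
 * gen6_gmch_probe turns into (2MB / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT
 * = 2GB of GGTT address space with 4-byte PTEs.
 */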
1822 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1823 {
1824         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1825         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1826         return snb_gmch_ctl << 20;
1827 }
1828
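/*
 * On gen8 the GGMS field is a power-of-two exponent: a non-zero value
 * selects (1 << GGMS) MB of PTE space.  For example GGMS == 3 gives 8MB
 * of PTEs, which with 8-byte gen8 PTEs maps 4GB of GGTT address space
 * (see gen8_gmch_probe).
 */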
1829 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1830 {
1831         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1832         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1833         if (bdw_gmch_ctl)
1834                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1835
1836 #ifdef CONFIG_X86_32
1837         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
1838         if (bdw_gmch_ctl > 4)
1839                 bdw_gmch_ctl = 4;
1840 #endif
1841
1842         return bdw_gmch_ctl << 20;
1843 }
1844
1845 static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
1846 {
1847         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
1848         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
1849
1850         if (gmch_ctrl)
1851                 return 1 << (20 + gmch_ctrl);
1852
1853         return 0;
1854 }
1855
1856 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
1857 {
1858         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
1859         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
1860         return snb_gmch_ctl << 25; /* 32 MB units */
1861 }
1862
1863 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
1864 {
1865         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
1866         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
1867         return bdw_gmch_ctl << 25; /* 32 MB units */
1868 }
1869
1870 static size_t chv_get_stolen_size(u16 gmch_ctrl)
1871 {
1872         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
1873         gmch_ctrl &= SNB_GMCH_GMS_MASK;
1874
1875         /*
1876          * 0x0  to 0x10: 32MB increments starting at 0MB
1877          * 0x11 to 0x16: 4MB increments starting at 8MB
1878          * 0x17 to 0x1d: 4MB increments starting at 36MB
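	 *
	 * For example 0x11 -> (0x11 - 0x11 + 2) << 22 = 8MB and
	 * 0x17 -> (0x17 - 0x17 + 9) << 22 = 36MB, matching the table above.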
1879          */
1880         if (gmch_ctrl < 0x11)
1881                 return gmch_ctrl << 25;
1882         else if (gmch_ctrl < 0x17)
1883                 return (gmch_ctrl - 0x11 + 2) << 22;
1884         else
1885                 return (gmch_ctrl - 0x17 + 9) << 22;
1886 }
1887
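/*
 * Common gen6+ GGTT setup: map the PTE half of BAR 0 (the "GSM")
 * write-combined so that PTEs can be written through dev_priv->gtt.gsm,
 * then allocate the scratch page.
 */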
1888 static int ggtt_probe_common(struct drm_device *dev,
1889                              size_t gtt_size)
1890 {
1891         struct drm_i915_private *dev_priv = dev->dev_private;
1892         phys_addr_t gtt_phys_addr;
1893         int ret;
1894
1895         /* For Modern GENs the PTEs and register space are split in the BAR */
1896         gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
1897                 (pci_resource_len(dev->pdev, 0) / 2);
1898
1899         kprintf("gtt_probe_common: gtt_phys_addr=0x%lx\n", gtt_phys_addr);
1900         dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
1901         if (!dev_priv->gtt.gsm) {
1902                 DRM_ERROR("Failed to map the gtt page table\n");
1903                 return -ENOMEM;
1904         }
1905
1906         ret = setup_scratch_page(dev);
1907         if (ret) {
1908                 DRM_ERROR("Scratch setup failed\n");
1909                 /* iounmap will also get called at remove, but meh */
1910 #if 0
1911                 iounmap(dev_priv->gtt.gsm);
1912 #endif
1913         }
1914
1915         return ret;
1916 }
1917
1918 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
1919  * bits. When using advanced contexts each context stores its own PAT, but
1920  * writing this data shouldn't be harmful even in those cases. */
1921 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
1922 {
1923         uint64_t pat;
1924
1925         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
1926               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
1927               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
1928               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
1929               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
1930               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
1931               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
1932               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
1933
1934         if (!USES_PPGTT(dev_priv->dev))
1935                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
1936                  * so RTL will always use the value corresponding to
1937                  * pat_sel = 000".
1938                  * So let's disable cache for GGTT to avoid screen corruptions.
1939                  * MOCS still can be used though.
1940                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
1941                  * before this patch, i.e. the same uncached + snooping access
1942                  * like on gen6/7 seems to be in effect.
1943                  * - So this just fixes blitter/render access. Again it looks
1944                  * like it's not just uncached access, but uncached + snooping.
1945                  * So we can still hold onto all our assumptions wrt cpu
1946                  * clflushing on LLC machines.
1947                  */
1948                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
1949
1950         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
1951          * write would work. */
1952         I915_WRITE(GEN8_PRIVATE_PAT, pat);
1953         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1954 }
1955
1956 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
1957 {
1958         uint64_t pat;
1959
1960         /*
1961          * Map WB on BDW to snooped on CHV.
1962          *
1963          * Only the snoop bit has meaning for CHV, the rest is
1964          * ignored.
1965          *
1966          * Note that the hardware enforces snooping for all page
1967          * table accesses. The snoop bit is actually ignored for
1968          * PDEs.
1969          */
1970         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
1971               GEN8_PPAT(1, 0) |
1972               GEN8_PPAT(2, 0) |
1973               GEN8_PPAT(3, 0) |
1974               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
1975               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
1976               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
1977               GEN8_PPAT(7, CHV_PPAT_SNOOP);
1978
1979         I915_WRITE(GEN8_PRIVATE_PAT, pat);
1980         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1981 }
1982
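/*
 * BDW/CHV GGTT probe: BAR 2 is the mappable aperture, SNB_GMCH_CTRL
 * supplies the stolen-memory and GTT sizes, and the private PPAT is
 * programmed before the common GSM/scratch-page setup.
 */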
1983 static int gen8_gmch_probe(struct drm_device *dev,
1984                            size_t *gtt_total,
1985                            size_t *stolen,
1986                            phys_addr_t *mappable_base,
1987                            unsigned long *mappable_end)
1988 {
1989         struct drm_i915_private *dev_priv = dev->dev_private;
1990         unsigned int gtt_size;
1991         u16 snb_gmch_ctl;
1992         int ret;
1993
1994         /* TODO: We're not aware of mappable constraints on gen8 yet */
1995         *mappable_base = pci_resource_start(dev->pdev, 2);
1996         *mappable_end = pci_resource_len(dev->pdev, 2);
1997
1998 #if 0
1999         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2000                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2001 #endif
2002
2003         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2004
2005         if (IS_CHERRYVIEW(dev)) {
2006                 *stolen = chv_get_stolen_size(snb_gmch_ctl);
2007                 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
2008         } else {
2009                 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
2010                 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2011         }
2012
2013         *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
2014
2015         if (IS_CHERRYVIEW(dev))
2016                 chv_setup_private_ppat(dev_priv);
2017         else
2018                 bdw_setup_private_ppat(dev_priv);
2019
2020         ret = ggtt_probe_common(dev, gtt_size);
2021
2022         dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2023         dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2024
2025         return ret;
2026 }
2027
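/*
 * SNB/IVB/HSW/VLV GGTT probe: same flow as gen8 minus the PPAT setup,
 * with an added sanity check on the reported aperture (GMADR) size.
 */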
2028 static int gen6_gmch_probe(struct drm_device *dev,
2029                            size_t *gtt_total,
2030                            size_t *stolen,
2031                            phys_addr_t *mappable_base,
2032                            unsigned long *mappable_end)
2033 {
2034         struct drm_i915_private *dev_priv = dev->dev_private;
2035         unsigned int gtt_size;
2036         u16 snb_gmch_ctl;
2037         int ret;
2038
2039         *mappable_base = pci_resource_start(dev->pdev, 2);
2040         *mappable_end = pci_resource_len(dev->pdev, 2);
2041
2042         /* 64/512MB is the current min/max we actually know of, but this is just
2043          * a coarse sanity check.
2044          */
2045         if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2046                 DRM_ERROR("Unknown GMADR size (%lx)\n",
2047                           dev_priv->gtt.mappable_end);
2048                 return -ENXIO;
2049         }
2050
2051 #if 0
2052         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2053                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2054 #endif
2055         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2056
2057         *stolen = gen6_get_stolen_size(snb_gmch_ctl);
2058
2059         gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2060         *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
2061
2062         ret = ggtt_probe_common(dev, gtt_size);
2063
2064         dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2065         dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2066
2067         return ret;
2068 }
2069
2070 static void gen6_gmch_remove(struct i915_address_space *vm)
2071 {
2072 #if 0
2073         struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2074
2075         iounmap(gtt->gsm);
2076         teardown_scratch_page(vm->dev);
2077 #endif
2078 }
2079
2080 static int i915_gmch_probe(struct drm_device *dev,
2081                            size_t *gtt_total,
2082                            size_t *stolen,
2083                            phys_addr_t *mappable_base,
2084                            unsigned long *mappable_end)
2085 {
2086         struct drm_i915_private *dev_priv = dev->dev_private;
2087 #if 0
2088         int ret;
2089
2090         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2091         if (!ret) {
2092                 DRM_ERROR("failed to set up gmch\n");
2093                 return -EIO;
2094         }
2095 #endif
2096
2097         intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2098
2099         dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2100         dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2101
2102         if (unlikely(dev_priv->gtt.do_idle_maps))
2103                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2104
2105         return 0;
2106 }
2107
2108 static void i915_gmch_remove(struct i915_address_space *vm)
2109 {
2110         intel_gmch_remove();
2111 }
2112
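/*
 * Top-level GTT init: select the probe/cleanup/PTE-encode vfuncs for this
 * generation, run the probe to size the GGTT, stolen memory and aperture,
 * then sanitize the requested PPGTT mode.
 */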
2113 int i915_gem_gtt_init(struct drm_device *dev)
2114 {
2115         struct drm_i915_private *dev_priv = dev->dev_private;
2116         struct i915_gtt *gtt = &dev_priv->gtt;
2117         int ret;
2118
2119         if (INTEL_INFO(dev)->gen <= 5) {
2120                 gtt->gtt_probe = i915_gmch_probe;
2121                 gtt->base.cleanup = i915_gmch_remove;
2122         } else if (INTEL_INFO(dev)->gen < 8) {
2123                 gtt->gtt_probe = gen6_gmch_probe;
2124                 gtt->base.cleanup = gen6_gmch_remove;
2125                 if (IS_HASWELL(dev) && dev_priv->ellc_size)
2126                         gtt->base.pte_encode = iris_pte_encode;
2127                 else if (IS_HASWELL(dev))
2128                         gtt->base.pte_encode = hsw_pte_encode;
2129                 else if (IS_VALLEYVIEW(dev))
2130                         gtt->base.pte_encode = byt_pte_encode;
2131                 else if (INTEL_INFO(dev)->gen >= 7)
2132                         gtt->base.pte_encode = ivb_pte_encode;
2133                 else
2134                         gtt->base.pte_encode = snb_pte_encode;
2135         } else {
2136                 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2137                 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2138         }
2139
2140         ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2141                              &gtt->mappable_base, &gtt->mappable_end);
2142         if (ret)
2143                 return ret;
2144
2145         gtt->base.dev = dev;
2146
2147         /* GMADR is the PCI mmio aperture into the global GTT. */
2148         DRM_INFO("Memory usable by graphics device = %zdM\n",
2149                  gtt->base.total >> 20);
2150         DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2151         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2152 #ifdef CONFIG_INTEL_IOMMU
2153         if (intel_iommu_gfx_mapped)
2154                 DRM_INFO("VT-d active for gfx access\n");
2155 #endif
2156         /*
2157          * i915.enable_ppgtt is read-only, so do an early pass to validate the
2158          * user's requested state against the hardware/driver capabilities.  We
2159          * do this now so that we can print out any log messages once rather
2160          * than every time we check intel_enable_ppgtt().
2161          */
2162         i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2163         DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2164
2165         return 0;
2166 }
2167
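/*
 * Allocates a VMA for obj in vm and wires up the per-generation bind and
 * unbind hooks.  GGTT VMAs are kept at the head of the object's vma_list
 * to make debugging easier; PPGTT VMAs take a reference on their PPGTT.
 */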
2168 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
2169                                               struct i915_address_space *vm)
2170 {
2171         struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
2172         if (vma == NULL)
2173                 return ERR_PTR(-ENOMEM);
2174
2175         INIT_LIST_HEAD(&vma->vma_link);
2176         INIT_LIST_HEAD(&vma->mm_list);
2177         INIT_LIST_HEAD(&vma->exec_list);
2178         vma->vm = vm;
2179         vma->obj = obj;
2180
2181         switch (INTEL_INFO(vm->dev)->gen) {
2182         case 8:
2183         case 7:
2184         case 6:
2185                 if (i915_is_ggtt(vm)) {
2186                         vma->unbind_vma = ggtt_unbind_vma;
2187                         vma->bind_vma = ggtt_bind_vma;
2188                 } else {
2189                         vma->unbind_vma = ppgtt_unbind_vma;
2190                         vma->bind_vma = ppgtt_bind_vma;
2191                 }
2192                 break;
2193         case 5:
2194         case 4:
2195         case 3:
2196         case 2:
2197                 BUG_ON(!i915_is_ggtt(vm));
2198                 vma->unbind_vma = i915_ggtt_unbind_vma;
2199                 vma->bind_vma = i915_ggtt_bind_vma;
2200                 break;
2201         default:
2202                 BUG();
2203         }
2204
2205         /* Keep GGTT vmas first to make debug easier */
2206         if (i915_is_ggtt(vm))
2207                 list_add(&vma->vma_link, &obj->vma_list);
2208         else {
2209                 list_add_tail(&vma->vma_link, &obj->vma_list);
2210                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
2211         }
2212
2213         return vma;
2214 }
2215
2216 struct i915_vma *
2217 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2218                                   struct i915_address_space *vm)
2219 {
2220         struct i915_vma *vma;
2221
2222         vma = i915_gem_obj_to_vma(obj, vm);
2223         if (!vma)
2224                 vma = __i915_gem_vma_create(obj, vm);
2225
2226         return vma;
2227 }