sys/dev/drm/i915/i915_gem_gtt.c (dragonfly.git)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34
35 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
36
37 /**
38  * DOC: Global GTT views
39  *
40  * Background and previous state
41  *
42  * Historically objects could exist (be bound) in global GTT space only as
43  * singular instances, with a view representing all of the object's backing
44  * pages in a linear fashion. This view is called the normal view.
45  *
46  * To support multiple views of the same object, where the number of mapped
47  * pages is not equal to the backing store, or where the layout of the pages
48  * is not linear, the concept of a GGTT view was added.
49  *
50  * One example of an alternative view is a stereo display driven by a single
51  * image. In this case we would have a framebuffer looking like this
52  * (2x2 pages):
53  *
54  *    12
55  *    34
56  *
57  * The above would represent a normal GGTT view as normally mapped for GPU or
58  * CPU rendering. In contrast, what would be fed to the display engine is an
59  * alternative view which could look something like this:
60  *
61  *   1212
62  *   3434
63  *
64  * In this example both the size and the layout of pages in the alternative
65  * view are different from those of the normal view.
66  *
67  * Implementation and usage
68  *
69  * GGTT views are implemented using VMAs and are distinguished via enum
70  * i915_ggtt_view_type and struct i915_ggtt_view.
71  *
72  * A new flavour of core GEM functions which work with GGTT bound objects was
73  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
74  * renaming large amounts of code. They take the struct i915_ggtt_view
75  * parameter encapsulating all metadata required to implement a view.
76  *
77  * As a helper for callers which are only interested in the normal view, a
78  * globally const i915_ggtt_view_normal singleton instance exists. All old core
79  * GEM API functions (the ones not taking the view parameter) operate on, or
80  * with, the normal GGTT view.
81  *
82  * Code wanting to add or use a new GGTT view needs to:
83  *
84  * 1. Add a new enum with a suitable name.
85  * 2. Extend the metadata in the i915_ggtt_view structure if required.
86  * 3. Add support to i915_get_vma_pages().
87  *
88  * New views are required to build a scatter-gather table from within the
89  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
90  * exists for the lifetime of a VMA.
91  *
92  * The core API is designed to have copy semantics, which means that the
93  * passed-in struct i915_ggtt_view does not need to be persistent (left around
94  * after calling the core API functions).
95  *
96  */
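
/*
 * Illustrative sketch only, not driver code: because of the copy semantics
 * described above, a caller can describe a view in a short-lived local
 * variable. pin_obj_with_view() is a hypothetical placeholder for the
 * _ggtt_/_view flavoured GEM entry points; i915_ggtt_view_normal and
 * I915_GGTT_VIEW_ROTATED are the real symbols defined in this file.
 *
 *	struct i915_ggtt_view view = i915_ggtt_view_normal;
 *
 *	view.type = I915_GGTT_VIEW_ROTATED;
 *	ret = pin_obj_with_view(obj, &view);	(hypothetical helper)
 *	... "view" may now go out of scope; the bound VMA keeps its own copy.
 */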
97
98 static inline struct i915_ggtt *
99 i915_vm_to_ggtt(struct i915_address_space *vm)
100 {
101         GEM_BUG_ON(!i915_is_ggtt(vm));
102         return container_of(vm, struct i915_ggtt, base);
103 }
104
105 static int
106 i915_get_ggtt_vma_pages(struct i915_vma *vma);
107
108 const struct i915_ggtt_view i915_ggtt_view_normal = {
109         .type = I915_GGTT_VIEW_NORMAL,
110 };
111 const struct i915_ggtt_view i915_ggtt_view_rotated = {
112         .type = I915_GGTT_VIEW_ROTATED,
113 };
114
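/*
 * Note on the encoding used below: the value of i915.enable_ppgtt and the
 * value returned here mean 0 = PPGTT disabled, 1 = aliasing PPGTT, 2 = full
 * PPGTT and 3 = full PPGTT with 48bit (4-level) page tables. Any other
 * value (e.g. the default -1 "auto") falls through to the hardware-based
 * choices at the end of the function.
 */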
115 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
116                                 int enable_ppgtt)
117 {
118         bool has_aliasing_ppgtt;
119         bool has_full_ppgtt;
120         bool has_full_48bit_ppgtt;
121
122         has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
123         has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
124         has_full_48bit_ppgtt =
125                 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
126
127         if (intel_vgpu_active(dev_priv)) {
128                 /* emulation is too hard */
129                 has_full_ppgtt = false;
130                 has_full_48bit_ppgtt = false;
131         }
132
133         if (!has_aliasing_ppgtt)
134                 return 0;
135
136         /*
137          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
138          * execlists, the sole mechanism available to submit work.
139          */
140         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
141                 return 0;
142
143         if (enable_ppgtt == 1)
144                 return 1;
145
146         if (enable_ppgtt == 2 && has_full_ppgtt)
147                 return 2;
148
149         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
150                 return 3;
151
152 #ifdef CONFIG_INTEL_IOMMU
153         /* Disable ppgtt on SNB if VT-d is on. */
154         if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
155                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
156                 return 0;
157         }
158 #endif
159
160         /* Early VLV (pre-B3 stepping) doesn't have working PPGTT */
161         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
162                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
163                 return 0;
164         }
165
166         if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
167                 return has_full_48bit_ppgtt ? 3 : 2;
168         else
169                 return has_aliasing_ppgtt ? 1 : 0;
170 }
171
172 static int ppgtt_bind_vma(struct i915_vma *vma,
173                           enum i915_cache_level cache_level,
174                           u32 unused)
175 {
176         u32 pte_flags = 0;
177
178         vma->pages = vma->obj->pages;
179
180         /* Currently applicable only to VLV */
181         if (vma->obj->gt_ro)
182                 pte_flags |= PTE_READ_ONLY;
183
184         vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
185                                 cache_level, pte_flags);
186
187         return 0;
188 }
189
190 static void ppgtt_unbind_vma(struct i915_vma *vma)
191 {
192         vma->vm->clear_range(vma->vm,
193                              vma->node.start,
194                              vma->size);
195 }
196
197 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
198                                   enum i915_cache_level level)
199 {
200         gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
201         pte |= addr;
202
203         switch (level) {
204         case I915_CACHE_NONE:
205                 pte |= PPAT_UNCACHED_INDEX;
206                 break;
207         case I915_CACHE_WT:
208                 pte |= PPAT_DISPLAY_ELLC_INDEX;
209                 break;
210         default:
211                 pte |= PPAT_CACHED_INDEX;
212                 break;
213         }
214
215         return pte;
216 }
217
218 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
219                                   const enum i915_cache_level level)
220 {
221         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
222         pde |= addr;
223         if (level != I915_CACHE_NONE)
224                 pde |= PPAT_CACHED_PDE_INDEX;
225         else
226                 pde |= PPAT_UNCACHED_INDEX;
227         return pde;
228 }
229
230 #define gen8_pdpe_encode gen8_pde_encode
231 #define gen8_pml4e_encode gen8_pde_encode
232
233 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
234                                  enum i915_cache_level level,
235                                  u32 unused)
236 {
237         gen6_pte_t pte = GEN6_PTE_VALID;
238         pte |= GEN6_PTE_ADDR_ENCODE(addr);
239
240         switch (level) {
241         case I915_CACHE_L3_LLC:
242         case I915_CACHE_LLC:
243                 pte |= GEN6_PTE_CACHE_LLC;
244                 break;
245         case I915_CACHE_NONE:
246                 pte |= GEN6_PTE_UNCACHED;
247                 break;
248         default:
249                 MISSING_CASE(level);
250         }
251
252         return pte;
253 }
254
255 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
256                                  enum i915_cache_level level,
257                                  u32 unused)
258 {
259         gen6_pte_t pte = GEN6_PTE_VALID;
260         pte |= GEN6_PTE_ADDR_ENCODE(addr);
261
262         switch (level) {
263         case I915_CACHE_L3_LLC:
264                 pte |= GEN7_PTE_CACHE_L3_LLC;
265                 break;
266         case I915_CACHE_LLC:
267                 pte |= GEN6_PTE_CACHE_LLC;
268                 break;
269         case I915_CACHE_NONE:
270                 pte |= GEN6_PTE_UNCACHED;
271                 break;
272         default:
273                 MISSING_CASE(level);
274         }
275
276         return pte;
277 }
278
279 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
280                                  enum i915_cache_level level,
281                                  u32 flags)
282 {
283         gen6_pte_t pte = GEN6_PTE_VALID;
284         pte |= GEN6_PTE_ADDR_ENCODE(addr);
285
286         if (!(flags & PTE_READ_ONLY))
287                 pte |= BYT_PTE_WRITEABLE;
288
289         if (level != I915_CACHE_NONE)
290                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
291
292         return pte;
293 }
294
295 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
296                                  enum i915_cache_level level,
297                                  u32 unused)
298 {
299         gen6_pte_t pte = GEN6_PTE_VALID;
300         pte |= HSW_PTE_ADDR_ENCODE(addr);
301
302         if (level != I915_CACHE_NONE)
303                 pte |= HSW_WB_LLC_AGE3;
304
305         return pte;
306 }
307
308 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
309                                   enum i915_cache_level level,
310                                   u32 unused)
311 {
312         gen6_pte_t pte = GEN6_PTE_VALID;
313         pte |= HSW_PTE_ADDR_ENCODE(addr);
314
315         switch (level) {
316         case I915_CACHE_NONE:
317                 break;
318         case I915_CACHE_WT:
319                 pte |= HSW_WT_ELLC_LLC_AGE3;
320                 break;
321         default:
322                 pte |= HSW_WB_ELLC_LLC_AGE3;
323                 break;
324         }
325
326         return pte;
327 }
328
329 static int __setup_page_dma(struct drm_device *dev,
330                             struct i915_page_dma *p, gfp_t flags)
331 {
332         struct device *kdev = &dev->pdev->dev;
333
334         p->page = alloc_page(flags);
335         if (!p->page)
336                 return -ENOMEM;
337
338         p->daddr = dma_map_page(kdev,
339                                 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
340
341         if (dma_mapping_error(kdev, p->daddr)) {
342                 __free_page(p->page);
343                 return -EINVAL;
344         }
345
346         return 0;
347 }
348
349 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
350 {
351         return __setup_page_dma(dev, p, I915_GFP_DMA);
352 }
353
354 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
355 {
356         struct pci_dev *pdev = dev->pdev;
357
358         if (WARN_ON(!p->page))
359                 return;
360
361         dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
362         __free_page(p->page);
363         memset(p, 0, sizeof(*p));
364 }
365
366 static void *kmap_page_dma(struct i915_page_dma *p)
367 {
368         return kmap_atomic(p->page);
369 }
370
371 /* We use the flushing unmap only with ppgtt structures:
372  * page directories, page tables and scratch pages.
373  */
374 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
375 {
376         /* There are only a few exceptions for gen >= 6: chv and bxt.
377          * And we are not sure about the latter, so play safe for now.
378          */
379         if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
380                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
381
382         kunmap_atomic(vaddr);
383 }
384
385 #define kmap_px(px) kmap_page_dma(px_base(px))
386 #define kunmap_px(ppgtt, vaddr) \
387                 kunmap_page_dma(to_i915((ppgtt)->base.dev), (vaddr))
388
389 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
390 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
391 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
392 #define fill32_px(dev_priv, px, v) \
393                 fill_page_dma_32((dev_priv), px_base(px), (v))
394
395 static void fill_page_dma(struct drm_i915_private *dev_priv,
396                           struct i915_page_dma *p, const uint64_t val)
397 {
398         int i;
399         uint64_t * const vaddr = kmap_page_dma(p);
400
401         for (i = 0; i < 512; i++)
402                 vaddr[i] = val;
403
404         kunmap_page_dma(dev_priv, vaddr);
405 }
406
407 static void fill_page_dma_32(struct drm_i915_private *dev_priv,
408                              struct i915_page_dma *p, const uint32_t val32)
409 {
410         uint64_t v = val32;
411
412         v = v << 32 | val32;
413
414         fill_page_dma(dev_priv, p, v);
415 }
416
417 static int
418 setup_scratch_page(struct drm_device *dev,
419                    struct i915_page_dma *scratch,
420                    gfp_t gfp)
421 {
422         return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
423 }
424
425 static void cleanup_scratch_page(struct drm_device *dev,
426                                  struct i915_page_dma *scratch)
427 {
428         cleanup_page_dma(dev, scratch);
429 }
430
431 static struct i915_page_table *alloc_pt(struct drm_device *dev)
432 {
433         struct i915_page_table *pt;
434         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
435                 GEN8_PTES : GEN6_PTES;
436         int ret = -ENOMEM;
437
438         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
439         if (!pt)
440                 return ERR_PTR(-ENOMEM);
441
442         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
443                                 GFP_KERNEL);
444
445         if (!pt->used_ptes)
446                 goto fail_bitmap;
447
448         ret = setup_px(dev, pt);
449         if (ret)
450                 goto fail_page_m;
451
452         return pt;
453
454 fail_page_m:
455         kfree(pt->used_ptes);
456 fail_bitmap:
457         kfree(pt);
458
459         return ERR_PTR(ret);
460 }
461
462 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
463 {
464         cleanup_px(dev, pt);
465         kfree(pt->used_ptes);
466         kfree(pt);
467 }
468
469 static void gen8_initialize_pt(struct i915_address_space *vm,
470                                struct i915_page_table *pt)
471 {
472         gen8_pte_t scratch_pte;
473
474         scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
475                                       I915_CACHE_LLC);
476
477         fill_px(to_i915(vm->dev), pt, scratch_pte);
478 }
479
480 static void gen6_initialize_pt(struct i915_address_space *vm,
481                                struct i915_page_table *pt)
482 {
483         gen6_pte_t scratch_pte;
484
485         WARN_ON(vm->scratch_page.daddr == 0);
486
487         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
488                                      I915_CACHE_LLC, 0);
489
490         fill32_px(to_i915(vm->dev), pt, scratch_pte);
491 }
492
493 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
494 {
495         struct i915_page_directory *pd;
496         int ret = -ENOMEM;
497
498         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
499         if (!pd)
500                 return ERR_PTR(-ENOMEM);
501
502         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
503                                 sizeof(*pd->used_pdes), GFP_KERNEL);
504         if (!pd->used_pdes)
505                 goto fail_bitmap;
506
507         ret = setup_px(dev, pd);
508         if (ret)
509                 goto fail_page_m;
510
511         return pd;
512
513 fail_page_m:
514         kfree(pd->used_pdes);
515 fail_bitmap:
516         kfree(pd);
517
518         return ERR_PTR(ret);
519 }
520
521 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
522 {
523         if (px_page(pd)) {
524                 cleanup_px(dev, pd);
525                 kfree(pd->used_pdes);
526                 kfree(pd);
527         }
528 }
529
530 static void gen8_initialize_pd(struct i915_address_space *vm,
531                                struct i915_page_directory *pd)
532 {
533         gen8_pde_t scratch_pde;
534
535         scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
536
537         fill_px(to_i915(vm->dev), pd, scratch_pde);
538 }
539
540 static int __pdp_init(struct drm_device *dev,
541                       struct i915_page_directory_pointer *pdp)
542 {
543         size_t pdpes = I915_PDPES_PER_PDP(dev);
544
545         pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
546                                   sizeof(unsigned long),
547                                   GFP_KERNEL);
548         if (!pdp->used_pdpes)
549                 return -ENOMEM;
550
551         pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
552                                       GFP_KERNEL);
553         if (!pdp->page_directory) {
554                 kfree(pdp->used_pdpes);
555                 /* the PDP might be the statically allocated top level. Keep it
556                  * as clean as possible */
557                 pdp->used_pdpes = NULL;
558                 return -ENOMEM;
559         }
560
561         return 0;
562 }
563
564 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
565 {
566         kfree(pdp->used_pdpes);
567         kfree(pdp->page_directory);
568         pdp->page_directory = NULL;
569 }
570
571 static struct
572 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
573 {
574         struct i915_page_directory_pointer *pdp;
575         int ret = -ENOMEM;
576
577         WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
578
579         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
580         if (!pdp)
581                 return ERR_PTR(-ENOMEM);
582
583         ret = __pdp_init(dev, pdp);
584         if (ret)
585                 goto fail_bitmap;
586
587         ret = setup_px(dev, pdp);
588         if (ret)
589                 goto fail_page_m;
590
591         return pdp;
592
593 fail_page_m:
594         __pdp_fini(pdp);
595 fail_bitmap:
596         kfree(pdp);
597
598         return ERR_PTR(ret);
599 }
600
601 static void free_pdp(struct drm_device *dev,
602                      struct i915_page_directory_pointer *pdp)
603 {
604         __pdp_fini(pdp);
605         if (USES_FULL_48BIT_PPGTT(dev)) {
606                 cleanup_px(dev, pdp);
607                 kfree(pdp);
608         }
609 }
610
611 static void gen8_initialize_pdp(struct i915_address_space *vm,
612                                 struct i915_page_directory_pointer *pdp)
613 {
614         gen8_ppgtt_pdpe_t scratch_pdpe;
615
616         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
617
618         fill_px(to_i915(vm->dev), pdp, scratch_pdpe);
619 }
620
621 static void gen8_initialize_pml4(struct i915_address_space *vm,
622                                  struct i915_pml4 *pml4)
623 {
624         gen8_ppgtt_pml4e_t scratch_pml4e;
625
626         scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
627                                           I915_CACHE_LLC);
628
629         fill_px(to_i915(vm->dev), pml4, scratch_pml4e);
630 }
631
632 static void
633 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
634                           struct i915_page_directory_pointer *pdp,
635                           struct i915_page_directory *pd,
636                           int index)
637 {
638         gen8_ppgtt_pdpe_t *page_directorypo;
639
640         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
641                 return;
642
643         page_directorypo = kmap_px(pdp);
644         page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
645         kunmap_px(ppgtt, page_directorypo);
646 }
647
648 static void
649 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
650                                   struct i915_pml4 *pml4,
651                                   struct i915_page_directory_pointer *pdp,
652                                   int index)
653 {
654         gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
655
656         WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
657         pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
658         kunmap_px(ppgtt, pagemap);
659 }
660
661 /* Broadwell Page Directory Pointer Descriptors */
662 static int gen8_write_pdp(struct drm_i915_gem_request *req,
663                           unsigned entry,
664                           dma_addr_t addr)
665 {
666         struct intel_ring *ring = req->ring;
667         struct intel_engine_cs *engine = req->engine;
668         int ret;
669
670         BUG_ON(entry >= 4);
671
672         ret = intel_ring_begin(req, 6);
673         if (ret)
674                 return ret;
675
676         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
677         intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry));
678         intel_ring_emit(ring, upper_32_bits(addr));
679         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
680         intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry));
681         intel_ring_emit(ring, lower_32_bits(addr));
682         intel_ring_advance(ring);
683
684         return 0;
685 }
686
687 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
688                                  struct drm_i915_gem_request *req)
689 {
690         int i, ret;
691
692         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
693                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
694
695                 ret = gen8_write_pdp(req, i, pd_daddr);
696                 if (ret)
697                         return ret;
698         }
699
700         return 0;
701 }
702
703 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
704                               struct drm_i915_gem_request *req)
705 {
706         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
707 }
708
709 /* Removes entries from a single page table, releasing it if it's empty.
710  * Caller can use the return value to update higher-level entries.
711  */
712 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
713                                 struct i915_page_table *pt,
714                                 uint64_t start,
715                                 uint64_t length)
716 {
717         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
718         unsigned int pte_start = gen8_pte_index(start);
719         unsigned int num_entries = gen8_pte_count(start, length);
720         uint64_t pte;
721         gen8_pte_t *pt_vaddr;
722         gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
723                                                  I915_CACHE_LLC);
724
725         if (WARN_ON(!px_page(pt)))
726                 return false;
727
728         bitmap_clear(pt->used_ptes, pte_start, num_entries);
729
730         if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
731                 free_pt(vm->dev, pt);
732                 return true;
733         }
734
735         pt_vaddr = kmap_px(pt);
736
737         for (pte = pte_start; pte < num_entries; pte++)
738                 pt_vaddr[pte] = scratch_pte;
739
740         kunmap_px(ppgtt, pt_vaddr);
741
742         return false;
743 }
744
745 /* Removes entries from a single page dir, releasing it if it's empty.
746  * Caller can use the return value to update higher-level entries
747  */
748 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
749                                 struct i915_page_directory *pd,
750                                 uint64_t start,
751                                 uint64_t length)
752 {
753         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
754         struct i915_page_table *pt;
755         uint64_t pde;
756         gen8_pde_t *pde_vaddr;
757         gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
758                                                  I915_CACHE_LLC);
759
760         gen8_for_each_pde(pt, pd, start, length, pde) {
761                 if (WARN_ON(!pd->page_table[pde]))
762                         break;
763
764                 if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
765                         __clear_bit(pde, pd->used_pdes);
766                         pde_vaddr = kmap_px(pd);
767                         pde_vaddr[pde] = scratch_pde;
768                         kunmap_px(ppgtt, pde_vaddr);
769                 }
770         }
771
772         if (bitmap_empty(pd->used_pdes, I915_PDES)) {
773                 free_pd(vm->dev, pd);
774                 return true;
775         }
776
777         return false;
778 }
779
780 /* Removes entries from a single page dir pointer, releasing it if it's empty.
781  * Caller can use the return value to update higher-level entries
782  */
783 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
784                                  struct i915_page_directory_pointer *pdp,
785                                  uint64_t start,
786                                  uint64_t length)
787 {
788         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
789         struct i915_page_directory *pd;
790         uint64_t pdpe;
791         gen8_ppgtt_pdpe_t *pdpe_vaddr;
792         gen8_ppgtt_pdpe_t scratch_pdpe =
793                 gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
794
795         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
796                 if (WARN_ON(!pdp->page_directory[pdpe]))
797                         break;
798
799                 if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
800                         __clear_bit(pdpe, pdp->used_pdpes);
801                         if (USES_FULL_48BIT_PPGTT(vm->dev)) {
802                                 pdpe_vaddr = kmap_px(pdp);
803                                 pdpe_vaddr[pdpe] = scratch_pdpe;
804                                 kunmap_px(ppgtt, pdpe_vaddr);
805                         }
806                 }
807         }
808
809         if (USES_FULL_48BIT_PPGTT(vm->dev) &&
810             bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
811                 free_pdp(vm->dev, pdp);
812                 return true;
813         }
814
815         return false;
816 }
817
818 /* Removes entries from a single pml4.
819  * This is the top-level structure in 4-level page tables used on gen8+.
820  * Empty entries are always scratch pml4e.
821  */
822 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
823                                   struct i915_pml4 *pml4,
824                                   uint64_t start,
825                                   uint64_t length)
826 {
827         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
828         struct i915_page_directory_pointer *pdp;
829         uint64_t pml4e;
830         gen8_ppgtt_pml4e_t *pml4e_vaddr;
831         gen8_ppgtt_pml4e_t scratch_pml4e =
832                 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC);
833
834         GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->dev));
835
836         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
837                 if (WARN_ON(!pml4->pdps[pml4e]))
838                         break;
839
840                 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
841                         __clear_bit(pml4e, pml4->used_pml4es);
842                         pml4e_vaddr = kmap_px(pml4);
843                         pml4e_vaddr[pml4e] = scratch_pml4e;
844                         kunmap_px(ppgtt, pml4e_vaddr);
845                 }
846         }
847 }
848
849 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
850                                    uint64_t start, uint64_t length)
851 {
852         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
853
854         if (USES_FULL_48BIT_PPGTT(vm->dev))
855                 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
856         else
857                 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
858 }
859
860 static void
861 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
862                               struct i915_page_directory_pointer *pdp,
863                               struct sg_page_iter *sg_iter,
864                               uint64_t start,
865                               enum i915_cache_level cache_level)
866 {
867         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
868         gen8_pte_t *pt_vaddr;
869         unsigned pdpe = gen8_pdpe_index(start);
870         unsigned pde = gen8_pde_index(start);
871         unsigned pte = gen8_pte_index(start);
872
873         pt_vaddr = NULL;
874
875         while (__sg_page_iter_next(sg_iter)) {
876                 if (pt_vaddr == NULL) {
877                         struct i915_page_directory *pd = pdp->page_directory[pdpe];
878                         struct i915_page_table *pt = pd->page_table[pde];
879                         pt_vaddr = kmap_px(pt);
880                 }
881
882                 pt_vaddr[pte] =
883                         gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
884                                         cache_level);
885                 if (++pte == GEN8_PTES) {
886                         kunmap_px(ppgtt, pt_vaddr);
887                         pt_vaddr = NULL;
888                         if (++pde == I915_PDES) {
889                                 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
890                                         break;
891                                 pde = 0;
892                         }
893                         pte = 0;
894                 }
895         }
896
897         if (pt_vaddr)
898                 kunmap_px(ppgtt, pt_vaddr);
899 }
900
901 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
902                                       struct sg_table *pages,
903                                       uint64_t start,
904                                       enum i915_cache_level cache_level,
905                                       u32 unused)
906 {
907         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
908         struct sg_page_iter sg_iter;
909
910         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
911
912         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
913                 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
914                                               cache_level);
915         } else {
916                 struct i915_page_directory_pointer *pdp;
917                 uint64_t pml4e;
918                 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
919
920                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
921                         gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
922                                                       start, cache_level);
923                 }
924         }
925 }
926
927 static void gen8_free_page_tables(struct drm_device *dev,
928                                   struct i915_page_directory *pd)
929 {
930         int i;
931
932         if (!px_page(pd))
933                 return;
934
935         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
936                 if (WARN_ON(!pd->page_table[i]))
937                         continue;
938
939                 free_pt(dev, pd->page_table[i]);
940                 pd->page_table[i] = NULL;
941         }
942 }
943
944 static int gen8_init_scratch(struct i915_address_space *vm)
945 {
946         struct drm_device *dev = vm->dev;
947         int ret;
948
949         ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
950         if (ret)
951                 return ret;
952
953         vm->scratch_pt = alloc_pt(dev);
954         if (IS_ERR(vm->scratch_pt)) {
955                 ret = PTR_ERR(vm->scratch_pt);
956                 goto free_scratch_page;
957         }
958
959         vm->scratch_pd = alloc_pd(dev);
960         if (IS_ERR(vm->scratch_pd)) {
961                 ret = PTR_ERR(vm->scratch_pd);
962                 goto free_pt;
963         }
964
965         if (USES_FULL_48BIT_PPGTT(dev)) {
966                 vm->scratch_pdp = alloc_pdp(dev);
967                 if (IS_ERR(vm->scratch_pdp)) {
968                         ret = PTR_ERR(vm->scratch_pdp);
969                         goto free_pd;
970                 }
971         }
972
973         gen8_initialize_pt(vm, vm->scratch_pt);
974         gen8_initialize_pd(vm, vm->scratch_pd);
975         if (USES_FULL_48BIT_PPGTT(dev))
976                 gen8_initialize_pdp(vm, vm->scratch_pdp);
977
978         return 0;
979
980 free_pd:
981         free_pd(dev, vm->scratch_pd);
982 free_pt:
983         free_pt(dev, vm->scratch_pt);
984 free_scratch_page:
985         cleanup_scratch_page(dev, &vm->scratch_page);
986
987         return ret;
988 }
989
990 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
991 {
992         enum vgt_g2v_type msg;
993         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
994         int i;
995
996         if (USES_FULL_48BIT_PPGTT(dev_priv)) {
997                 u64 daddr = px_dma(&ppgtt->pml4);
998
999                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1000                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1001
1002                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1003                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1004         } else {
1005                 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
1006                         u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1007
1008                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1009                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1010                 }
1011
1012                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1013                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1014         }
1015
1016         I915_WRITE(vgtif_reg(g2v_notify), msg);
1017
1018         return 0;
1019 }
1020
1021 static void gen8_free_scratch(struct i915_address_space *vm)
1022 {
1023         struct drm_device *dev = vm->dev;
1024
1025         if (USES_FULL_48BIT_PPGTT(dev))
1026                 free_pdp(dev, vm->scratch_pdp);
1027         free_pd(dev, vm->scratch_pd);
1028         free_pt(dev, vm->scratch_pt);
1029         cleanup_scratch_page(dev, &vm->scratch_page);
1030 }
1031
1032 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
1033                                     struct i915_page_directory_pointer *pdp)
1034 {
1035         int i;
1036
1037         for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
1038                 if (WARN_ON(!pdp->page_directory[i]))
1039                         continue;
1040
1041                 gen8_free_page_tables(dev, pdp->page_directory[i]);
1042                 free_pd(dev, pdp->page_directory[i]);
1043         }
1044
1045         free_pdp(dev, pdp);
1046 }
1047
1048 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1049 {
1050         int i;
1051
1052         for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
1053                 if (WARN_ON(!ppgtt->pml4.pdps[i]))
1054                         continue;
1055
1056                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
1057         }
1058
1059         cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
1060 }
1061
1062 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1063 {
1064         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1065
1066         if (intel_vgpu_active(to_i915(vm->dev)))
1067                 gen8_ppgtt_notify_vgt(ppgtt, false);
1068
1069         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1070                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1071         else
1072                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1073
1074         gen8_free_scratch(vm);
1075 }
1076
1077 /**
1078  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1079  * @vm: Master vm structure.
1080  * @pd: Page directory for this address range.
1081  * @start:      Starting virtual address to begin allocations.
1082  * @length:     Size of the allocations.
1083  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1084  *              caller to free on error.
1085  *
1086  * Allocate the required number of page tables. Extremely similar to
1087  * gen8_ppgtt_alloc_page_directories(). The main difference is that here we
1088  * are limited by the page directory boundary (instead of the page directory
1089  * pointer), which is 1GB virtual. Therefore, unlike
1090  * gen8_ppgtt_alloc_page_directories(), it is possible, and likely, that the
1091  * caller will need to call this function multiple times for one allocation.
1092  *
1093  * Return: 0 if success; negative error code otherwise.
1094  */
1095 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1096                                      struct i915_page_directory *pd,
1097                                      uint64_t start,
1098                                      uint64_t length,
1099                                      unsigned long *new_pts)
1100 {
1101         struct drm_device *dev = vm->dev;
1102         struct i915_page_table *pt;
1103         uint32_t pde;
1104
1105         gen8_for_each_pde(pt, pd, start, length, pde) {
1106                 /* Don't reallocate page tables */
1107                 if (test_bit(pde, pd->used_pdes)) {
1108                         /* Scratch is never allocated this way */
1109                         WARN_ON(pt == vm->scratch_pt);
1110                         continue;
1111                 }
1112
1113                 pt = alloc_pt(dev);
1114                 if (IS_ERR(pt))
1115                         goto unwind_out;
1116
1117                 gen8_initialize_pt(vm, pt);
1118                 pd->page_table[pde] = pt;
1119                 __set_bit(pde, new_pts);
1120                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1121         }
1122
1123         return 0;
1124
1125 unwind_out:
1126         for_each_set_bit(pde, new_pts, I915_PDES)
1127                 free_pt(dev, pd->page_table[pde]);
1128
1129         return -ENOMEM;
1130 }
1131
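/*
 * Worked numbers for the page directory boundary mentioned above (gen8,
 * 4KiB pages): one page table maps GEN8_PTES * PAGE_SIZE = 512 * 4KiB = 2MiB,
 * and one page directory holds I915_PDES = 512 such tables, i.e. 1GiB of
 * virtual address space per page directory.
 */
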
1132 /**
1133  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1134  * @vm: Master vm structure.
1135  * @pdp:        Page directory pointer for this address range.
1136  * @start:      Starting virtual address to begin allocations.
1137  * @length:     Size of the allocations.
1138  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1139  *              caller to free on error.
1140  *
1141  * Allocate the required number of page directories starting at the pdpe index
1142  * of @start and ending at the pdpe index of @start + @length. This function will skip
1143  * over already allocated page directories within the range, and only allocate
1144  * new ones, setting the appropriate pointer within the pdp as well as the
1145  * correct position in the bitmap @new_pds.
1146  *
1147  * The function will only allocate the pages within the range for a given page
1148  * directory pointer. In other words, if @start + @length straddles a virtually
1149  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1150  * required by the caller. This is not currently possible, and the BUG in the
1151  * code will prevent it.
1152  *
1153  * Return: 0 if success; negative error code otherwise.
1154  */
1155 static int
1156 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1157                                   struct i915_page_directory_pointer *pdp,
1158                                   uint64_t start,
1159                                   uint64_t length,
1160                                   unsigned long *new_pds)
1161 {
1162         struct drm_device *dev = vm->dev;
1163         struct i915_page_directory *pd;
1164         uint32_t pdpe;
1165         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1166
1167         WARN_ON(!bitmap_empty(new_pds, pdpes));
1168
1169         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1170                 if (test_bit(pdpe, pdp->used_pdpes))
1171                         continue;
1172
1173                 pd = alloc_pd(dev);
1174                 if (IS_ERR(pd))
1175                         goto unwind_out;
1176
1177                 gen8_initialize_pd(vm, pd);
1178                 pdp->page_directory[pdpe] = pd;
1179                 __set_bit(pdpe, new_pds);
1180                 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1181         }
1182
1183         return 0;
1184
1185 unwind_out:
1186         for_each_set_bit(pdpe, new_pds, pdpes)
1187                 free_pd(dev, pdp->page_directory[pdpe]);
1188
1189         return -ENOMEM;
1190 }
1191
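/*
 * Continuing the arithmetic above: with 48bit PPGTT a page directory pointer
 * holds up to 512 page directories, i.e. 512 * 1GiB = 512GiB of virtual
 * address space, which is the PDP boundary referred to in the kerneldoc.
 * Legacy 32bit PPGTT only uses GEN8_LEGACY_PDPES = 4 of those entries.
 */
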
1192 /**
1193  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1194  * @vm: Master vm structure.
1195  * @pml4:       Page map level 4 for this address range.
1196  * @start:      Starting virtual address to begin allocations.
1197  * @length:     Size of the allocations.
1198  * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1199  *              caller to free on error.
1200  *
1201  * Allocate the required number of page directory pointers. Extremely similar to
1202  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1203  * The main difference is here we are limited by the pml4 boundary (instead of
1204  * the page directory pointer).
1205  *
1206  * Return: 0 if success; negative error code otherwise.
1207  */
1208 static int
1209 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1210                                   struct i915_pml4 *pml4,
1211                                   uint64_t start,
1212                                   uint64_t length,
1213                                   unsigned long *new_pdps)
1214 {
1215         struct drm_device *dev = vm->dev;
1216         struct i915_page_directory_pointer *pdp;
1217         uint32_t pml4e;
1218
1219         WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1220
1221         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1222                 if (!test_bit(pml4e, pml4->used_pml4es)) {
1223                         pdp = alloc_pdp(dev);
1224                         if (IS_ERR(pdp))
1225                                 goto unwind_out;
1226
1227                         gen8_initialize_pdp(vm, pdp);
1228                         pml4->pdps[pml4e] = pdp;
1229                         __set_bit(pml4e, new_pdps);
1230                         trace_i915_page_directory_pointer_entry_alloc(vm,
1231                                                                       pml4e,
1232                                                                       start,
1233                                                                       GEN8_PML4E_SHIFT);
1234                 }
1235         }
1236
1237         return 0;
1238
1239 unwind_out:
1240         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1241                 free_pdp(dev, pml4->pdps[pml4e]);
1242
1243         return -ENOMEM;
1244 }
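
/*
 * And at the top level: the pml4 holds GEN8_PML4ES_PER_PML4 = 512 pdps, so
 * 512 * 512GiB = 256TiB = 2^48 bytes of virtual address space, matching the
 * 1ULL << 48 programmed into ppgtt->base.total for full 48bit PPGTT below.
 */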
1245
1246 static void
1247 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1248 {
1249         kfree(new_pts);
1250         kfree(new_pds);
1251 }
1252
1253 /* Fills in the page directory bitmap, and the array of page table bitmaps.
1254  * Both of these are sized based on the number of PDPEs in the system.
1255  */
1256 static
1257 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1258                                          unsigned long **new_pts,
1259                                          uint32_t pdpes)
1260 {
1261         unsigned long *pds;
1262         unsigned long *pts;
1263
1264         pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1265         if (!pds)
1266                 return -ENOMEM;
1267
1268         pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1269                       GFP_TEMPORARY);
1270         if (!pts)
1271                 goto err_out;
1272
1273         *new_pds = pds;
1274         *new_pts = pts;
1275
1276         return 0;
1277
1278 err_out:
1279         free_gen8_temp_bitmaps(pds, pts);
1280         return -ENOMEM;
1281 }
1282
1283 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1284  * the page table structures, we mark them dirty so that
1285  * context switching/execlist queuing code takes extra steps
1286  * to ensure that tlbs are flushed.
1287  */
1288 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1289 {
1290         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1291 }
1292
1293 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1294                                     struct i915_page_directory_pointer *pdp,
1295                                     uint64_t start,
1296                                     uint64_t length)
1297 {
1298         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1299         unsigned long *new_page_dirs, *new_page_tables;
1300         struct drm_device *dev = vm->dev;
1301         struct i915_page_directory *pd;
1302         const uint64_t orig_start = start;
1303         const uint64_t orig_length = length;
1304         uint32_t pdpe;
1305         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1306         int ret;
1307
1308         /* Wrap is never okay since we can only represent 48b, and we don't
1309          * actually use the other side of the canonical address space.
1310          */
1311         if (WARN_ON(start + length < start))
1312                 return -ENODEV;
1313
1314         if (WARN_ON(start + length > vm->total))
1315                 return -ENODEV;
1316
1317         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1318         if (ret)
1319                 return ret;
1320
1321         /* Do the allocations first so we can easily bail out */
1322         ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1323                                                 new_page_dirs);
1324         if (ret) {
1325                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1326                 return ret;
1327         }
1328
1329         /* For every page directory referenced, allocate page tables */
1330         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1331                 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1332                                                 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1333                 if (ret)
1334                         goto err_out;
1335         }
1336
1337         start = orig_start;
1338         length = orig_length;
1339
1340         /* Allocations have completed successfully, so set the bitmaps, and do
1341          * the mappings. */
1342         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1343                 gen8_pde_t *const page_directory = kmap_px(pd);
1344                 struct i915_page_table *pt;
1345                 uint64_t pd_len = length;
1346                 uint64_t pd_start = start;
1347                 uint32_t pde;
1348
1349                 /* Every pd should be allocated; we just did that above. */
1350                 WARN_ON(!pd);
1351
1352                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1353                         /* Same reasoning as pd */
1354                         WARN_ON(!pt);
1355                         WARN_ON(!pd_len);
1356                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
1357
1358                         /* Set our used ptes within the page table */
1359                         bitmap_set(pt->used_ptes,
1360                                    gen8_pte_index(pd_start),
1361                                    gen8_pte_count(pd_start, pd_len));
1362
1363                         /* Our pde is now pointing to the pagetable, pt */
1364                         __set_bit(pde, pd->used_pdes);
1365
1366                         /* Map the PDE to the page table */
1367                         page_directory[pde] = gen8_pde_encode(px_dma(pt),
1368                                                               I915_CACHE_LLC);
1369                         trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1370                                                         gen8_pte_index(start),
1371                                                         gen8_pte_count(start, length),
1372                                                         GEN8_PTES);
1373
1374                         /* NB: We haven't yet mapped ptes to pages. At this
1375                          * point we're still relying on insert_entries() */
1376                 }
1377
1378                 kunmap_px(ppgtt, page_directory);
1379                 __set_bit(pdpe, pdp->used_pdpes);
1380                 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1381         }
1382
1383         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1384         mark_tlbs_dirty(ppgtt);
1385         return 0;
1386
1387 err_out:
1388         while (pdpe--) {
1389                 unsigned long temp;
1390
1391                 for_each_set_bit(temp, new_page_tables + pdpe *
1392                                 BITS_TO_LONGS(I915_PDES), I915_PDES)
1393                         free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1394         }
1395
1396         for_each_set_bit(pdpe, new_page_dirs, pdpes)
1397                 free_pd(dev, pdp->page_directory[pdpe]);
1398
1399         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1400         mark_tlbs_dirty(ppgtt);
1401         return ret;
1402 }
1403
1404 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1405                                     struct i915_pml4 *pml4,
1406                                     uint64_t start,
1407                                     uint64_t length)
1408 {
1409         DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1410         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1411         struct i915_page_directory_pointer *pdp;
1412         uint64_t pml4e;
1413         int ret = 0;
1414
1415         /* Do the pml4 allocations first, so we don't need to track the newly
1416          * allocated tables below the pdp */
1417         bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1418
1419         /* The page directory and page table allocations are done in the shared
1420          * 3 and 4 level code. Just allocate the pdps.
1421          */
1422         ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1423                                                 new_pdps);
1424         if (ret)
1425                 return ret;
1426
1427         WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1428              "The allocation has spanned more than 512GB. "
1429              "It is highly likely this is incorrect.");
1430
1431         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1432                 WARN_ON(!pdp);
1433
1434                 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1435                 if (ret)
1436                         goto err_out;
1437
1438                 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1439         }
1440
1441         bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1442                   GEN8_PML4ES_PER_PML4);
1443
1444         return 0;
1445
1446 err_out:
1447         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1448                 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1449
1450         return ret;
1451 }
1452
1453 static int gen8_alloc_va_range(struct i915_address_space *vm,
1454                                uint64_t start, uint64_t length)
1455 {
1456         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1457
1458         if (USES_FULL_48BIT_PPGTT(vm->dev))
1459                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1460         else
1461                 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1462 }
1463
1464 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1465                           uint64_t start, uint64_t length,
1466                           gen8_pte_t scratch_pte,
1467                           struct seq_file *m)
1468 {
1469         struct i915_page_directory *pd;
1470         uint32_t pdpe;
1471
1472         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1473                 struct i915_page_table *pt;
1474                 uint64_t pd_len = length;
1475                 uint64_t pd_start = start;
1476                 uint32_t pde;
1477
1478                 if (!test_bit(pdpe, pdp->used_pdpes))
1479                         continue;
1480
1481                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1482                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1483                         uint32_t  pte;
1484                         gen8_pte_t *pt_vaddr;
1485
1486                         if (!test_bit(pde, pd->used_pdes))
1487                                 continue;
1488
1489                         pt_vaddr = kmap_px(pt);
1490                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1491                                 uint64_t va =
1492                                         (pdpe << GEN8_PDPE_SHIFT) |
1493                                         (pde << GEN8_PDE_SHIFT) |
1494                                         (pte << GEN8_PTE_SHIFT);
1495                                 int i;
1496                                 bool found = false;
1497
1498                                 for (i = 0; i < 4; i++)
1499                                         if (pt_vaddr[pte + i] != scratch_pte)
1500                                                 found = true;
1501                                 if (!found)
1502                                         continue;
1503
1504                                 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1505                                 for (i = 0; i < 4; i++) {
1506                                         if (pt_vaddr[pte + i] != scratch_pte)
1507                                                 seq_printf(m, " %lx", pt_vaddr[pte + i]);
1508                                         else
1509                                                 seq_puts(m, "  SCRATCH ");
1510                                 }
1511                                 seq_puts(m, "\n");
1512                         }
1513                         /* don't use kunmap_px, it could trigger
1514                          * an unnecessary flush.
1515                          */
1516                         kunmap_atomic(pt_vaddr);
1517                 }
1518         }
1519 }
1520
1521 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1522 {
1523         struct i915_address_space *vm = &ppgtt->base;
1524         uint64_t start = ppgtt->base.start;
1525         uint64_t length = ppgtt->base.total;
1526         gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
1527                                                  I915_CACHE_LLC);
1528
1529         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1530                 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1531         } else {
1532                 uint64_t pml4e;
1533                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1534                 struct i915_page_directory_pointer *pdp;
1535
1536                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1537                         if (!test_bit(pml4e, pml4->used_pml4es))
1538                                 continue;
1539
1540                         seq_printf(m, "    PML4E #%lu\n", pml4e);
1541                         gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1542                 }
1543         }
1544 }
1545
1546 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1547 {
1548         unsigned long *new_page_dirs, *new_page_tables;
1549         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1550         int ret;
1551
1552         /* We allocate temp bitmaps for page tables for no gain
1553          * but as this is for init only, let's keep things simple.
1554          */
1555         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1556         if (ret)
1557                 return ret;
1558
1559         /* Allocate for all pdps regardless of how the ppgtt
1560          * was defined.
1561          */
1562         ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1563                                                 0, 1ULL << 32,
1564                                                 new_page_dirs);
1565         if (!ret)
1566                 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1567
1568         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1569
1570         return ret;
1571 }
1572
1573 /*
1574  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1575  * registers, with a net effect resembling a 2-level page table in normal x86
1576  * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1577  * legacy 32b address space.
1578  *
1579  */
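/*
 * Note: with full 48b PPGTT the code below switches to a 4-level layout
 * instead: 512 PML4Es * 512 PDPEs * 512 PDEs * 512 PTEs * 4096 bytes
 * = 2^48 = 256TB, matching the ppgtt->base.total = 1ULL << 48 assignment
 * in gen8_ppgtt_init().
 */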
1580 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1581 {
1582         int ret;
1583
1584         ret = gen8_init_scratch(&ppgtt->base);
1585         if (ret)
1586                 return ret;
1587
1588         ppgtt->base.start = 0;
1589         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1590         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1591         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1592         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1593         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1594         ppgtt->base.bind_vma = ppgtt_bind_vma;
1595         ppgtt->debug_dump = gen8_dump_ppgtt;
1596
1597         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1598                 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1599                 if (ret)
1600                         goto free_scratch;
1601
1602                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1603
1604                 ppgtt->base.total = 1ULL << 48;
1605                 ppgtt->switch_mm = gen8_48b_mm_switch;
1606         } else {
1607                 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1608                 if (ret)
1609                         goto free_scratch;
1610
1611                 ppgtt->base.total = 1ULL << 32;
1612                 ppgtt->switch_mm = gen8_legacy_mm_switch;
1613                 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1614                                                               0, 0,
1615                                                               GEN8_PML4E_SHIFT);
1616
1617                 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
1618                         ret = gen8_preallocate_top_level_pdps(ppgtt);
1619                         if (ret)
1620                                 goto free_scratch;
1621                 }
1622         }
1623
1624         if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
1625                 gen8_ppgtt_notify_vgt(ppgtt, true);
1626
1627         return 0;
1628
1629 free_scratch:
1630         gen8_free_scratch(&ppgtt->base);
1631         return ret;
1632 }
1633
1634 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1635 {
1636         struct i915_address_space *vm = &ppgtt->base;
1637         struct i915_page_table *unused;
1638         gen6_pte_t scratch_pte;
1639         uint32_t pd_entry;
1640         uint32_t  pte, pde;
1641         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1642
1643         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1644                                      I915_CACHE_LLC, 0);
1645
1646         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1647                 u32 expected;
1648                 gen6_pte_t *pt_vaddr;
1649                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1650                 pd_entry = readl(ppgtt->pd_addr + pde);
1651                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1652
1653                 if (pd_entry != expected)
1654                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1655                                    pde,
1656                                    pd_entry,
1657                                    expected);
1658                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1659
1660                 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1661
1662                 for (pte = 0; pte < GEN6_PTES; pte += 4) {
1663                         unsigned long va =
1664                                 (pde * PAGE_SIZE * GEN6_PTES) +
1665                                 (pte * PAGE_SIZE);
1666                         int i;
1667                         bool found = false;
1668                         for (i = 0; i < 4; i++)
1669                                 if (pt_vaddr[pte + i] != scratch_pte)
1670                                         found = true;
1671                         if (!found)
1672                                 continue;
1673
1674                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1675                         for (i = 0; i < 4; i++) {
1676                                 if (pt_vaddr[pte + i] != scratch_pte)
1677                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1678                                 else
1679                                         seq_puts(m, "  SCRATCH ");
1680                         }
1681                         seq_puts(m, "\n");
1682                 }
1683                 kunmap_px(ppgtt, pt_vaddr);
1684         }
1685 }
1686
1687 /* Write pde (index) from the page directory @pd to the page table @pt */
1688 static void gen6_write_pde(struct i915_page_directory *pd,
1689                             const int pde, struct i915_page_table *pt)
1690 {
1691         /* Caller needs to make sure the write completes if necessary */
1692         struct i915_hw_ppgtt *ppgtt =
1693                 container_of(pd, struct i915_hw_ppgtt, pd);
1694         u32 pd_entry;
1695
1696         pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1697         pd_entry |= GEN6_PDE_VALID;
1698
1699         writel(pd_entry, ppgtt->pd_addr + pde);
1700 }
1701
1702 /* Write all the page tables found in the ppgtt structure into successive
1703  * page directory entries. */
1704 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1705                                   struct i915_page_directory *pd,
1706                                   uint32_t start, uint32_t length)
1707 {
1708         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1709         struct i915_page_table *pt;
1710         uint32_t pde;
1711
1712         gen6_for_each_pde(pt, pd, start, length, pde)
1713                 gen6_write_pde(pd, pde, pt);
1714
1715         /* Make sure the write is complete before other code can use this page
1716          * table. Also required for WC mapped PTEs. */
1717         readl(ggtt->gsm);
1718 }
1719
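/*
 * Note: RING_PP_DIR_BASE appears to take the page directory's GGTT offset in
 * 64-byte units placed in bits 31:16, hence the "/ 64 << 16" below. E.g. a
 * (hypothetical) ggtt_offset of 0x10000 would be programmed as
 * (0x10000 / 64) << 16 = 0x4000000.
 */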
1720 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1721 {
1722         BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1723
1724         return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1725 }
1726
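/*
 * Note: the LRI sequence emitted by the mm_switch functions below is one
 * MI_LOAD_REGISTER_IMM(2) header, two register/value pairs and one MI_NOOP,
 * i.e. 6 dwords in total, which is why intel_ring_begin() reserves 6 dwords.
 */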
1727 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1728                          struct drm_i915_gem_request *req)
1729 {
1730         struct intel_ring *ring = req->ring;
1731         struct intel_engine_cs *engine = req->engine;
1732         int ret;
1733
1734         /* NB: TLBs must be flushed and invalidated before a switch */
1735         ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1736         if (ret)
1737                 return ret;
1738
1739         ret = intel_ring_begin(req, 6);
1740         if (ret)
1741                 return ret;
1742
1743         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1744         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1745         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1746         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1747         intel_ring_emit(ring, get_pd_offset(ppgtt));
1748         intel_ring_emit(ring, MI_NOOP);
1749         intel_ring_advance(ring);
1750
1751         return 0;
1752 }
1753
1754 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1755                           struct drm_i915_gem_request *req)
1756 {
1757         struct intel_ring *ring = req->ring;
1758         struct intel_engine_cs *engine = req->engine;
1759         int ret;
1760
1761         /* NB: TLBs must be flushed and invalidated before a switch */
1762         ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1763         if (ret)
1764                 return ret;
1765
1766         ret = intel_ring_begin(req, 6);
1767         if (ret)
1768                 return ret;
1769
1770         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1771         intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1772         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1773         intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1774         intel_ring_emit(ring, get_pd_offset(ppgtt));
1775         intel_ring_emit(ring, MI_NOOP);
1776         intel_ring_advance(ring);
1777
1778         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1779         if (engine->id != RCS) {
1780                 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1781                 if (ret)
1782                         return ret;
1783         }
1784
1785         return 0;
1786 }
1787
1788 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1789                           struct drm_i915_gem_request *req)
1790 {
1791         struct intel_engine_cs *engine = req->engine;
1792         struct drm_i915_private *dev_priv = req->i915;
1793
1794         I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1795         I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1796         return 0;
1797 }
1798
1799 static void gen8_ppgtt_enable(struct drm_device *dev)
1800 {
1801         struct drm_i915_private *dev_priv = to_i915(dev);
1802         struct intel_engine_cs *engine;
1803         enum intel_engine_id id;
1804
1805         for_each_engine(engine, dev_priv, id) {
1806                 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1807                 I915_WRITE(RING_MODE_GEN7(engine),
1808                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1809         }
1810 }
1811
1812 static void gen7_ppgtt_enable(struct drm_device *dev)
1813 {
1814         struct drm_i915_private *dev_priv = to_i915(dev);
1815         struct intel_engine_cs *engine;
1816         uint32_t ecochk, ecobits;
1817         enum intel_engine_id id;
1818
1819         ecobits = I915_READ(GAC_ECO_BITS);
1820         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1821
1822         ecochk = I915_READ(GAM_ECOCHK);
1823         if (IS_HASWELL(dev_priv)) {
1824                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1825         } else {
1826                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1827                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1828         }
1829         I915_WRITE(GAM_ECOCHK, ecochk);
1830
1831         for_each_engine(engine, dev_priv, id) {
1832                 /* GFX_MODE is per-ring on gen7+ */
1833                 I915_WRITE(RING_MODE_GEN7(engine),
1834                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1835         }
1836 }
1837
1838 static void gen6_ppgtt_enable(struct drm_device *dev)
1839 {
1840         struct drm_i915_private *dev_priv = to_i915(dev);
1841         uint32_t ecochk, gab_ctl, ecobits;
1842
1843         ecobits = I915_READ(GAC_ECO_BITS);
1844         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1845                    ECOBITS_PPGTT_CACHE64B);
1846
1847         gab_ctl = I915_READ(GAB_CTL);
1848         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1849
1850         ecochk = I915_READ(GAM_ECOCHK);
1851         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1852
1853         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1854 }
1855
1856 /* PPGTT support for Sandybridge/Gen6 and later */
1857 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1858                                    uint64_t start,
1859                                    uint64_t length)
1860 {
1861         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1862         gen6_pte_t *pt_vaddr, scratch_pte;
1863         unsigned first_entry = start >> PAGE_SHIFT;
1864         unsigned num_entries = length >> PAGE_SHIFT;
1865         unsigned act_pt = first_entry / GEN6_PTES;
1866         unsigned first_pte = first_entry % GEN6_PTES;
1867         unsigned last_pte, i;
1868
1869         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1870                                      I915_CACHE_LLC, 0);
1871
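        /*
         * Note: the range is cleared one page table at a time: act_pt selects
         * the table (GEN6_PTES entries each), first_pte the starting slot in
         * that table, and each pass writes scratch entries up to the table
         * boundary before moving on to the next table.
         */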
1872         while (num_entries) {
1873                 last_pte = first_pte + num_entries;
1874                 if (last_pte > GEN6_PTES)
1875                         last_pte = GEN6_PTES;
1876
1877                 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1878
1879                 for (i = first_pte; i < last_pte; i++)
1880                         pt_vaddr[i] = scratch_pte;
1881
1882                 kunmap_px(ppgtt, pt_vaddr);
1883
1884                 num_entries -= last_pte - first_pte;
1885                 first_pte = 0;
1886                 act_pt++;
1887         }
1888 }
1889
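/*
 * Note: the insert path below walks the scatterlist one DMA page at a time,
 * mapping a page table with kmap_px() and rolling over to the next table
 * whenever act_pte wraps at GEN6_PTES (512 entries, i.e. 2MB of VA per table).
 */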
1890 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1891                                       struct sg_table *pages,
1892                                       uint64_t start,
1893                                       enum i915_cache_level cache_level, u32 flags)
1894 {
1895         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1896         unsigned first_entry = start >> PAGE_SHIFT;
1897         unsigned act_pt = first_entry / GEN6_PTES;
1898         unsigned act_pte = first_entry % GEN6_PTES;
1899         gen6_pte_t *pt_vaddr = NULL;
1900         struct sgt_iter sgt_iter;
1901         dma_addr_t addr;
1902
1903         for_each_sgt_dma(addr, sgt_iter, pages) {
1904                 if (pt_vaddr == NULL)
1905                         pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1906
1907                 pt_vaddr[act_pte] =
1908                         vm->pte_encode(addr, cache_level, flags);
1909
1910                 if (++act_pte == GEN6_PTES) {
1911                         kunmap_px(ppgtt, pt_vaddr);
1912                         pt_vaddr = NULL;
1913                         act_pt++;
1914                         act_pte = 0;
1915                 }
1916         }
1917
1918         if (pt_vaddr)
1919                 kunmap_px(ppgtt, pt_vaddr);
1920 }
1921
1922 static int gen6_alloc_va_range(struct i915_address_space *vm,
1923                                uint64_t start_in, uint64_t length_in)
1924 {
1925         DECLARE_BITMAP(new_page_tables, I915_PDES);
1926         struct drm_device *dev = vm->dev;
1927         struct drm_i915_private *dev_priv = to_i915(dev);
1928         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1929         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1930         struct i915_page_table *pt;
1931         uint32_t start, length, start_save, length_save;
1932         uint32_t pde;
1933         int ret;
1934
1935         if (WARN_ON(start_in + length_in > ppgtt->base.total))
1936                 return -ENODEV;
1937
1938         start = start_save = start_in;
1939         length = length_save = length_in;
1940
1941         bitmap_zero(new_page_tables, I915_PDES);
1942
1943         /* The allocation is done in two stages so that we can bail out with
1944          * a minimal amount of pain. The first stage finds new page tables that
1945          * need allocation. The second stage marks the PTEs in use within those
1946          * page tables.
1947          */
1948         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1949                 if (pt != vm->scratch_pt) {
1950                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1951                         continue;
1952                 }
1953
1954                 /* We've already allocated a page table */
1955                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1956
1957                 pt = alloc_pt(dev);
1958                 if (IS_ERR(pt)) {
1959                         ret = PTR_ERR(pt);
1960                         goto unwind_out;
1961                 }
1962
1963                 gen6_initialize_pt(vm, pt);
1964
1965                 ppgtt->pd.page_table[pde] = pt;
1966                 __set_bit(pde, new_page_tables);
1967                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1968         }
1969
1970         start = start_save;
1971         length = length_save;
1972
1973         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1974                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1975
1976                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1977                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1978                            gen6_pte_count(start, length));
1979
1980                 if (__test_and_clear_bit(pde, new_page_tables))
1981                         gen6_write_pde(&ppgtt->pd, pde, pt);
1982
1983                 trace_i915_page_table_entry_map(vm, pde, pt,
1984                                          gen6_pte_index(start),
1985                                          gen6_pte_count(start, length),
1986                                          GEN6_PTES);
1987                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1988                                 GEN6_PTES);
1989         }
1990
1991         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1992
1993         /* Make sure the write is complete before other code can use this page
1994          * table. Also required for WC mapped PTEs. */
1995         readl(ggtt->gsm);
1996
1997         mark_tlbs_dirty(ppgtt);
1998         return 0;
1999
2000 unwind_out:
2001         for_each_set_bit(pde, new_page_tables, I915_PDES) {
2002                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
2003
2004                 ppgtt->pd.page_table[pde] = vm->scratch_pt;
2005                 free_pt(vm->dev, pt);
2006         }
2007
2008         mark_tlbs_dirty(ppgtt);
2009         return ret;
2010 }
2011
2012 static int gen6_init_scratch(struct i915_address_space *vm)
2013 {
2014         struct drm_device *dev = vm->dev;
2015         int ret;
2016
2017         ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
2018         if (ret)
2019                 return ret;
2020
2021         vm->scratch_pt = alloc_pt(dev);
2022         if (IS_ERR(vm->scratch_pt)) {
2023                 cleanup_scratch_page(dev, &vm->scratch_page);
2024                 return PTR_ERR(vm->scratch_pt);
2025         }
2026
2027         gen6_initialize_pt(vm, vm->scratch_pt);
2028
2029         return 0;
2030 }
2031
2032 static void gen6_free_scratch(struct i915_address_space *vm)
2033 {
2034         struct drm_device *dev = vm->dev;
2035
2036         free_pt(dev, vm->scratch_pt);
2037         cleanup_scratch_page(dev, &vm->scratch_page);
2038 }
2039
2040 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2041 {
2042         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
2043         struct i915_page_directory *pd = &ppgtt->pd;
2044         struct drm_device *dev = vm->dev;
2045         struct i915_page_table *pt;
2046         uint32_t pde;
2047
2048         drm_mm_remove_node(&ppgtt->node);
2049
2050         gen6_for_all_pdes(pt, pd, pde)
2051                 if (pt != vm->scratch_pt)
2052                         free_pt(dev, pt);
2053
2054         gen6_free_scratch(vm);
2055 }
2056
2057 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2058 {
2059         struct i915_address_space *vm = &ppgtt->base;
2060         struct drm_device *dev = ppgtt->base.dev;
2061         struct drm_i915_private *dev_priv = to_i915(dev);
2062         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2063         bool retried = false;
2064         int ret;
2065
2066         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2067          * allocator works in address space sizes, so it's multiplied by page
2068          * size. We allocate at the top of the GTT to avoid fragmentation.
2069          */
2070         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2071
2072         ret = gen6_init_scratch(vm);
2073         if (ret)
2074                 return ret;
2075
2076 alloc:
2077         ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2078                                                   &ppgtt->node, GEN6_PD_SIZE,
2079                                                   GEN6_PD_ALIGN, 0,
2080                                                   0, ggtt->base.total,
2081                                                   DRM_MM_TOPDOWN);
2082         if (ret == -ENOSPC && !retried) {
2083                 ret = i915_gem_evict_something(&ggtt->base,
2084                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
2085                                                I915_CACHE_NONE,
2086                                                0, ggtt->base.total,
2087                                                0);
2088                 if (ret)
2089                         goto err_out;
2090
2091                 retried = true;
2092                 goto alloc;
2093         }
2094
2095         if (ret)
2096                 goto err_out;
2097
2098
2099         if (ppgtt->node.start < ggtt->mappable_end)
2100                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2101
2102         return 0;
2103
2104 err_out:
2105         gen6_free_scratch(vm);
2106         return ret;
2107 }
2108
2109 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2110 {
2111         return gen6_ppgtt_allocate_page_directories(ppgtt);
2112 }
2113
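/*
 * Note: pointing every PDE at the scratch page table below means addresses
 * that have never been explicitly allocated read back the scratch page
 * instead of faulting.
 */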
2114 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2115                                   uint64_t start, uint64_t length)
2116 {
2117         struct i915_page_table *unused;
2118         uint32_t pde;
2119
2120         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2121                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2122 }
2123
2124 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2125 {
2126         struct drm_device *dev = ppgtt->base.dev;
2127         struct drm_i915_private *dev_priv = to_i915(dev);
2128         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2129         int ret;
2130
2131         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2132         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
2133                 ppgtt->switch_mm = gen6_mm_switch;
2134         else if (IS_HASWELL(dev_priv))
2135                 ppgtt->switch_mm = hsw_mm_switch;
2136         else if (IS_GEN7(dev_priv))
2137                 ppgtt->switch_mm = gen7_mm_switch;
2138         else
2139                 BUG();
2140
2141         ret = gen6_ppgtt_alloc(ppgtt);
2142         if (ret)
2143                 return ret;
2144
2145         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2146         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2147         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2148         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2149         ppgtt->base.bind_vma = ppgtt_bind_vma;
2150         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2151         ppgtt->base.start = 0;
2152         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2153         ppgtt->debug_dump = gen6_dump_ppgtt;
2154
2155         ppgtt->pd.base.ggtt_offset =
2156                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2157
2158         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2159                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
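        /*
         * Note: as the comment in gen6_ppgtt_allocate_page_directories() says,
         * gen6 PDEs reside in the GGTT itself, so pd_addr is simply the
         * ioremapped GGTT (gsm) indexed by ggtt_offset in PTE-sized units;
         * gen6_write_pde() then writes directly through it.
         */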
2160
2161         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2162
2163         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2164
2165         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2166                          ppgtt->node.size >> 20,
2167                          ppgtt->node.start / PAGE_SIZE);
2168
2169         DRM_DEBUG("Adding PPGTT at offset %x\n",
2170                   ppgtt->pd.base.ggtt_offset << 10);
2171
2172         return 0;
2173 }
2174
2175 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2176                            struct drm_i915_private *dev_priv)
2177 {
2178         ppgtt->base.dev = &dev_priv->drm;
2179
2180         if (INTEL_INFO(dev_priv)->gen < 8)
2181                 return gen6_ppgtt_init(ppgtt);
2182         else
2183                 return gen8_ppgtt_init(ppgtt);
2184 }
2185
2186 static void i915_address_space_init(struct i915_address_space *vm,
2187                                     struct drm_i915_private *dev_priv)
2188 {
2189         drm_mm_init(&vm->mm, vm->start, vm->total);
2190         INIT_LIST_HEAD(&vm->active_list);
2191         INIT_LIST_HEAD(&vm->inactive_list);
2192         INIT_LIST_HEAD(&vm->unbound_list);
2193         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2194 }
2195
2196 static void gtt_write_workarounds(struct drm_device *dev)
2197 {
2198         struct drm_i915_private *dev_priv = to_i915(dev);
2199
2200         /* This function is for GTT-related workarounds. It is called on
2201          * driver load and after a GPU reset, so workarounds can be placed
2202          * here even if they get overwritten by a GPU reset.
2203          */
2204         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2205         if (IS_BROADWELL(dev_priv))
2206                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2207         else if (IS_CHERRYVIEW(dev_priv))
2208                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2209         else if (IS_SKYLAKE(dev_priv))
2210                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2211         else if (IS_BROXTON(dev_priv))
2212                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2213 }
2214
2215 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2216                            struct drm_i915_private *dev_priv,
2217                            struct drm_i915_file_private *file_priv)
2218 {
2219         int ret;
2220
2221         ret = __hw_ppgtt_init(ppgtt, dev_priv);
2222         if (ret == 0) {
2223                 kref_init(&ppgtt->ref);
2224                 i915_address_space_init(&ppgtt->base, dev_priv);
2225                 ppgtt->base.file = file_priv;
2226         }
2227
2228         return ret;
2229 }
2230
2231 int i915_ppgtt_init_hw(struct drm_device *dev)
2232 {
2233         struct drm_i915_private *dev_priv = to_i915(dev);
2234
2235         gtt_write_workarounds(dev);
2236
2237         /* In the case of execlists, PPGTT is enabled by the context descriptor
2238          * and the PDPs are contained within the context itself.  We don't
2239          * need to do anything here. */
2240         if (i915.enable_execlists)
2241                 return 0;
2242
2243         if (!USES_PPGTT(dev))
2244                 return 0;
2245
2246         if (IS_GEN6(dev_priv))
2247                 gen6_ppgtt_enable(dev);
2248         else if (IS_GEN7(dev_priv))
2249                 gen7_ppgtt_enable(dev);
2250         else if (INTEL_INFO(dev)->gen >= 8)
2251                 gen8_ppgtt_enable(dev);
2252         else
2253                 MISSING_CASE(INTEL_INFO(dev)->gen);
2254
2255         return 0;
2256 }
2257
2258 struct i915_hw_ppgtt *
2259 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2260                   struct drm_i915_file_private *fpriv)
2261 {
2262         struct i915_hw_ppgtt *ppgtt;
2263         int ret;
2264
2265         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2266         if (!ppgtt)
2267                 return ERR_PTR(-ENOMEM);
2268
2269         ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
2270         if (ret) {
2271                 kfree(ppgtt);
2272                 return ERR_PTR(ret);
2273         }
2274
2275         trace_i915_ppgtt_create(&ppgtt->base);
2276
2277         return ppgtt;
2278 }
2279
2280 void  i915_ppgtt_release(struct kref *kref)
2281 {
2282         struct i915_hw_ppgtt *ppgtt =
2283                 container_of(kref, struct i915_hw_ppgtt, ref);
2284
2285         trace_i915_ppgtt_release(&ppgtt->base);
2286
2287         /* vmas should already be unbound and destroyed */
2288         WARN_ON(!list_empty(&ppgtt->base.active_list));
2289         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2290         WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2291
2292         list_del(&ppgtt->base.global_link);
2293         drm_mm_takedown(&ppgtt->base.mm);
2294
2295         ppgtt->base.cleanup(&ppgtt->base);
2296         kfree(ppgtt);
2297 }
2298
2299 /* Certain Gen5 chipsets require idling the GPU before
2300  * unmapping anything from the GTT when VT-d is enabled.
2301  */
2302 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2303 {
2304 #ifdef CONFIG_INTEL_IOMMU
2305         /* Query intel_iommu to see if we need the workaround. Presumably that
2306          * was loaded first.
2307          */
2308         if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
2309                 return true;
2310 #endif
2311         return false;
2312 }
2313
2314 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2315 {
2316         struct intel_engine_cs *engine;
2317         enum intel_engine_id id;
2318
2319         if (INTEL_INFO(dev_priv)->gen < 6)
2320                 return;
2321
2322         for_each_engine(engine, dev_priv, id) {
2323                 u32 fault_reg;
2324                 fault_reg = I915_READ(RING_FAULT_REG(engine));
2325                 if (fault_reg & RING_FAULT_VALID) {
2326                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2327                                          "\tAddr: 0x%08x\n"
2328                                          "\tAddress space: %s\n"
2329                                          "\tSource ID: %d\n"
2330                                          "\tType: %d\n",
2331                                          fault_reg & LINUX_PAGE_MASK,
2332                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2333                                          RING_FAULT_SRCID(fault_reg),
2334                                          RING_FAULT_FAULT_TYPE(fault_reg));
2335                         I915_WRITE(RING_FAULT_REG(engine),
2336                                    fault_reg & ~RING_FAULT_VALID);
2337                 }
2338         }
2339
2340         /* Engine specific init may not have been done until this point. */
2341         if (dev_priv->engine[RCS])
2342                 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2343 }
2344
2345 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2346 {
2347         if (INTEL_INFO(dev_priv)->gen < 6) {
2348                 intel_gtt_chipset_flush();
2349         } else {
2350                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2351                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2352         }
2353 }
2354
2355 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2356 {
2357         struct drm_i915_private *dev_priv = to_i915(dev);
2358         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2359
2360         /* Don't bother messing with faults pre GEN6 as we have little
2361          * documentation supporting that it's a good idea.
2362          */
2363         if (INTEL_INFO(dev)->gen < 6)
2364                 return;
2365
2366         i915_check_and_clear_faults(dev_priv);
2367
2368         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
2369
2370         i915_ggtt_flush(dev_priv);
2371 }
2372
2373 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2374 {
2375         if (!dma_map_sg(&obj->base.dev->pdev->dev,
2376                         obj->pages->sgl, obj->pages->nents,
2377                         PCI_DMA_BIDIRECTIONAL))
2378                 return -ENOSPC;
2379
2380         return 0;
2381 }
2382
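/*
 * Note: gen8 GGTT PTEs are 64 bits wide, hence the writeq() here, whereas the
 * gen6 paths further down use iowrite32() for their 32-bit PTEs.
 */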
2383 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2384 {
2385         writeq(pte, addr);
2386 }
2387
2388 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2389                                   dma_addr_t addr,
2390                                   uint64_t offset,
2391                                   enum i915_cache_level level,
2392                                   u32 unused)
2393 {
2394         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2395         gen8_pte_t __iomem *pte =
2396                 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
2397                 (offset >> PAGE_SHIFT);
2398         int rpm_atomic_seq;
2399
2400         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2401
2402         gen8_set_pte(pte, gen8_pte_encode(addr, level));
2403
2404         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2405         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2406
2407         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2408 }
2409
2410 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2411                                      struct sg_table *st,
2412                                      uint64_t start,
2413                                      enum i915_cache_level level, u32 unused)
2414 {
2415         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2416         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2417         struct sgt_iter sgt_iter;
2418         gen8_pte_t __iomem *gtt_entries;
2419         gen8_pte_t gtt_entry;
2420         dma_addr_t addr;
2421         int rpm_atomic_seq;
2422         int i = 0;
2423
2424         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2425
2426         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2427
2428         for_each_sgt_dma(addr, sgt_iter, st) {
2429                 gtt_entry = gen8_pte_encode(addr, level);
2430                 gen8_set_pte(&gtt_entries[i++], gtt_entry);
2431         }
2432
2433         /*
2434          * XXX: This serves as a posting read to make sure that the PTE has
2435          * actually been updated. There is some concern that even though the
2436          * registers and PTEs are within the same BAR, they may be subject to
2437          * different NUMA access patterns. Therefore, even with the way we
2438          * assume the hardware should work, we keep this posting read for paranoia.
2439          */
2440         if (i != 0)
2441                 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2442
2443         /* This next bit makes the above posting read even more important. We
2444          * want to flush the TLBs only after we're certain all the PTE updates
2445          * have finished.
2446          */
2447         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2448         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2449
2450         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2451 }
2452
2453 struct insert_entries {
2454         struct i915_address_space *vm;
2455         struct sg_table *st;
2456         uint64_t start;
2457         enum i915_cache_level level;
2458         u32 flags;
2459 };
2460
2461 static int gen8_ggtt_insert_entries__cb(void *_arg)
2462 {
2463         struct insert_entries *arg = _arg;
2464         gen8_ggtt_insert_entries(arg->vm, arg->st,
2465                                  arg->start, arg->level, arg->flags);
2466         return 0;
2467 }
2468
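/*
 * Note: this variant funnels the PTE writes through stop_machine(), i.e. all
 * CPUs are quiesced while the GGTT is updated. Presumably it is installed as
 * the insert_entries hook only on configurations where concurrent GGTT access
 * is problematic; the "__BKL" suffix hints at the serialization intent.
 */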
2469 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2470                                           struct sg_table *st,
2471                                           uint64_t start,
2472                                           enum i915_cache_level level,
2473                                           u32 flags)
2474 {
2475         struct insert_entries arg = { vm, st, start, level, flags };
2476         stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2477 }
2478
2479 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2480                                   dma_addr_t addr,
2481                                   uint64_t offset,
2482                                   enum i915_cache_level level,
2483                                   u32 flags)
2484 {
2485         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2486         gen6_pte_t __iomem *pte =
2487                 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
2488                 (offset >> PAGE_SHIFT);
2489         int rpm_atomic_seq;
2490
2491         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2492
2493         iowrite32(vm->pte_encode(addr, level, flags), pte);
2494
2495         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2496         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2497
2498         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2499 }
2500
2501 /*
2502  * Binds an object into the global gtt with the specified cache level. The object
2503  * will be accessible to the GPU via commands whose operands reference offsets
2504  * within the global GTT as well as accessible by the CPU through the GMADR
2505  * mapped BAR (dev_priv->mm.gtt->gtt).
2506  */
2507 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2508                                      struct sg_table *st,
2509                                      uint64_t start,
2510                                      enum i915_cache_level level, u32 flags)
2511 {
2512         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2513         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2514         struct sgt_iter sgt_iter;
2515         gen6_pte_t __iomem *gtt_entries;
2516         gen6_pte_t gtt_entry;
2517         dma_addr_t addr;
2518         int rpm_atomic_seq;
2519         int i = 0;
2520
2521         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2522
2523         gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2524
2525         for_each_sgt_dma(addr, sgt_iter, st) {
2526                 gtt_entry = vm->pte_encode(addr, level, flags);
2527                 iowrite32(gtt_entry, &gtt_entries[i++]);
2528         }
2529
2530         /* XXX: This serves as a posting read to make sure that the PTE has
2531          * actually been updated. There is some concern that even though the
2532          * registers and PTEs are within the same BAR, they may be subject to
2533          * different NUMA access patterns. Therefore, even with the way we
2534          * assume the hardware should work, we keep this posting read for paranoia.
2535          */
2536         if (i != 0)
2537                 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2538
2539         /* This next bit makes the above posting read even more important. We
2540          * want to flush the TLBs only after we're certain all the PTE updates
2541          * have finished.
2542          */
2543         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2544         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2545
2546         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2547 }
2548
2549 static void nop_clear_range(struct i915_address_space *vm,
2550                             uint64_t start, uint64_t length)
2551 {
2552 }
2553
2554 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2555                                   uint64_t start, uint64_t length)
2556 {
2557         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2558         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2559         unsigned first_entry = start >> PAGE_SHIFT;
2560         unsigned num_entries = length >> PAGE_SHIFT;
2561         gen8_pte_t scratch_pte, __iomem *gtt_base =
2562                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2563         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2564         int i;
2565         int rpm_atomic_seq;
2566
2567         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2568
2569         if (WARN(num_entries > max_entries,
2570                  "First entry = %d; Num entries = %d (max=%d)\n",
2571                  first_entry, num_entries, max_entries))
2572                 num_entries = max_entries;
2573
2574         scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
2575                                       I915_CACHE_LLC);
2576         for (i = 0; i < num_entries; i++)
2577                 gen8_set_pte(&gtt_base[i], scratch_pte);
2578         readl(gtt_base);
2579
2580         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2581 }
2582
2583 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2584                                   uint64_t start,
2585                                   uint64_t length)
2586 {
2587         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2588         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2589         unsigned first_entry = start >> PAGE_SHIFT;
2590         unsigned num_entries = length >> PAGE_SHIFT;
2591         gen6_pte_t scratch_pte, __iomem *gtt_base =
2592                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2593         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2594         int i;
2595         int rpm_atomic_seq;
2596
2597         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2598
2599         if (WARN(num_entries > max_entries,
2600                  "First entry = %d; Num entries = %d (max=%d)\n",
2601                  first_entry, num_entries, max_entries))
2602                 num_entries = max_entries;
2603
2604         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2605                                      I915_CACHE_LLC, 0);
2606
2607         for (i = 0; i < num_entries; i++)
2608                 iowrite32(scratch_pte, &gtt_base[i]);
2609         readl(gtt_base);
2610
2611         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2612 }
2613
2614 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2615                                   dma_addr_t addr,
2616                                   uint64_t offset,
2617                                   enum i915_cache_level cache_level,
2618                                   u32 unused)
2619 {
2620         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2621         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2622                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2623         int rpm_atomic_seq;
2624
2625         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2626
2627         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2628
2629         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2630 }
2631
2632 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2633                                      struct sg_table *pages,
2634                                      uint64_t start,
2635                                      enum i915_cache_level cache_level, u32 unused)
2636 {
2637         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2638         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2639                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2640         int rpm_atomic_seq;
2641
2642         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2643
2644         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2645
2646         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2647
2648 }
2649
2650 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2651                                   uint64_t start,
2652                                   uint64_t length)
2653 {
2654         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2655         unsigned first_entry = start >> PAGE_SHIFT;
2656         unsigned num_entries = length >> PAGE_SHIFT;
2657         int rpm_atomic_seq;
2658
2659         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2660
2661         intel_gtt_clear_range(first_entry, num_entries);
2662
2663         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2664 }
2665
2666 static int ggtt_bind_vma(struct i915_vma *vma,
2667                          enum i915_cache_level cache_level,
2668                          u32 flags)
2669 {
2670         struct drm_i915_gem_object *obj = vma->obj;
2671         u32 pte_flags = 0;
2672         int ret;
2673
2674         ret = i915_get_ggtt_vma_pages(vma);
2675         if (ret)
2676                 return ret;
2677
2678         /* Currently applicable only to VLV */
2679         if (obj->gt_ro)
2680                 pte_flags |= PTE_READ_ONLY;
2681
2682         vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
2683                                 cache_level, pte_flags);
2684
2685         /*
2686          * Without aliasing PPGTT there's no difference between
2687          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2688          * upgrade to both bound if we bind either to avoid double-binding.
2689          */
2690         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2691
2692         return 0;
2693 }
2694
2695 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2696                                  enum i915_cache_level cache_level,
2697                                  u32 flags)
2698 {
2699         u32 pte_flags;
2700         int ret;
2701
2702         ret = i915_get_ggtt_vma_pages(vma);
2703         if (ret)
2704                 return ret;
2705
2706         /* Currently applicable only to VLV */
2707         pte_flags = 0;
2708         if (vma->obj->gt_ro)
2709                 pte_flags |= PTE_READ_ONLY;
2710
2711
2712         if (flags & I915_VMA_GLOBAL_BIND) {
2713                 vma->vm->insert_entries(vma->vm,
2714                                         vma->pages, vma->node.start,
2715                                         cache_level, pte_flags);
2716         }
2717
2718         if (flags & I915_VMA_LOCAL_BIND) {
2719                 struct i915_hw_ppgtt *appgtt =
2720                         to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2721                 appgtt->base.insert_entries(&appgtt->base,
2722                                             vma->pages, vma->node.start,
2723                                             cache_level, pte_flags);
2724         }
2725
2726         return 0;
2727 }
2728
2729 static void ggtt_unbind_vma(struct i915_vma *vma)
2730 {
2731         struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2732         const u64 size = min(vma->size, vma->node.size);
2733
2734         if (vma->flags & I915_VMA_GLOBAL_BIND)
2735                 vma->vm->clear_range(vma->vm,
2736                                      vma->node.start, size);
2737
2738         if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
2739                 appgtt->base.clear_range(&appgtt->base,
2740                                          vma->node.start, size);
2741 }
2742
2743 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2744 {
2745         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2746         struct device *kdev = &dev_priv->drm.pdev->dev;
2747         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2748
2749         if (unlikely(ggtt->do_idle_maps)) {
2750                 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
2751                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2752                         /* Wait a bit, in hopes it avoids the hang */
2753                         udelay(10);
2754                 }
2755         }
2756
2757         dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
2758                      PCI_DMA_BIDIRECTIONAL);
2759 }
2760
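/*
 * Note: the color adjustment below implements guard pages between differently
 * "colored" (cache-level) neighbours: the usable hole is shrunk by one 4096
 * byte page at whichever end touches a node of a different color.
 */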
2761 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2762                                   unsigned long color,
2763                                   u64 *start,
2764                                   u64 *end)
2765 {
2766         if (node->color != color)
2767                 *start += 4096;
2768
2769         node = list_first_entry_or_null(&node->node_list,
2770                                         struct drm_mm_node,
2771                                         node_list);
2772         if (node && node->allocated && node->color != color)
2773                 *end -= 4096;
2774 }
2775
2776 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2777 {
2778         /* Let GEM manage all of the aperture.
2779          *
2780          * However, leave one page at the end still bound to the scratch page.
2781          * There are a number of places where the hardware apparently prefetches
2782          * past the end of the object, and we've seen multiple hangs with the
2783          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2784          * aperture.  One page should be enough to keep any prefetching inside
2785          * of the aperture.
2786          */
2787         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2788         unsigned long hole_start, hole_end;
2789         struct i915_hw_ppgtt *ppgtt;
2790         struct drm_mm_node *entry;
2791         int ret;
2792         unsigned long mappable = min(ggtt->base.total, ggtt->mappable_end);
2793
2794         ret = intel_vgt_balloon(dev_priv);
2795         if (ret)
2796                 return ret;
2797
2798         /* Reserve a mappable slot for our lockless error capture */
2799         ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2800                                                   &ggtt->error_capture,
2801                                                   4096, 0, -1,
2802                                                   0, ggtt->mappable_end,
2803                                                   0, 0);
2804         if (ret)
2805                 return ret;
2806
2807         /* Clear any non-preallocated blocks */
2808         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2809                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2810                               hole_start, hole_end);
2811                 ggtt->base.clear_range(&ggtt->base, hole_start,
2812                                        hole_end - hole_start);
2813         }
2814
2815 #ifdef __DragonFly__
2816         DRM_INFO("taking over the fictitious range 0x%llx-0x%llx\n",
2817             dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_end);
2818         vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base,
2819              dev_priv->ggtt.mappable_base + mappable, VM_MEMATTR_WRITE_COMBINING);
2820 #endif
2821
2822         /* And finally clear the reserved guard page */
2823         ggtt->base.clear_range(&ggtt->base,
2824                                ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
2825
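        /*
         * Note: with PPGTT enabled but not full PPGTT, a single "aliasing"
         * PPGTT is set up to mirror the GGTT address space, and ggtt_bind_vma
         * is swapped for aliasing_gtt_bind_vma so that bindings can be
         * reflected into both sets of page tables.
         */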
2826         if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2827                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2828                 if (!ppgtt) {
2829                         ret = -ENOMEM;
2830                         goto err;
2831                 }
2832
2833                 ret = __hw_ppgtt_init(ppgtt, dev_priv);
2834                 if (ret)
2835                         goto err_ppgtt;
2836
2837                 if (ppgtt->base.allocate_va_range) {
2838                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2839                                                             ppgtt->base.total);
2840                         if (ret)
2841                                 goto err_ppgtt_cleanup;
2842                 }
2843
2844                 ppgtt->base.clear_range(&ppgtt->base,
2845                                         ppgtt->base.start,
2846                                         ppgtt->base.total);
2847
2848                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2849                 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2850                 ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2851         }
2852
2853         return 0;
2854
2855 err_ppgtt_cleanup:
2856         ppgtt->base.cleanup(&ppgtt->base);
2857 err_ppgtt:
2858         kfree(ppgtt);
2859 err:
2860         drm_mm_remove_node(&ggtt->error_capture);
2861         return ret;
2862 }
2863
2864 /**
2865  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2866  * @dev_priv: i915 device
2867  */
2868 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2869 {
2870         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2871
2872         if (dev_priv->mm.aliasing_ppgtt) {
2873                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2874                 ppgtt->base.cleanup(&ppgtt->base);
2875                 kfree(ppgtt);
2876         }
2877
2878         i915_gem_cleanup_stolen(&dev_priv->drm);
2879
2880         if (drm_mm_node_allocated(&ggtt->error_capture))
2881                 drm_mm_remove_node(&ggtt->error_capture);
2882
2883         if (drm_mm_initialized(&ggtt->base.mm)) {
2884                 intel_vgt_deballoon(dev_priv);
2885
2886                 drm_mm_takedown(&ggtt->base.mm);
2887                 list_del(&ggtt->base.global_link);
2888         }
2889
2890         ggtt->base.cleanup(&ggtt->base);
2891
2892         arch_phys_wc_del(ggtt->mtrr);
2893         io_mapping_fini(&ggtt->mappable);
2894 }
2895
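/*
 * Note: the helpers below decode the GGTT size from the GMCH control word.
 * As read from the code: gen6 stores the size directly in MB (<< 20), gen8
 * stores an exponent (1 << n MB, capped to 2GB on 32b builds) and CHV also
 * stores an exponent (1 << (20 + n) bytes).
 */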
2896 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2897 {
2898         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2899         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2900         return snb_gmch_ctl << 20;
2901 }
2902
2903 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2904 {
2905         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2906         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2907         if (bdw_gmch_ctl)
2908                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2909
2910 #ifdef CONFIG_X86_32
2911         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2912         if (bdw_gmch_ctl > 4)
2913                 bdw_gmch_ctl = 4;
2914 #endif
2915
2916         return bdw_gmch_ctl << 20;
2917 }
2918
2919 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2920 {
2921         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2922         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2923
2924         if (gmch_ctrl)
2925                 return 1 << (20 + gmch_ctrl);
2926
2927         return 0;
2928 }
2929
2930 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2931 {
2932         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2933         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2934         return snb_gmch_ctl << 25; /* 32 MB units */
2935 }
2936
2937 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2938 {
2939         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2940         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2941         return bdw_gmch_ctl << 25; /* 32 MB units */
2942 }
2943
2944 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2945 {
2946         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2947         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2948
2949         /*
2950          * 0x0  to 0x10: 32MB increments starting at 0MB
2951          * 0x11 to 0x16: 4MB increments starting at 8MB
2952          * 0x17 to 0x1d: 4MB increments starting at 36MB
2953          */
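        /*
         * Illustrative values for the decode below (derived from the code,
         * not quoted from the spec):
         *   0x02 -> 0x02 << 25              = 64MB
         *   0x11 -> (0x11 - 0x11 + 2) << 22 =  8MB
         *   0x17 -> (0x17 - 0x17 + 9) << 22 = 36MB
         */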
2954         if (gmch_ctrl < 0x11)
2955                 return gmch_ctrl << 25;
2956         else if (gmch_ctrl < 0x17)
2957                 return (gmch_ctrl - 0x11 + 2) << 22;
2958         else
2959                 return (gmch_ctrl - 0x17 + 9) << 22;
2960 }
2961
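/*
 * gen9 stolen-size decode: values below 0xf0 are in 32MB units, values of
 * 0xf0 and above are in 4MB units starting at 4MB.  Illustrative examples
 * (derived from the code below): 0x02 -> 64MB, 0xf0 -> 4MB, 0xf1 -> 8MB.
 */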
2962 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2963 {
2964         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2965         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2966
2967         if (gen9_gmch_ctl < 0xf0)
2968                 return gen9_gmch_ctl << 25; /* 32 MB units */
2969         else
2970                 /* 0xf0 and above: 4MB increments, starting at 4MB for 0xf0 */
2971                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2972 }
2973
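/*
 * Shared tail of the gen6/gen8 probe paths: ioremap the upper half of BAR 0
 * (which holds the GGTT PTEs) into ggtt->gsm and allocate the scratch page
 * that cleared GGTT entries will point at.
 */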
2974 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2975 {
2976         struct pci_dev *pdev = ggtt->base.dev->pdev;
2977         phys_addr_t phys_addr;
2978         int ret;
2979
2980         /* For modern GENs the PTEs and register space are split evenly within the BAR */
2981         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2982
2983         /*
2984          * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2985          * dropped. Since WC mappings in general produce 64 byte burst writes
2986          * when the WC buffer is flushed, we can't use WC here and have to
2987          * resort to an uncached mapping. The WC issue is easily caught by the
2988          * readback check when writing GTT PTE entries.
2989          */
2990         if (IS_BROXTON(to_i915(ggtt->base.dev)))
2991                 ggtt->gsm = ioremap_nocache(phys_addr, size);
2992         else
2993                 ggtt->gsm = ioremap_wc(phys_addr, size);
2994         if (!ggtt->gsm) {
2995                 DRM_ERROR("Failed to map the ggtt page table\n");
2996                 return -ENOMEM;
2997         }
2998
2999         ret = setup_scratch_page(ggtt->base.dev,
3000                                  &ggtt->base.scratch_page,
3001                                  GFP_DMA32);
3002         if (ret) {
3003                 DRM_ERROR("Scratch setup failed\n");
3004                 /* iounmap will also get called at remove, but meh */
3005                 iounmap(ggtt->gsm);
3006                 return ret;
3007         }
3008
3009         return 0;
3010 }
3011
3012 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3013  * bits. When using advanced contexts each context stores its own PAT, but
3014  * writing this data shouldn't be harmful even in those cases. */
3015 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3016 {
3017         uint64_t pat;
3018
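        /*
         * Each GEN8_PPAT(index, value) term below programs one of the eight
         * PAT entries; the combined 64-bit value is written out as two
         * 32-bit halves (GEN8_PRIVATE_PAT_LO/HI) at the end of this function.
         */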
3019         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
3020               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3021               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3022               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
3023               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3024               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3025               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3026               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3027
3028         if (!USES_PPGTT(dev_priv))
3029                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3030                  * so RTL will always use the value corresponding to
3031                  * pat_sel = 000".
3032                  * So let's disable caching for the GGTT to avoid screen corruption.
3033                  * MOCS can still be used though.
3034                  * - System agent GGTT writes (i.e. CPU GTT mmaps) already work
3035                  * before this patch, i.e. the same uncached + snooping access
3036                  * as on gen6/7 seems to be in effect.
3037                  * - So this just fixes blitter/render access. Again it looks
3038                  * like it's not just uncached access, but uncached + snooping.
3039                  * So we can still hold onto all our assumptions wrt cpu
3040                  * clflushing on LLC machines.
3041                  */
3042                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3043
3044         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3045          * write would work. */
3046         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3047         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3048 }
3049
3050 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3051 {
3052         uint64_t pat;
3053
3054         /*
3055          * Map WB on BDW to snooped on CHV.
3056          *
3057          * Only the snoop bit has meaning for CHV, the rest is
3058          * ignored.
3059          *
3060          * The hardware will never snoop for certain types of accesses:
3061          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3062          * - PPGTT page tables
3063          * - some other special cycles
3064          *
3065          * As with BDW, we also need to consider the following for GT accesses:
3066          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3067          * so RTL will always use the value corresponding to
3068          * pat_sel = 000".
3069          * Which means we must set the snoop bit in PAT entry 0
3070          * in order to keep the global status page working.
3071          */
3072         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3073               GEN8_PPAT(1, 0) |
3074               GEN8_PPAT(2, 0) |
3075               GEN8_PPAT(3, 0) |
3076               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3077               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3078               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3079               GEN8_PPAT(7, CHV_PPAT_SNOOP);
3080
3081         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3082         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3083 }
3084
3085 static void gen6_gmch_remove(struct i915_address_space *vm)
3086 {
3087         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3088
3089         iounmap(ggtt->gsm);
3090         cleanup_scratch_page(vm->dev, &vm->scratch_page);
3091 }
3092
3093 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3094 {
3095         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3096         struct pci_dev *pdev = dev_priv->drm.pdev;
3097         unsigned int size;
3098         u16 snb_gmch_ctl;
3099
3100         /* TODO: We're not aware of mappable constraints on gen8 yet */
3101         ggtt->mappable_base = pci_resource_start(pdev, 2);
3102         ggtt->mappable_end = pci_resource_len(pdev, 2);
3103
3104 #if 0
3105         if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
3106                 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3107 #endif
3108
3109         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3110
3111         if (INTEL_GEN(dev_priv) >= 9) {
3112                 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3113                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3114         } else if (IS_CHERRYVIEW(dev_priv)) {
3115                 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3116                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3117         } else {
3118                 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3119                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3120         }
3121
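        /*
         * Convert the PTE allocation into addressable GGTT space.  As an
         * illustration, 8MB of gen8 PTEs (8 bytes each) maps
         * (8M / 8) << PAGE_SHIFT = 4GB of GGTT.
         */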
3122         ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3123
3124         if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
3125                 chv_setup_private_ppat(dev_priv);
3126         else
3127                 bdw_setup_private_ppat(dev_priv);
3128
3129         ggtt->base.cleanup = gen6_gmch_remove;
3130         ggtt->base.bind_vma = ggtt_bind_vma;
3131         ggtt->base.unbind_vma = ggtt_unbind_vma;
3132         ggtt->base.insert_page = gen8_ggtt_insert_page;
3133         ggtt->base.clear_range = nop_clear_range;
3134         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3135                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3136
3137         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3138         if (IS_CHERRYVIEW(dev_priv))
3139                 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3140
3141         return ggtt_probe_common(ggtt, size);
3142 }
3143
3144 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3145 {
3146         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3147         struct pci_dev *pdev = dev_priv->drm.pdev;
3148         unsigned int size;
3149         u16 snb_gmch_ctl;
3150
3151         ggtt->mappable_base = pci_resource_start(pdev, 2);
3152         ggtt->mappable_end = pci_resource_len(pdev, 2);
3153
3154         /* 64/512MB is the current min/max we actually know of, but this is just
3155          * a coarse sanity check.
3156          */
3157         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3158                 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3159                 return -ENXIO;
3160         }
3161
3162 #if 0
3163         if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
3164                 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3165 #endif
3166         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3167
3168         ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3169
3170         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3171         ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3172
3173         ggtt->base.clear_range = gen6_ggtt_clear_range;
3174         ggtt->base.insert_page = gen6_ggtt_insert_page;
3175         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3176         ggtt->base.bind_vma = ggtt_bind_vma;
3177         ggtt->base.unbind_vma = ggtt_unbind_vma;
3178         ggtt->base.cleanup = gen6_gmch_remove;
3179
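        /* Pick the PTE encoder that matches this platform's cache-control bits. */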
3180         if (HAS_EDRAM(dev_priv))
3181                 ggtt->base.pte_encode = iris_pte_encode;
3182         else if (IS_HASWELL(dev_priv))
3183                 ggtt->base.pte_encode = hsw_pte_encode;
3184         else if (IS_VALLEYVIEW(dev_priv))
3185                 ggtt->base.pte_encode = byt_pte_encode;
3186         else if (INTEL_GEN(dev_priv) >= 7)
3187                 ggtt->base.pte_encode = ivb_pte_encode;
3188         else
3189                 ggtt->base.pte_encode = snb_pte_encode;
3190
3191         return ggtt_probe_common(ggtt, size);
3192 }
3193
3194 static void i915_gmch_remove(struct i915_address_space *vm)
3195 {
3196         intel_gmch_remove();
3197 }
3198
3199 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3200 {
3201         struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3202 #if 0
3203         int ret;
3204
3205         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3206         if (!ret) {
3207                 DRM_ERROR("failed to set up gmch\n");
3208                 return -EIO;
3209         }
3210 #endif
3211
3212         intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3213                       &ggtt->mappable_base, &ggtt->mappable_end);
3214
3215         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3216         ggtt->base.insert_page = i915_ggtt_insert_page;
3217         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3218         ggtt->base.clear_range = i915_ggtt_clear_range;
3219         ggtt->base.bind_vma = ggtt_bind_vma;
3220         ggtt->base.unbind_vma = ggtt_unbind_vma;
3221         ggtt->base.cleanup = i915_gmch_remove;
3222
3223         if (unlikely(ggtt->do_idle_maps))
3224                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3225
3226         return 0;
3227 }
3228
3229 /**
3230  * i915_ggtt_probe_hw - Probe GGTT hardware location
3231  * @dev_priv: i915 device
3232  */
3233 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3234 {
3235         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3236         int ret;
3237
3238         ggtt->base.dev = &dev_priv->drm;
3239
3240         if (INTEL_GEN(dev_priv) <= 5)
3241                 ret = i915_gmch_probe(ggtt);
3242         else if (INTEL_GEN(dev_priv) < 8)
3243                 ret = gen6_gmch_probe(ggtt);
3244         else
3245                 ret = gen8_gmch_probe(ggtt);
3246         if (ret)
3247                 return ret;
3248
3249         if ((ggtt->base.total - 1) >> 32) {
3250                 DRM_ERROR("We never expected a Global GTT with more than 32 bits"
3251                           " of address space! Found %lldM!\n",
3252                           ggtt->base.total >> 20);
3253                 ggtt->base.total = 1ULL << 32;
3254                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3255         }
3256
3257         if (ggtt->mappable_end > ggtt->base.total) {
3258                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3259                           " aperture=%llx, total=%llx\n",
3260                           ggtt->mappable_end, ggtt->base.total);
3261                 ggtt->mappable_end = ggtt->base.total;
3262         }
3263
3264         /* GMADR is the PCI mmio aperture into the global GTT. */
3265         DRM_INFO("Memory usable by graphics device = %lluM\n",
3266                  ggtt->base.total >> 20);
3267         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3268         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3269 #ifdef CONFIG_INTEL_IOMMU
3270         if (intel_iommu_gfx_mapped)
3271                 DRM_INFO("VT-d active for gfx access\n");
3272 #endif
3273
3274         return 0;
3275 }
3276
3277 /**
3278  * i915_ggtt_init_hw - Initialize GGTT hardware
3279  * @dev_priv: i915 device
3280  */
3281 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3282 {
3283         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3284         int ret;
3285
3286         INIT_LIST_HEAD(&dev_priv->vm_list);
3287
3288         /* Subtract the guard page before address space initialization to
3289          * shrink the range used by drm_mm.
3290          */
3291         ggtt->base.total -= PAGE_SIZE;
3292         i915_address_space_init(&ggtt->base, dev_priv);
3293         ggtt->base.total += PAGE_SIZE;
3294         if (!HAS_LLC(dev_priv))
3295                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3296
3297         if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
3298                                 dev_priv->ggtt.mappable_base,
3299                                 dev_priv->ggtt.mappable_end)) {
3300                 ret = -EIO;
3301                 goto out_gtt_cleanup;
3302         }
3303
3304         ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);
3305
3306         /*
3307          * Initialise stolen early so that we may reserve preallocated
3308          * objects for the BIOS to KMS transition.
3309          */
3310         ret = i915_gem_init_stolen(&dev_priv->drm);
3311         if (ret)
3312                 goto out_gtt_cleanup;
3313
3314         return 0;
3315
3316 out_gtt_cleanup:
3317         ggtt->base.cleanup(&ggtt->base);
3318         return ret;
3319 }
3320
3321 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3322 {
3323         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3324                 return -EIO;
3325
3326         return 0;
3327 }
3328
3329 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3330 {
3331         struct drm_i915_private *dev_priv = to_i915(dev);
3332         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3333         struct drm_i915_gem_object *obj, *on;
3334
3335         i915_check_and_clear_faults(dev_priv);
3336
3337         /* First fill our portion of the GTT with scratch pages */
3338         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
3339
3340         ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3341
3342         /* clflush objects bound into the GGTT and rebind them. */
3343         list_for_each_entry_safe(obj, on,
3344                                  &dev_priv->mm.bound_list, global_list) {
3345                 bool ggtt_bound = false;
3346                 struct i915_vma *vma;
3347
3348                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3349                         if (vma->vm != &ggtt->base)
3350                                 continue;
3351
3352                         if (!i915_vma_unbind(vma))
3353                                 continue;
3354
3355                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3356                                               PIN_UPDATE));
3357                         ggtt_bound = true;
3358                 }
3359
3360                 if (ggtt_bound)
3361                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3362         }
3363
3364         ggtt->base.closed = false;
3365
3366         if (INTEL_INFO(dev)->gen >= 8) {
3367                 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
3368                         chv_setup_private_ppat(dev_priv);
3369                 else
3370                         bdw_setup_private_ppat(dev_priv);
3371
3372                 return;
3373         }
3374
3375         if (USES_PPGTT(dev)) {
3376                 struct i915_address_space *vm;
3377
3378                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3379                         /* TODO: Perhaps it shouldn't be gen6 specific */
3380
3381                         struct i915_hw_ppgtt *ppgtt;
3382
3383                         if (i915_is_ggtt(vm))
3384                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3385                         else
3386                                 ppgtt = i915_vm_to_ppgtt(vm);
3387
3388                         gen6_write_page_range(dev_priv, &ppgtt->pd,
3389                                               0, ppgtt->base.total);
3390                 }
3391         }
3392
3393         i915_ggtt_flush(dev_priv);
3394 }
3395
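/*
 * Retirement callback for a VMA's per-engine last_read tracker: clear this
 * engine's active bit and, once the VMA is idle on all engines, move it to
 * the inactive list and unbind it if it has been closed and is unpinned.
 */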
3396 static void
3397 i915_vma_retire(struct i915_gem_active *active,
3398                 struct drm_i915_gem_request *rq)
3399 {
3400         const unsigned int idx = rq->engine->id;
3401         struct i915_vma *vma =
3402                 container_of(active, struct i915_vma, last_read[idx]);
3403
3404         GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
3405
3406         i915_vma_clear_active(vma, idx);
3407         if (i915_vma_is_active(vma))
3408                 return;
3409
3410         list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3411         if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
3412                 WARN_ON(i915_vma_unbind(vma));
3413 }
3414
3415 void i915_vma_destroy(struct i915_vma *vma)
3416 {
3417         GEM_BUG_ON(vma->node.allocated);
3418         GEM_BUG_ON(i915_vma_is_active(vma));
3419         GEM_BUG_ON(!i915_vma_is_closed(vma));
3420         GEM_BUG_ON(vma->fence);
3421
3422         list_del(&vma->vm_link);
3423         if (!i915_vma_is_ggtt(vma))
3424                 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
3425
3426         kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
3427 }
3428
3429 void i915_vma_close(struct i915_vma *vma)
3430 {
3431         GEM_BUG_ON(i915_vma_is_closed(vma));
3432         vma->flags |= I915_VMA_CLOSED;
3433
3434         list_del_init(&vma->obj_link);
3435         if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
3436                 WARN_ON(i915_vma_unbind(vma));
3437 }
3438
3439 static struct i915_vma *
3440 __i915_vma_create(struct drm_i915_gem_object *obj,
3441                   struct i915_address_space *vm,
3442                   const struct i915_ggtt_view *view)
3443 {
3444         struct i915_vma *vma;
3445         int i;
3446
3447         GEM_BUG_ON(vm->closed);
3448
3449         vma = kzalloc(sizeof(*vma), GFP_KERNEL);
3450         if (vma == NULL)
3451                 return ERR_PTR(-ENOMEM);
3452
3453         INIT_LIST_HEAD(&vma->exec_list);
3454         for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
3455                 init_request_active(&vma->last_read[i], i915_vma_retire);
3456         init_request_active(&vma->last_fence, NULL);
3457         list_add(&vma->vm_link, &vm->unbound_list);
3458         vma->vm = vm;
3459         vma->obj = obj;
3460         vma->size = obj->base.size;
3461
3462         if (view) {
3463                 vma->ggtt_view = *view;
3464                 if (view->type == I915_GGTT_VIEW_PARTIAL) {
3465                         vma->size = view->params.partial.size;
3466                         vma->size <<= PAGE_SHIFT;
3467                 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3468                         vma->size =
3469                                 intel_rotation_info_size(&view->params.rotated);
3470                         vma->size <<= PAGE_SHIFT;
3471                 }
3472         }
3473
3474         if (i915_is_ggtt(vm)) {
3475                 vma->flags |= I915_VMA_GGTT;
3476         } else {
3477                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3478         }
3479
3480         list_add_tail(&vma->obj_link, &obj->vma_list);
3481         return vma;
3482 }
3483
3484 static inline bool vma_matches(struct i915_vma *vma,
3485                                struct i915_address_space *vm,
3486                                const struct i915_ggtt_view *view)
3487 {
3488         if (vma->vm != vm)
3489                 return false;
3490
3491         if (!i915_vma_is_ggtt(vma))
3492                 return true;
3493
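        /* A NULL view asks for the normal view (I915_GGTT_VIEW_NORMAL == 0). */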
3494         if (!view)
3495                 return vma->ggtt_view.type == 0;
3496
3497         if (vma->ggtt_view.type != view->type)
3498                 return false;
3499
3500         return memcmp(&vma->ggtt_view.params,
3501                       &view->params,
3502                       sizeof(view->params)) == 0;
3503 }
3504
3505 struct i915_vma *
3506 i915_vma_create(struct drm_i915_gem_object *obj,
3507                 struct i915_address_space *vm,
3508                 const struct i915_ggtt_view *view)
3509 {
3510         GEM_BUG_ON(view && !i915_is_ggtt(vm));
3511         GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
3512
3513         return __i915_vma_create(obj, vm, view);
3514 }
3515
3516 struct i915_vma *
3517 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
3518                     struct i915_address_space *vm,
3519                     const struct i915_ggtt_view *view)
3520 {
3521         struct i915_vma *vma;
3522
3523         list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
3524                 if (vma_matches(vma, vm, view))
3525                         return vma;
3526
3527         return NULL;
3528 }
3529
3530 struct i915_vma *
3531 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3532                                   struct i915_address_space *vm,
3533                                   const struct i915_ggtt_view *view)
3534 {
3535         struct i915_vma *vma;
3536
3537         GEM_BUG_ON(view && !i915_is_ggtt(vm));
3538
3539         vma = i915_gem_obj_to_vma(obj, vm, view);
3540         if (!vma)
3541                 vma = __i915_vma_create(obj, vm, view);
3542
3543         GEM_BUG_ON(i915_vma_is_closed(vma));
3544         return vma;
3545 }
3546
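/*
 * Emit one plane's DMA addresses in rotated order: column by column, each
 * column walked from the bottom row up.  Illustrative example: a 2x2 plane
 * with stride 2 laid out as
 *
 *    0 1
 *    2 3
 *
 * is emitted in the order 2, 0, 3, 1.
 */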
3547 static struct scatterlist *
3548 rotate_pages(const dma_addr_t *in, unsigned int offset,
3549              unsigned int width, unsigned int height,
3550              unsigned int stride,
3551              struct sg_table *st, struct scatterlist *sg)
3552 {
3553         unsigned int column, row;
3554         unsigned int src_idx;
3555
3556         for (column = 0; column < width; column++) {
3557                 src_idx = stride * (height - 1) + column;
3558                 for (row = 0; row < height; row++) {
3559                         st->nents++;
3560                         /* We don't need the pages, but we do need to initialize
3561                          * the entries so the sg list can be traversed correctly.
3562                          * All we really need are the DMA addresses.
3563                          */
3564                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3565                         sg_dma_address(sg) = in[offset + src_idx];
3566                         sg_dma_len(sg) = PAGE_SIZE;
3567                         sg = sg_next(sg);
3568                         src_idx -= stride;
3569                 }
3570         }
3571
3572         return sg;
3573 }
3574
3575 static struct sg_table *
3576 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
3577                           struct drm_i915_gem_object *obj)
3578 {
3579         const size_t n_pages = obj->base.size / PAGE_SIZE;
3580         unsigned int size = intel_rotation_info_size(rot_info);
3581         struct sgt_iter sgt_iter;
3582         dma_addr_t dma_addr;
3583         unsigned long i;
3584         dma_addr_t *page_addr_list;
3585         struct sg_table *st;
3586         struct scatterlist *sg;
3587         int ret = -ENOMEM;
3588
3589         /* Allocate a temporary list of source pages for random access. */
3590         page_addr_list = drm_malloc_gfp(n_pages,
3591                                         sizeof(dma_addr_t),
3592                                         GFP_TEMPORARY);
3593         if (!page_addr_list)
3594                 return ERR_PTR(ret);
3595
3596         /* Allocate target SG list. */
3597         st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3598         if (!st)
3599                 goto err_st_alloc;
3600
3601         ret = sg_alloc_table(st, size, GFP_KERNEL);
3602         if (ret)
3603                 goto err_sg_alloc;
3604
3605         /* Populate source page list from the object. */
3606         i = 0;
3607         for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
3608                 page_addr_list[i++] = dma_addr;
3609
3610         GEM_BUG_ON(i != n_pages);
3611         st->nents = 0;
3612         sg = st->sgl;
3613
3614         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3615                 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3616                                   rot_info->plane[i].width, rot_info->plane[i].height,
3617                                   rot_info->plane[i].stride, st, sg);
3618         }
3619
3620         DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
3621                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3622
3623         drm_free_large(page_addr_list);
3624
3625         return st;
3626
3627 err_sg_alloc:
3628         kfree(st);
3629 err_st_alloc:
3630         drm_free_large(page_addr_list);
3631
3632         DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3633                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3634
3635         return ERR_PTR(ret);
3636 }
3637
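/*
 * Build an sg_table covering params.partial.size pages of the object,
 * starting params.partial.offset pages in, for an I915_GGTT_VIEW_PARTIAL
 * view.
 */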
3638 static struct sg_table *
3639 intel_partial_pages(const struct i915_ggtt_view *view,
3640                     struct drm_i915_gem_object *obj)
3641 {
3642         struct sg_table *st;
3643         struct scatterlist *sg;
3644         struct sg_page_iter obj_sg_iter;
3645         int ret = -ENOMEM;
3646
3647         st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3648         if (!st)
3649                 goto err_st_alloc;
3650
3651         ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3652         if (ret)
3653                 goto err_sg_alloc;
3654
3655         sg = st->sgl;
3656         st->nents = 0;
3657         for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3658                 view->params.partial.offset)
3659         {
3660                 if (st->nents >= view->params.partial.size)
3661                         break;
3662
3663                 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3664                 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3665                 sg_dma_len(sg) = PAGE_SIZE;
3666
3667                 sg = sg_next(sg);
3668                 st->nents++;
3669         }
3670
3671         return st;
3672
3673 err_sg_alloc:
3674         kfree(st);
3675 err_st_alloc:
3676         return ERR_PTR(ret);
3677 }
3678
3679 static int
3680 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3681 {
3682         int ret = 0;
3683
3684         if (vma->pages)
3685                 return 0;
3686
3687         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3688                 vma->pages = vma->obj->pages;
3689         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3690                 vma->pages =
3691                         intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3692         else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3693                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3694         else
3695                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3696                           vma->ggtt_view.type);
3697
3698         if (!vma->pages) {
3699                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3700                           vma->ggtt_view.type);
3701                 ret = -EINVAL;
3702         } else if (IS_ERR(vma->pages)) {
3703                 ret = PTR_ERR(vma->pages);
3704                 vma->pages = NULL;
3705                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3706                           vma->ggtt_view.type, ret);
3707         }
3708
3709         return ret;
3710 }
3711
3712 /**
3713  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3714  * @vma: VMA to map
3715  * @cache_level: mapping cache level
3716  * @flags: flags like global or local mapping
3717  *
3718  * DMA addresses are taken from the scatter-gather table of this object (or of
3719  * this VMA in the case of non-default GGTT views) and the PTE entries are set up.
3720  * Note that the DMA addresses are the only part of the SG table we care about.
3721  */
3722 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3723                   u32 flags)
3724 {
3725         u32 bind_flags;
3726         u32 vma_flags;
3727         int ret;
3728
3729         if (WARN_ON(flags == 0))
3730                 return -EINVAL;
3731
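        /*
         * Translate pin flags into bind flags.  Illustratively: PIN_USER on
         * a VMA that already carries a global binding results in only
         * I915_VMA_LOCAL_BIND being bound below, while PIN_UPDATE rebinds
         * whatever bindings the VMA already has.
         */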
3732         bind_flags = 0;
3733         if (flags & PIN_GLOBAL)
3734                 bind_flags |= I915_VMA_GLOBAL_BIND;
3735         if (flags & PIN_USER)
3736                 bind_flags |= I915_VMA_LOCAL_BIND;
3737
3738         vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3739         if (flags & PIN_UPDATE)
3740                 bind_flags |= vma_flags;
3741         else
3742                 bind_flags &= ~vma_flags;
3743         if (bind_flags == 0)
3744                 return 0;
3745
3746         if (vma_flags == 0 && vma->vm->allocate_va_range) {
3747                 trace_i915_va_alloc(vma);
3748                 ret = vma->vm->allocate_va_range(vma->vm,
3749                                                  vma->node.start,
3750                                                  vma->node.size);
3751                 if (ret)
3752                         return ret;
3753         }
3754
3755         ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3756         if (ret)
3757                 return ret;
3758
3759         vma->flags |= bind_flags;
3760         return 0;
3761 }
3762
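/*
 * Map the VMA's GGTT range through the mappable aperture as write-combined
 * I/O memory, caching the result in vma->iomap, and pin the VMA so the
 * mapping remains valid.  Callers must hold struct_mutex and a runtime-pm
 * wakeref, as asserted below.
 */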
3763 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3764 {
3765         void __iomem *ptr;
3766
3767         /* Access through the GTT requires the device to be awake. */
3768         assert_rpm_wakelock_held(to_i915(vma->vm->dev));
3769
3770         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3771         if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
3772                 return IO_ERR_PTR(-ENODEV);
3773
3774         GEM_BUG_ON(!i915_vma_is_ggtt(vma));
3775         GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
3776
3777         ptr = vma->iomap;
3778         if (ptr == NULL) {
3779                 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
3780                                         vma->node.start,
3781                                         vma->node.size);
3782                 if (ptr == NULL)
3783                         return IO_ERR_PTR(-ENOMEM);
3784
3785                 vma->iomap = ptr;
3786         }
3787
3788         __i915_vma_pin(vma);
3789         return ptr;
3790 }
3791
3792 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
3793 {
3794         struct i915_vma *vma;
3795
3796         vma = fetch_and_zero(p_vma);
3797         if (!vma)
3798                 return;
3799
3800         i915_vma_unpin(vma);
3801         i915_vma_put(vma);
3802 }