drm/i915: Update base driver to 20160725
[dragonfly.git] / sys / dev / drm / i915 / i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34
35 #include <sys/mplock2.h>
36
37 /**
38  * DOC: Global GTT views
39  *
40  * Background and previous state
41  *
42  * Historically objects could exist (be bound) in global GTT space only as
43  * singular instances with a view representing all of the object's backing pages
44  * in a linear fashion. This view is called the normal view.
45  *
46  * To support multiple views of the same object, where the number of mapped
47  * pages is not equal to the backing store, or where the layout of the pages
48  * is not linear, the concept of a GGTT view was added.
49  *
50  * One example of an alternative view is a stereo display driven by a single
51  * image. In this case we would have a framebuffer looking like this
52  * (2x2 pages):
53  *
54  *    12
55  *    34
56  *
57  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
58  * rendering. In contrast, fed to the display engine would be an alternative
59  * view which could look something like this:
60  *
61  *   1212
62  *   3434
63  *
64  * In this example both the size and layout of pages in the alternative view are
65  * different from the normal view.
66  *
67  * Implementation and usage
68  *
69  * GGTT views are implemented using VMAs and are distinguished via enum
70  * i915_ggtt_view_type and struct i915_ggtt_view.
71  *
72  * A new flavour of core GEM functions which work with GGTT bound objects was
73  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
74  * renaming large amounts of code. They take a struct i915_ggtt_view parameter
75  * encapsulating all the metadata required to implement a view.
76  *
77  * As a helper for callers which are only interested in the normal view, the
78  * globally const i915_ggtt_view_normal singleton instance exists. All old core
79  * GEM API functions, the ones not taking a view parameter, operate on the
80  * normal GGTT view.
81  *
82  * Code wanting to add or use a new GGTT view needs to:
83  *
84  * 1. Add a new enum with a suitable name.
85  * 2. Extend the metadata in the i915_ggtt_view structure if required.
86  * 3. Add support to i915_get_ggtt_vma_pages().
87  *
88  * New views are required to build a scatter-gather table from within the
89  * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
90  * and exists for the lifetime of a VMA.
91  *
92  * The core API is designed to have copy semantics, which means that the passed
93  * in struct i915_ggtt_view does not need to be persistent (kept around after
94  * calling the core API functions).
95  *
96  */
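/*
 * Editor's illustrative sketch (not referenced by the driver): how a caller
 * might request a non-normal view, per the DOC comment above.  The pin helper
 * name, flag and rotation metadata fields are assumptions based on the
 * _view-flavoured GEM API of this era and may differ between kernel versions:
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	// fill in view.params with the rotation geometry for the framebuffer
 *	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 *
 * Callers interested only in the normal view can pass &i915_ggtt_view_normal,
 * or simply use the old non-_view API.
 */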
97
98 static inline struct i915_ggtt *
99 i915_vm_to_ggtt(struct i915_address_space *vm)
100 {
101         GEM_BUG_ON(!i915_is_ggtt(vm));
102         return container_of(vm, struct i915_ggtt, base);
103 }
104
105 static int
106 i915_get_ggtt_vma_pages(struct i915_vma *vma);
107
108 const struct i915_ggtt_view i915_ggtt_view_normal = {
109         .type = I915_GGTT_VIEW_NORMAL,
110 };
111 const struct i915_ggtt_view i915_ggtt_view_rotated = {
112         .type = I915_GGTT_VIEW_ROTATED,
113 };
114
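/*
 * Sanitize the i915.enable_ppgtt module parameter against what the hardware
 * and the current (possibly virtualized) environment actually support.  The
 * return value uses the same encoding as the parameter: 0 = PPGTT disabled,
 * 1 = aliasing PPGTT only, 2 = full PPGTT, 3 = full 48bit PPGTT.
 */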
115 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
116                                 int enable_ppgtt)
117 {
118         bool has_aliasing_ppgtt;
119         bool has_full_ppgtt;
120         bool has_full_48bit_ppgtt;
121
122         has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
123         has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
124         has_full_48bit_ppgtt =
125                 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
126
127         if (intel_vgpu_active(dev_priv)) {
128                 /* emulation is too hard */
129                 has_full_ppgtt = false;
130                 has_full_48bit_ppgtt = false;
131         }
132
133         if (!has_aliasing_ppgtt)
134                 return 0;
135
136         /*
137          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
138          * execlists, the sole mechanism available to submit work.
139          */
140         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
141                 return 0;
142
143         if (enable_ppgtt == 1)
144                 return 1;
145
146         if (enable_ppgtt == 2 && has_full_ppgtt)
147                 return 2;
148
149         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
150                 return 3;
151
152 #ifdef CONFIG_INTEL_IOMMU
153         /* Disable ppgtt on SNB if VT-d is on. */
154         if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
155                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
156                 return 0;
157         }
158 #endif
159
160         /* Early VLV doesn't have this */
161         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
162                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
163                 return 0;
164         }
165
166         if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
167                 return has_full_48bit_ppgtt ? 3 : 2;
168         else
169                 return has_aliasing_ppgtt ? 1 : 0;
170 }
171
172 static int ppgtt_bind_vma(struct i915_vma *vma,
173                           enum i915_cache_level cache_level,
174                           u32 unused)
175 {
176         u32 pte_flags = 0;
177
178         /* Currently applicable only to VLV */
179         if (vma->obj->gt_ro)
180                 pte_flags |= PTE_READ_ONLY;
181
182         vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
183                                 cache_level, pte_flags);
184
185         return 0;
186 }
187
188 static void ppgtt_unbind_vma(struct i915_vma *vma)
189 {
190         vma->vm->clear_range(vma->vm,
191                              vma->node.start,
192                              vma->obj->base.size,
193                              true);
194 }
195
196 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
197                                   enum i915_cache_level level,
198                                   bool valid)
199 {
200         gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
201         pte |= addr;
202
203         switch (level) {
204         case I915_CACHE_NONE:
205                 pte |= PPAT_UNCACHED_INDEX;
206                 break;
207         case I915_CACHE_WT:
208                 pte |= PPAT_DISPLAY_ELLC_INDEX;
209                 break;
210         default:
211                 pte |= PPAT_CACHED_INDEX;
212                 break;
213         }
214
215         return pte;
216 }
217
218 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
219                                   const enum i915_cache_level level)
220 {
221         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
222         pde |= addr;
223         if (level != I915_CACHE_NONE)
224                 pde |= PPAT_CACHED_PDE_INDEX;
225         else
226                 pde |= PPAT_UNCACHED_INDEX;
227         return pde;
228 }
229
230 #define gen8_pdpe_encode gen8_pde_encode
231 #define gen8_pml4e_encode gen8_pde_encode
232
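/*
 * gen6/7 PTE encoders.  Editorial note: going by their names, snb_pte_encode
 * is used on Sandybridge, ivb_pte_encode on gen7, byt_pte_encode on
 * Valleyview, and hsw_pte_encode/iris_pte_encode on Haswell parts (the latter
 * for eLLC-equipped ones); the actual selection is made in the GTT probe
 * code, not here.
 */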
233 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
234                                  enum i915_cache_level level,
235                                  bool valid, u32 unused)
236 {
237         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
238         pte |= GEN6_PTE_ADDR_ENCODE(addr);
239
240         switch (level) {
241         case I915_CACHE_L3_LLC:
242         case I915_CACHE_LLC:
243                 pte |= GEN6_PTE_CACHE_LLC;
244                 break;
245         case I915_CACHE_NONE:
246                 pte |= GEN6_PTE_UNCACHED;
247                 break;
248         default:
249                 MISSING_CASE(level);
250         }
251
252         return pte;
253 }
254
255 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
256                                  enum i915_cache_level level,
257                                  bool valid, u32 unused)
258 {
259         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
260         pte |= GEN6_PTE_ADDR_ENCODE(addr);
261
262         switch (level) {
263         case I915_CACHE_L3_LLC:
264                 pte |= GEN7_PTE_CACHE_L3_LLC;
265                 break;
266         case I915_CACHE_LLC:
267                 pte |= GEN6_PTE_CACHE_LLC;
268                 break;
269         case I915_CACHE_NONE:
270                 pte |= GEN6_PTE_UNCACHED;
271                 break;
272         default:
273                 MISSING_CASE(level);
274         }
275
276         return pte;
277 }
278
279 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
280                                  enum i915_cache_level level,
281                                  bool valid, u32 flags)
282 {
283         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
284         pte |= GEN6_PTE_ADDR_ENCODE(addr);
285
286         if (!(flags & PTE_READ_ONLY))
287                 pte |= BYT_PTE_WRITEABLE;
288
289         if (level != I915_CACHE_NONE)
290                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
291
292         return pte;
293 }
294
295 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
296                                  enum i915_cache_level level,
297                                  bool valid, u32 unused)
298 {
299         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
300         pte |= HSW_PTE_ADDR_ENCODE(addr);
301
302         if (level != I915_CACHE_NONE)
303                 pte |= HSW_WB_LLC_AGE3;
304
305         return pte;
306 }
307
308 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
309                                   enum i915_cache_level level,
310                                   bool valid, u32 unused)
311 {
312         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
313         pte |= HSW_PTE_ADDR_ENCODE(addr);
314
315         switch (level) {
316         case I915_CACHE_NONE:
317                 break;
318         case I915_CACHE_WT:
319                 pte |= HSW_WT_ELLC_LLC_AGE3;
320                 break;
321         default:
322                 pte |= HSW_WB_ELLC_LLC_AGE3;
323                 break;
324         }
325
326         return pte;
327 }
328
329 static int __setup_page_dma(struct drm_device *dev,
330                             struct i915_page_dma *p, gfp_t flags)
331 {
332         struct device *device = &dev->pdev->dev;
333
334         p->page = alloc_page(flags);
335         if (!p->page)
336                 return -ENOMEM;
337
338         p->daddr = dma_map_page(device,
339                                 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
340
341         if (dma_mapping_error(device, p->daddr)) {
342                 __free_page(p->page);
343                 return -EINVAL;
344         }
345
346         return 0;
347 }
348
349 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
350 {
351         return __setup_page_dma(dev, p, GFP_KERNEL);
352 }
353
354 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
355 {
356         if (WARN_ON(!p->page))
357                 return;
358
359         dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
360         __free_page(p->page);
361         memset(p, 0, sizeof(*p));
362 }
363
364 static void *kmap_page_dma(struct i915_page_dma *p)
365 {
366         return kmap_atomic(p->page);
367 }
368
369 /* We use the flushing unmap only with ppgtt structures:
370  * page directories, page tables and scratch pages.
371  */
372 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
373 {
374         /* There are only a few exceptions for gen >= 6: chv and bxt.
375          * And we are not sure about the latter, so play safe for now.
376          */
377         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
378                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
379
380         kunmap_atomic(vaddr);
381 }
382
383 #define kmap_px(px) kmap_page_dma(px_base(px))
384 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
385
386 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
387 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
388 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
389 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
390
391 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
392                           const uint64_t val)
393 {
394         int i;
395         uint64_t * const vaddr = kmap_page_dma(p);
396
397         for (i = 0; i < 512; i++)
398                 vaddr[i] = val;
399
400         kunmap_page_dma(dev, vaddr);
401 }
402
403 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
404                              const uint32_t val32)
405 {
406         uint64_t v = val32;
407
408         v = v << 32 | val32;
409
410         fill_page_dma(dev, p, v);
411 }
412
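/*
 * The scratch page backs every PTE/PDE that has no real mapping behind it.
 * It is allocated zeroed from the low 4GiB (GFP_DMA32), dma-mapped, and
 * marked uncached in the CPU page tables via set_pages_uc().
 */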
413 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
414 {
415         struct i915_page_scratch *sp;
416         int ret;
417
418         sp = kzalloc(sizeof(*sp), GFP_KERNEL);
419         if (sp == NULL)
420                 return ERR_PTR(-ENOMEM);
421
422         ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
423         if (ret) {
424                 kfree(sp);
425                 return ERR_PTR(ret);
426         }
427
428         set_pages_uc(px_page(sp), 1);
429
430         return sp;
431 }
432
433 static void free_scratch_page(struct drm_device *dev,
434                               struct i915_page_scratch *sp)
435 {
436         set_pages_wb(px_page(sp), 1);
437
438         cleanup_px(dev, sp);
439         kfree(sp);
440 }
441
442 static struct i915_page_table *alloc_pt(struct drm_device *dev)
443 {
444         struct i915_page_table *pt;
445         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
446                 GEN8_PTES : GEN6_PTES;
447         int ret = -ENOMEM;
448
449         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
450         if (!pt)
451                 return ERR_PTR(-ENOMEM);
452
453         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
454                                 GFP_KERNEL);
455
456         if (!pt->used_ptes)
457                 goto fail_bitmap;
458
459         ret = setup_px(dev, pt);
460         if (ret)
461                 goto fail_page_m;
462
463         return pt;
464
465 fail_page_m:
466         kfree(pt->used_ptes);
467 fail_bitmap:
468         kfree(pt);
469
470         return ERR_PTR(ret);
471 }
472
473 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
474 {
475         cleanup_px(dev, pt);
476         kfree(pt->used_ptes);
477         kfree(pt);
478 }
479
480 static void gen8_initialize_pt(struct i915_address_space *vm,
481                                struct i915_page_table *pt)
482 {
483         gen8_pte_t scratch_pte;
484
485         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
486                                       I915_CACHE_LLC, true);
487
488         fill_px(vm->dev, pt, scratch_pte);
489 }
490
491 static void gen6_initialize_pt(struct i915_address_space *vm,
492                                struct i915_page_table *pt)
493 {
494         gen6_pte_t scratch_pte;
495
496         WARN_ON(px_dma(vm->scratch_page) == 0);
497
498         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
499                                      I915_CACHE_LLC, true, 0);
500
501         fill32_px(vm->dev, pt, scratch_pte);
502 }
503
504 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
505 {
506         struct i915_page_directory *pd;
507         int ret = -ENOMEM;
508
509         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
510         if (!pd)
511                 return ERR_PTR(-ENOMEM);
512
513         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
514                                 sizeof(*pd->used_pdes), GFP_KERNEL);
515         if (!pd->used_pdes)
516                 goto fail_bitmap;
517
518         ret = setup_px(dev, pd);
519         if (ret)
520                 goto fail_page_m;
521
522         return pd;
523
524 fail_page_m:
525         kfree(pd->used_pdes);
526 fail_bitmap:
527         kfree(pd);
528
529         return ERR_PTR(ret);
530 }
531
532 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
533 {
534         if (px_page(pd)) {
535                 cleanup_px(dev, pd);
536                 kfree(pd->used_pdes);
537                 kfree(pd);
538         }
539 }
540
541 static void gen8_initialize_pd(struct i915_address_space *vm,
542                                struct i915_page_directory *pd)
543 {
544         gen8_pde_t scratch_pde;
545
546         scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
547
548         fill_px(vm->dev, pd, scratch_pde);
549 }
550
551 static int __pdp_init(struct drm_device *dev,
552                       struct i915_page_directory_pointer *pdp)
553 {
554         size_t pdpes = I915_PDPES_PER_PDP(dev);
555
556         pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
557                                   sizeof(unsigned long),
558                                   GFP_KERNEL);
559         if (!pdp->used_pdpes)
560                 return -ENOMEM;
561
562         pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
563                                       GFP_KERNEL);
564         if (!pdp->page_directory) {
565                 kfree(pdp->used_pdpes);
566                 /* the PDP might be the statically allocated top level. Keep it
567                  * as clean as possible */
568                 pdp->used_pdpes = NULL;
569                 return -ENOMEM;
570         }
571
572         return 0;
573 }
574
575 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
576 {
577         kfree(pdp->used_pdpes);
578         kfree(pdp->page_directory);
579         pdp->page_directory = NULL;
580 }
581
582 static struct
583 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
584 {
585         struct i915_page_directory_pointer *pdp;
586         int ret = -ENOMEM;
587
588         WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
589
590         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
591         if (!pdp)
592                 return ERR_PTR(-ENOMEM);
593
594         ret = __pdp_init(dev, pdp);
595         if (ret)
596                 goto fail_bitmap;
597
598         ret = setup_px(dev, pdp);
599         if (ret)
600                 goto fail_page_m;
601
602         return pdp;
603
604 fail_page_m:
605         __pdp_fini(pdp);
606 fail_bitmap:
607         kfree(pdp);
608
609         return ERR_PTR(ret);
610 }
611
612 static void free_pdp(struct drm_device *dev,
613                      struct i915_page_directory_pointer *pdp)
614 {
615         __pdp_fini(pdp);
616         if (USES_FULL_48BIT_PPGTT(dev)) {
617                 cleanup_px(dev, pdp);
618                 kfree(pdp);
619         }
620 }
621
622 static void gen8_initialize_pdp(struct i915_address_space *vm,
623                                 struct i915_page_directory_pointer *pdp)
624 {
625         gen8_ppgtt_pdpe_t scratch_pdpe;
626
627         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
628
629         fill_px(vm->dev, pdp, scratch_pdpe);
630 }
631
632 static void gen8_initialize_pml4(struct i915_address_space *vm,
633                                  struct i915_pml4 *pml4)
634 {
635         gen8_ppgtt_pml4e_t scratch_pml4e;
636
637         scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
638                                           I915_CACHE_LLC);
639
640         fill_px(vm->dev, pml4, scratch_pml4e);
641 }
642
643 static void
644 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
645                           struct i915_page_directory_pointer *pdp,
646                           struct i915_page_directory *pd,
647                           int index)
648 {
649         gen8_ppgtt_pdpe_t *page_directorypo;
650
651         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
652                 return;
653
654         page_directorypo = kmap_px(pdp);
655         page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
656         kunmap_px(ppgtt, page_directorypo);
657 }
658
659 static void
660 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
661                                   struct i915_pml4 *pml4,
662                                   struct i915_page_directory_pointer *pdp,
663                                   int index)
664 {
665         gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
666
667         WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
668         pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
669         kunmap_px(ppgtt, pagemap);
670 }
671
672 /* Broadwell Page Directory Pointer Descriptors */
673 static int gen8_write_pdp(struct drm_i915_gem_request *req,
674                           unsigned entry,
675                           dma_addr_t addr)
676 {
677         struct intel_engine_cs *engine = req->engine;
678         int ret;
679
680         BUG_ON(entry >= 4);
681
682         ret = intel_ring_begin(req, 6);
683         if (ret)
684                 return ret;
685
686         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
687         intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry));
688         intel_ring_emit(engine, upper_32_bits(addr));
689         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
690         intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry));
691         intel_ring_emit(engine, lower_32_bits(addr));
692         intel_ring_advance(engine);
693
694         return 0;
695 }
696
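/*
 * Two flavours of address-space switch: legacy 32b mode loads all four PDP
 * registers with the individual page directory addresses, while full 48b
 * mode only needs PDP0 loaded with the address of the PML4.
 */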
697 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
698                                  struct drm_i915_gem_request *req)
699 {
700         int i, ret;
701
702         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
703                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
704
705                 ret = gen8_write_pdp(req, i, pd_daddr);
706                 if (ret)
707                         return ret;
708         }
709
710         return 0;
711 }
712
713 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
714                               struct drm_i915_gem_request *req)
715 {
716         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
717 }
718
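/*
 * A gen8 GPU virtual address decomposes like a 4-level x86 page-table walk:
 * bits [47:39] select the PML4E, [38:30] the PDPE, [29:21] the PDE,
 * [20:12] the PTE and [11:0] the byte offset within the 4K page.  The
 * gen8_pdpe_index()/gen8_pde_index()/gen8_pte_index() helpers used below
 * extract those fields from an address.
 */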
719 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
720                                        struct i915_page_directory_pointer *pdp,
721                                        uint64_t start,
722                                        uint64_t length,
723                                        gen8_pte_t scratch_pte)
724 {
725         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
726         gen8_pte_t *pt_vaddr;
727         unsigned pdpe = gen8_pdpe_index(start);
728         unsigned pde = gen8_pde_index(start);
729         unsigned pte = gen8_pte_index(start);
730         unsigned num_entries = length >> PAGE_SHIFT;
731         unsigned last_pte, i;
732
733         if (WARN_ON(!pdp))
734                 return;
735
736         while (num_entries) {
737                 struct i915_page_directory *pd;
738                 struct i915_page_table *pt;
739
740                 if (WARN_ON(!pdp->page_directory[pdpe]))
741                         break;
742
743                 pd = pdp->page_directory[pdpe];
744
745                 if (WARN_ON(!pd->page_table[pde]))
746                         break;
747
748                 pt = pd->page_table[pde];
749
750                 if (WARN_ON(!px_page(pt)))
751                         break;
752
753                 last_pte = pte + num_entries;
754                 if (last_pte > GEN8_PTES)
755                         last_pte = GEN8_PTES;
756
757                 pt_vaddr = kmap_px(pt);
758
759                 for (i = pte; i < last_pte; i++) {
760                         pt_vaddr[i] = scratch_pte;
761                         num_entries--;
762                 }
763
764                 kunmap_px(ppgtt, pt_vaddr);
765
766                 pte = 0;
767                 if (++pde == I915_PDES) {
768                         if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
769                                 break;
770                         pde = 0;
771                 }
772         }
773 }
774
775 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
776                                    uint64_t start,
777                                    uint64_t length,
778                                    bool use_scratch)
779 {
780         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
781         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
782                                                  I915_CACHE_LLC, use_scratch);
783
784         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
785                 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
786                                            scratch_pte);
787         } else {
788                 uint64_t pml4e;
789                 struct i915_page_directory_pointer *pdp;
790
791                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
792                         gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
793                                                    scratch_pte);
794                 }
795         }
796 }
797
798 static void
799 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
800                               struct i915_page_directory_pointer *pdp,
801                               struct sg_page_iter *sg_iter,
802                               uint64_t start,
803                               enum i915_cache_level cache_level)
804 {
805         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
806         gen8_pte_t *pt_vaddr;
807         unsigned pdpe = gen8_pdpe_index(start);
808         unsigned pde = gen8_pde_index(start);
809         unsigned pte = gen8_pte_index(start);
810
811         pt_vaddr = NULL;
812
813         while (__sg_page_iter_next(sg_iter)) {
814                 if (pt_vaddr == NULL) {
815                         struct i915_page_directory *pd = pdp->page_directory[pdpe];
816                         struct i915_page_table *pt = pd->page_table[pde];
817                         pt_vaddr = kmap_px(pt);
818                 }
819
820                 pt_vaddr[pte] =
821                         gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
822                                         cache_level, true);
823                 if (++pte == GEN8_PTES) {
824                         kunmap_px(ppgtt, pt_vaddr);
825                         pt_vaddr = NULL;
826                         if (++pde == I915_PDES) {
827                                 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
828                                         break;
829                                 pde = 0;
830                         }
831                         pte = 0;
832                 }
833         }
834
835         if (pt_vaddr)
836                 kunmap_px(ppgtt, pt_vaddr);
837 }
838
839 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
840                                       struct sg_table *pages,
841                                       uint64_t start,
842                                       enum i915_cache_level cache_level,
843                                       u32 unused)
844 {
845         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
846         struct sg_page_iter sg_iter;
847
848         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
849
850         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
851                 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
852                                               cache_level);
853         } else {
854                 struct i915_page_directory_pointer *pdp;
855                 uint64_t pml4e;
856                 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
857
858                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
859                         gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
860                                                       start, cache_level);
861                 }
862         }
863 }
864
865 static void gen8_free_page_tables(struct drm_device *dev,
866                                   struct i915_page_directory *pd)
867 {
868         int i;
869
870         if (!px_page(pd))
871                 return;
872
873         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
874                 if (WARN_ON(!pd->page_table[i]))
875                         continue;
876
877                 free_pt(dev, pd->page_table[i]);
878                 pd->page_table[i] = NULL;
879         }
880 }
881
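/*
 * Build the scratch hierarchy: a scratch page, a scratch PT whose entries
 * all point at the scratch page, a scratch PD pointing at the scratch PT,
 * and (for 48b PPGTT) a scratch PDP pointing at the scratch PD.  Any
 * address that has not been explicitly mapped resolves through this chain.
 */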
882 static int gen8_init_scratch(struct i915_address_space *vm)
883 {
884         struct drm_device *dev = vm->dev;
885         int ret;
886
887         vm->scratch_page = alloc_scratch_page(dev);
888         if (IS_ERR(vm->scratch_page))
889                 return PTR_ERR(vm->scratch_page);
890
891         vm->scratch_pt = alloc_pt(dev);
892         if (IS_ERR(vm->scratch_pt)) {
893                 ret = PTR_ERR(vm->scratch_pt);
894                 goto free_scratch_page;
895         }
896
897         vm->scratch_pd = alloc_pd(dev);
898         if (IS_ERR(vm->scratch_pd)) {
899                 ret = PTR_ERR(vm->scratch_pd);
900                 goto free_pt;
901         }
902
903         if (USES_FULL_48BIT_PPGTT(dev)) {
904                 vm->scratch_pdp = alloc_pdp(dev);
905                 if (IS_ERR(vm->scratch_pdp)) {
906                         ret = PTR_ERR(vm->scratch_pdp);
907                         goto free_pd;
908                 }
909         }
910
911         gen8_initialize_pt(vm, vm->scratch_pt);
912         gen8_initialize_pd(vm, vm->scratch_pd);
913         if (USES_FULL_48BIT_PPGTT(dev))
914                 gen8_initialize_pdp(vm, vm->scratch_pdp);
915
916         return 0;
917
918 free_pd:
919         free_pd(dev, vm->scratch_pd);
920 free_pt:
921         free_pt(dev, vm->scratch_pt);
922 free_scratch_page:
923         free_scratch_page(dev, vm->scratch_page);
924
925         return ret;
926 }
927
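/*
 * When running as a vGPU guest, report the root of this PPGTT to the host
 * through the vgtif registers together with a create/destroy notification,
 * so that the host side can track (and shadow) the guest page tables.
 */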
928 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
929 {
930         enum vgt_g2v_type msg;
931         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
932         int i;
933
934         if (USES_FULL_48BIT_PPGTT(dev_priv)) {
935                 u64 daddr = px_dma(&ppgtt->pml4);
936
937                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
938                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
939
940                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
941                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
942         } else {
943                 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
944                         u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
945
946                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
947                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
948                 }
949
950                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
951                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
952         }
953
954         I915_WRITE(vgtif_reg(g2v_notify), msg);
955
956         return 0;
957 }
958
959 static void gen8_free_scratch(struct i915_address_space *vm)
960 {
961         struct drm_device *dev = vm->dev;
962
963         if (USES_FULL_48BIT_PPGTT(dev))
964                 free_pdp(dev, vm->scratch_pdp);
965         free_pd(dev, vm->scratch_pd);
966         free_pt(dev, vm->scratch_pt);
967         free_scratch_page(dev, vm->scratch_page);
968 }
969
970 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
971                                     struct i915_page_directory_pointer *pdp)
972 {
973         int i;
974
975         for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
976                 if (WARN_ON(!pdp->page_directory[i]))
977                         continue;
978
979                 gen8_free_page_tables(dev, pdp->page_directory[i]);
980                 free_pd(dev, pdp->page_directory[i]);
981         }
982
983         free_pdp(dev, pdp);
984 }
985
986 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
987 {
988         int i;
989
990         for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
991                 if (WARN_ON(!ppgtt->pml4.pdps[i]))
992                         continue;
993
994                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
995         }
996
997         cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
998 }
999
1000 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1001 {
1002         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1003
1004         if (intel_vgpu_active(to_i915(vm->dev)))
1005                 gen8_ppgtt_notify_vgt(ppgtt, false);
1006
1007         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1008                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1009         else
1010                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1011
1012         gen8_free_scratch(vm);
1013 }
1014
1015 /**
1016  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1017  * @vm: Master vm structure.
1018  * @pd: Page directory for this address range.
1019  * @start:      Starting virtual address to begin allocations.
1020  * @length:     Size of the allocations.
1021  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1022  *              caller to free on error.
1023  *
1024  * Allocate the required number of page tables. Extremely similar to
1025  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1026  * the page directory boundary (instead of the page directory pointer). That
1027  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1028  * possible, and indeed likely, that the caller will need to call this function
1029  * multiple times to achieve the appropriate allocation.
1030  *
1031  * Return: 0 if success; negative error code otherwise.
1032  */
1033 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1034                                      struct i915_page_directory *pd,
1035                                      uint64_t start,
1036                                      uint64_t length,
1037                                      unsigned long *new_pts)
1038 {
1039         struct drm_device *dev = vm->dev;
1040         struct i915_page_table *pt;
1041         uint32_t pde;
1042
1043         gen8_for_each_pde(pt, pd, start, length, pde) {
1044                 /* Don't reallocate page tables */
1045                 if (test_bit(pde, pd->used_pdes)) {
1046                         /* Scratch is never allocated this way */
1047                         WARN_ON(pt == vm->scratch_pt);
1048                         continue;
1049                 }
1050
1051                 pt = alloc_pt(dev);
1052                 if (IS_ERR(pt))
1053                         goto unwind_out;
1054
1055                 gen8_initialize_pt(vm, pt);
1056                 pd->page_table[pde] = pt;
1057                 __set_bit(pde, new_pts);
1058                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1059         }
1060
1061         return 0;
1062
1063 unwind_out:
1064         for_each_set_bit(pde, new_pts, I915_PDES)
1065                 free_pt(dev, pd->page_table[pde]);
1066
1067         return -ENOMEM;
1068 }
1069
1070 /**
1071  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1072  * @vm: Master vm structure.
1073  * @pdp:        Page directory pointer for this address range.
1074  * @start:      Starting virtual address to begin allocations.
1075  * @length:     Size of the allocations.
1076  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1077  *              caller to free on error.
1078  *
1079  * Allocate the required number of page directories starting at the pdpe index of
1080  * @start, and ending at the pdpe index @start + @length. This function will skip
1081  * over already allocated page directories within the range, and only allocate
1082  * new ones, setting the appropriate pointer within the pdp as well as the
1083  * correct position in the bitmap @new_pds.
1084  *
1085  * The function will only allocate the pages within the range for a give page
1086  * directory pointer. In other words, if @start + @length straddles a virtually
1087  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1088  * required by the caller, This is not currently possible, and the BUG in the
1089  * code will prevent it.
1090  *
1091  * Return: 0 if success; negative error code otherwise.
1092  */
1093 static int
1094 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1095                                   struct i915_page_directory_pointer *pdp,
1096                                   uint64_t start,
1097                                   uint64_t length,
1098                                   unsigned long *new_pds)
1099 {
1100         struct drm_device *dev = vm->dev;
1101         struct i915_page_directory *pd;
1102         uint32_t pdpe;
1103         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1104
1105         WARN_ON(!bitmap_empty(new_pds, pdpes));
1106
1107         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1108                 if (test_bit(pdpe, pdp->used_pdpes))
1109                         continue;
1110
1111                 pd = alloc_pd(dev);
1112                 if (IS_ERR(pd))
1113                         goto unwind_out;
1114
1115                 gen8_initialize_pd(vm, pd);
1116                 pdp->page_directory[pdpe] = pd;
1117                 __set_bit(pdpe, new_pds);
1118                 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1119         }
1120
1121         return 0;
1122
1123 unwind_out:
1124         for_each_set_bit(pdpe, new_pds, pdpes)
1125                 free_pd(dev, pdp->page_directory[pdpe]);
1126
1127         return -ENOMEM;
1128 }
1129
1130 /**
1131  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1132  * @vm: Master vm structure.
1133  * @pml4:       Page map level 4 for this address range.
1134  * @start:      Starting virtual address to begin allocations.
1135  * @length:     Size of the allocations.
1136  * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1137  *              caller to free on error.
1138  *
1139  * Allocate the required number of page directory pointers. Extremely similar to
1140  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1141  * The main difference is here we are limited by the pml4 boundary (instead of
1142  * the page directory pointer).
1143  *
1144  * Return: 0 if success; negative error code otherwise.
1145  */
1146 static int
1147 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1148                                   struct i915_pml4 *pml4,
1149                                   uint64_t start,
1150                                   uint64_t length,
1151                                   unsigned long *new_pdps)
1152 {
1153         struct drm_device *dev = vm->dev;
1154         struct i915_page_directory_pointer *pdp;
1155         uint32_t pml4e;
1156
1157         WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1158
1159         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1160                 if (!test_bit(pml4e, pml4->used_pml4es)) {
1161                         pdp = alloc_pdp(dev);
1162                         if (IS_ERR(pdp))
1163                                 goto unwind_out;
1164
1165                         gen8_initialize_pdp(vm, pdp);
1166                         pml4->pdps[pml4e] = pdp;
1167                         __set_bit(pml4e, new_pdps);
1168                         trace_i915_page_directory_pointer_entry_alloc(vm,
1169                                                                       pml4e,
1170                                                                       start,
1171                                                                       GEN8_PML4E_SHIFT);
1172                 }
1173         }
1174
1175         return 0;
1176
1177 unwind_out:
1178         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1179                 free_pdp(dev, pml4->pdps[pml4e]);
1180
1181         return -ENOMEM;
1182 }
1183
1184 static void
1185 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1186 {
1187         kfree(new_pts);
1188         kfree(new_pds);
1189 }
1190
1191 /* Allocates the page directory bitmap and the array of page table bitmaps. Both
1192  * of these are sized based on the number of PDPEs in the system.
1193  */
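/* Layout note: *new_pts is pdpes consecutive bitmaps of I915_PDES bits each;
 * callers index it as new_pts + pdpe * BITS_TO_LONGS(I915_PDES).
 */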
1194 static
1195 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1196                                          unsigned long **new_pts,
1197                                          uint32_t pdpes)
1198 {
1199         unsigned long *pds;
1200         unsigned long *pts;
1201
1202         pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1203         if (!pds)
1204                 return -ENOMEM;
1205
1206         pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1207                       GFP_TEMPORARY);
1208         if (!pts)
1209                 goto err_out;
1210
1211         *new_pds = pds;
1212         *new_pts = pts;
1213
1214         return 0;
1215
1216 err_out:
1217         free_gen8_temp_bitmaps(pds, pts);
1218         return -ENOMEM;
1219 }
1220
1221 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1222  * the page table structures, we mark them dirty so that
1223  * context switching/execlist queuing code takes extra steps
1224  * to ensure that tlbs are flushed.
1225  */
1226 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1227 {
1228         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1229 }
1230
1231 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1232                                     struct i915_page_directory_pointer *pdp,
1233                                     uint64_t start,
1234                                     uint64_t length)
1235 {
1236         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1237         unsigned long *new_page_dirs, *new_page_tables;
1238         struct drm_device *dev = vm->dev;
1239         struct i915_page_directory *pd;
1240         const uint64_t orig_start = start;
1241         const uint64_t orig_length = length;
1242         uint32_t pdpe;
1243         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1244         int ret;
1245
1246         /* Wrap is never okay since we can only represent 48b, and we don't
1247          * actually use the other side of the canonical address space.
1248          */
1249         if (WARN_ON(start + length < start))
1250                 return -ENODEV;
1251
1252         if (WARN_ON(start + length > vm->total))
1253                 return -ENODEV;
1254
1255         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1256         if (ret)
1257                 return ret;
1258
1259         /* Do the allocations first so we can easily bail out */
1260         ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1261                                                 new_page_dirs);
1262         if (ret) {
1263                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1264                 return ret;
1265         }
1266
1267         /* For every page directory referenced, allocate page tables */
1268         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1269                 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1270                                                 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1271                 if (ret)
1272                         goto err_out;
1273         }
1274
1275         start = orig_start;
1276         length = orig_length;
1277
1278         /* Allocations have completed successfully, so set the bitmaps, and do
1279          * the mappings. */
1280         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1281                 gen8_pde_t *const page_directory = kmap_px(pd);
1282                 struct i915_page_table *pt;
1283                 uint64_t pd_len = length;
1284                 uint64_t pd_start = start;
1285                 uint32_t pde;
1286
1287                 /* Every pd should be allocated; we just did that above. */
1288                 WARN_ON(!pd);
1289
1290                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1291                         /* Same reasoning as pd */
1292                         WARN_ON(!pt);
1293                         WARN_ON(!pd_len);
1294                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
1295
1296                         /* Set our used ptes within the page table */
1297                         bitmap_set(pt->used_ptes,
1298                                    gen8_pte_index(pd_start),
1299                                    gen8_pte_count(pd_start, pd_len));
1300
1301                         /* Our pde is now pointing to the pagetable, pt */
1302                         __set_bit(pde, pd->used_pdes);
1303
1304                         /* Map the PDE to the page table */
1305                         page_directory[pde] = gen8_pde_encode(px_dma(pt),
1306                                                               I915_CACHE_LLC);
1307                         trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1308                                                         gen8_pte_index(start),
1309                                                         gen8_pte_count(start, length),
1310                                                         GEN8_PTES);
1311
1312                         /* NB: We haven't yet mapped ptes to pages. At this
1313                          * point we're still relying on insert_entries() */
1314                 }
1315
1316                 kunmap_px(ppgtt, page_directory);
1317                 __set_bit(pdpe, pdp->used_pdpes);
1318                 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1319         }
1320
1321         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1322         mark_tlbs_dirty(ppgtt);
1323         return 0;
1324
1325 err_out:
1326         while (pdpe--) {
1327                 unsigned long temp;
1328
1329                 for_each_set_bit(temp, new_page_tables + pdpe *
1330                                 BITS_TO_LONGS(I915_PDES), I915_PDES)
1331                         free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1332         }
1333
1334         for_each_set_bit(pdpe, new_page_dirs, pdpes)
1335                 free_pd(dev, pdp->page_directory[pdpe]);
1336
1337         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1338         mark_tlbs_dirty(ppgtt);
1339         return ret;
1340 }
1341
1342 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1343                                     struct i915_pml4 *pml4,
1344                                     uint64_t start,
1345                                     uint64_t length)
1346 {
1347         DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1348         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1349         struct i915_page_directory_pointer *pdp;
1350         uint64_t pml4e;
1351         int ret = 0;
1352
1353         /* Do the pml4 allocations first, so we don't need to track the newly
1354          * allocated tables below the pdp */
1355         bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1356
1357         /* The pagedirectory and pagetable allocations are done in the shared 3
1358          * and 4 level code. Just allocate the pdps.
1359          */
1360         ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1361                                                 new_pdps);
1362         if (ret)
1363                 return ret;
1364
1365         WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1366              "The allocation has spanned more than 512GB. "
1367              "It is highly likely this is incorrect.");
1368
1369         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1370                 WARN_ON(!pdp);
1371
1372                 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1373                 if (ret)
1374                         goto err_out;
1375
1376                 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1377         }
1378
1379         bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1380                   GEN8_PML4ES_PER_PML4);
1381
1382         return 0;
1383
1384 err_out:
1385         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1386                 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1387
1388         return ret;
1389 }
1390
1391 static int gen8_alloc_va_range(struct i915_address_space *vm,
1392                                uint64_t start, uint64_t length)
1393 {
1394         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1395
1396         if (USES_FULL_48BIT_PPGTT(vm->dev))
1397                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1398         else
1399                 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1400 }
1401
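/*
 * Dump helper backing ppgtt->debug_dump: walk one PDP and print, four PTEs
 * per line, every PTE that does not point at the scratch page.
 */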
1402 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1403                           uint64_t start, uint64_t length,
1404                           gen8_pte_t scratch_pte,
1405                           struct seq_file *m)
1406 {
1407         struct i915_page_directory *pd;
1408         uint32_t pdpe;
1409
1410         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1411                 struct i915_page_table *pt;
1412                 uint64_t pd_len = length;
1413                 uint64_t pd_start = start;
1414                 uint32_t pde;
1415
1416                 if (!test_bit(pdpe, pdp->used_pdpes))
1417                         continue;
1418
1419                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1420                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1421                         uint32_t  pte;
1422                         gen8_pte_t *pt_vaddr;
1423
1424                         if (!test_bit(pde, pd->used_pdes))
1425                                 continue;
1426
1427                         pt_vaddr = kmap_px(pt);
1428                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1429                                 uint64_t va =
1430                                         (pdpe << GEN8_PDPE_SHIFT) |
1431                                         (pde << GEN8_PDE_SHIFT) |
1432                                         (pte << GEN8_PTE_SHIFT);
1433                                 int i;
1434                                 bool found = false;
1435
1436                                 for (i = 0; i < 4; i++)
1437                                         if (pt_vaddr[pte + i] != scratch_pte)
1438                                                 found = true;
1439                                 if (!found)
1440                                         continue;
1441
1442                                 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1443                                 for (i = 0; i < 4; i++) {
1444                                         if (pt_vaddr[pte + i] != scratch_pte)
1445                                                 seq_printf(m, " %lx", pt_vaddr[pte + i]);
1446                                         else
1447                                                 seq_puts(m, "  SCRATCH ");
1448                                 }
1449                                 seq_puts(m, "\n");
1450                         }
1451                         /* don't use kunmap_px, it could trigger
1452                          * an unnecessary flush.
1453                          */
1454                         kunmap_atomic(pt_vaddr);
1455                 }
1456         }
1457 }
1458
1459 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1460 {
1461         struct i915_address_space *vm = &ppgtt->base;
1462         uint64_t start = ppgtt->base.start;
1463         uint64_t length = ppgtt->base.total;
1464         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1465                                                  I915_CACHE_LLC, true);
1466
1467         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1468                 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1469         } else {
1470                 uint64_t pml4e;
1471                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1472                 struct i915_page_directory_pointer *pdp;
1473
1474                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1475                         if (!test_bit(pml4e, pml4->used_pml4es))
1476                                 continue;
1477
1478                         seq_printf(m, "    PML4E #%lu\n", pml4e);
1479                         gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1480                 }
1481         }
1482 }
1483
1484 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1485 {
1486         unsigned long *new_page_dirs, *new_page_tables;
1487         uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
1488         int ret;
1489
1490         /* We allocate temp bitmaps for the page tables for no gain,
1491          * but as this is for init only, let's keep things simple.
1492          */
1493         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1494         if (ret)
1495                 return ret;
1496
1497         /* Allocate for all pdps regardless of how the ppgtt
1498          * was defined.
1499          */
1500         ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1501                                                 0, 1ULL << 32,
1502                                                 new_page_dirs);
1503         if (!ret)
1504                 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1505
1506         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1507
1508         return ret;
1509 }
1510
1511 /*
1512  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1513  * registers, with a net effect resembling a 2-level page table in normal x86
1514  * terms. Each PDP entry covers 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
1515  * legacy 32b address space.
1516  *
1517  */
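/*
 * With full 48b PPGTT a fourth level (the PML4) sits on top of the PDPs:
 * 512 * 512 * 512 * 512 * 4096 bytes = 2^48 = 256TB of address space.
 * gen8_ppgtt_init() below selects between the two layouts.
 */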
1518 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1519 {
1520         int ret;
1521
1522         ret = gen8_init_scratch(&ppgtt->base);
1523         if (ret)
1524                 return ret;
1525
1526         ppgtt->base.start = 0;
1527         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1528         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1529         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1530         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1531         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1532         ppgtt->base.bind_vma = ppgtt_bind_vma;
1533         ppgtt->debug_dump = gen8_dump_ppgtt;
1534
1535         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1536                 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1537                 if (ret)
1538                         goto free_scratch;
1539
1540                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1541
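                /* 1ULL << 48 = 256 TiB of per-process GPU address space. */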
1542                 ppgtt->base.total = 1ULL << 48;
1543                 ppgtt->switch_mm = gen8_48b_mm_switch;
1544         } else {
1545                 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1546                 if (ret)
1547                         goto free_scratch;
1548
1549                 ppgtt->base.total = 1ULL << 32;
1550                 ppgtt->switch_mm = gen8_legacy_mm_switch;
1551                 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1552                                                               0, 0,
1553                                                               GEN8_PML4E_SHIFT);
1554
1555                 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
1556                         ret = gen8_preallocate_top_level_pdps(ppgtt);
1557                         if (ret)
1558                                 goto free_scratch;
1559                 }
1560         }
1561
1562         if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
1563                 gen8_ppgtt_notify_vgt(ppgtt, true);
1564
1565         return 0;
1566
1567 free_scratch:
1568         gen8_free_scratch(&ppgtt->base);
1569         return ret;
1570 }
1571
1572 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1573 {
1574         struct i915_address_space *vm = &ppgtt->base;
1575         struct i915_page_table *unused;
1576         gen6_pte_t scratch_pte;
1577         uint32_t pd_entry;
1578         uint32_t  pte, pde;
1579         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1580
1581         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1582                                      I915_CACHE_LLC, true, 0);
1583
1584         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1585                 u32 expected;
1586                 gen6_pte_t *pt_vaddr;
1587                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1588                 pd_entry = readl(ppgtt->pd_addr + pde);
1589                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1590
1591                 if (pd_entry != expected)
1592                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1593                                    pde,
1594                                    pd_entry,
1595                                    expected);
1596                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1597
1598                 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1599
1600                 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1601                         unsigned long va =
1602                                 (pde * PAGE_SIZE * GEN6_PTES) +
1603                                 (pte * PAGE_SIZE);
1604                         int i;
1605                         bool found = false;
1606                         for (i = 0; i < 4; i++)
1607                                 if (pt_vaddr[pte + i] != scratch_pte)
1608                                         found = true;
1609                         if (!found)
1610                                 continue;
1611
1612                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1613                         for (i = 0; i < 4; i++) {
1614                                 if (pt_vaddr[pte + i] != scratch_pte)
1615                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1616                                 else
1617                                         seq_puts(m, "  SCRATCH ");
1618                         }
1619                         seq_puts(m, "\n");
1620                 }
1621                 kunmap_px(ppgtt, pt_vaddr);
1622         }
1623 }
1624
1625 /* Write pde (index) from the page directory @pd to the page table @pt */
1626 static void gen6_write_pde(struct i915_page_directory *pd,
1627                             const int pde, struct i915_page_table *pt)
1628 {
1629         /* Caller needs to make sure the write completes if necessary */
1630         struct i915_hw_ppgtt *ppgtt =
1631                 container_of(pd, struct i915_hw_ppgtt, pd);
1632         u32 pd_entry;
1633
1634         pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1635         pd_entry |= GEN6_PDE_VALID;
1636
1637         writel(pd_entry, ppgtt->pd_addr + pde);
1638 }
1639
1640 /* Write all the page tables found in the ppgtt structure to their
1641  * consecutive page directory entries. */
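/* For example, gen6_ppgtt_init() below calls
 * gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total)
 * to (re)write every PDE once the page directory has been placed in the GGTT.
 */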
1642 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1643                                   struct i915_page_directory *pd,
1644                                   uint32_t start, uint32_t length)
1645 {
1646         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1647         struct i915_page_table *pt;
1648         uint32_t pde;
1649
1650         gen6_for_each_pde(pt, pd, start, length, pde)
1651                 gen6_write_pde(pd, pde, pt);
1652
1653         /* Make sure write is complete before other code can use this page
1654          * table. Also required for WC mapped PTEs. */
1655         readl(ggtt->gsm);
1656 }
1657
1658 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1659 {
1660         BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1661
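        /* The page directory's GGTT offset is presumably programmed into the
         * upper 16 bits of PP_DIR_BASE in 64-byte (cacheline) units, hence
         * the 64-byte alignment check above.
         */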
1662         return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1663 }
1664
1665 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1666                          struct drm_i915_gem_request *req)
1667 {
1668         struct intel_engine_cs *engine = req->engine;
1669         int ret;
1670
1671         /* NB: TLBs must be flushed and invalidated before a switch */
1672         ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1673         if (ret)
1674                 return ret;
1675
1676         ret = intel_ring_begin(req, 6);
1677         if (ret)
1678                 return ret;
1679
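        /* 6 dwords: the MI_LOAD_REGISTER_IMM(2) header, two (register, value)
         * pairs, and an MI_NOOP, presumably padding to an even dword count.
         */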
1680         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1681         intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1682         intel_ring_emit(engine, PP_DIR_DCLV_2G);
1683         intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1684         intel_ring_emit(engine, get_pd_offset(ppgtt));
1685         intel_ring_emit(engine, MI_NOOP);
1686         intel_ring_advance(engine);
1687
1688         return 0;
1689 }
1690
1691 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1692                           struct drm_i915_gem_request *req)
1693 {
1694         struct intel_engine_cs *engine = req->engine;
1695         int ret;
1696
1697         /* NB: TLBs must be flushed and invalidated before a switch */
1698         ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1699         if (ret)
1700                 return ret;
1701
1702         ret = intel_ring_begin(req, 6);
1703         if (ret)
1704                 return ret;
1705
1706         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1707         intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1708         intel_ring_emit(engine, PP_DIR_DCLV_2G);
1709         intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1710         intel_ring_emit(engine, get_pd_offset(ppgtt));
1711         intel_ring_emit(engine, MI_NOOP);
1712         intel_ring_advance(engine);
1713
1714         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1715         if (engine->id != RCS) {
1716                 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1717                 if (ret)
1718                         return ret;
1719         }
1720
1721         return 0;
1722 }
1723
1724 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1725                           struct drm_i915_gem_request *req)
1726 {
1727         struct intel_engine_cs *engine = req->engine;
1728         struct drm_i915_private *dev_priv = req->i915;
1729
1730         I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1731         I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1732         return 0;
1733 }
1734
1735 static void gen8_ppgtt_enable(struct drm_device *dev)
1736 {
1737         struct drm_i915_private *dev_priv = to_i915(dev);
1738         struct intel_engine_cs *engine;
1739
1740         for_each_engine(engine, dev_priv) {
1741                 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1742                 I915_WRITE(RING_MODE_GEN7(engine),
1743                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1744         }
1745 }
1746
1747 static void gen7_ppgtt_enable(struct drm_device *dev)
1748 {
1749         struct drm_i915_private *dev_priv = to_i915(dev);
1750         struct intel_engine_cs *engine;
1751         uint32_t ecochk, ecobits;
1752
1753         ecobits = I915_READ(GAC_ECO_BITS);
1754         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1755
1756         ecochk = I915_READ(GAM_ECOCHK);
1757         if (IS_HASWELL(dev)) {
1758                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1759         } else {
1760                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1761                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1762         }
1763         I915_WRITE(GAM_ECOCHK, ecochk);
1764
1765         for_each_engine(engine, dev_priv) {
1766                 /* GFX_MODE is per-ring on gen7+ */
1767                 I915_WRITE(RING_MODE_GEN7(engine),
1768                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1769         }
1770 }
1771
1772 static void gen6_ppgtt_enable(struct drm_device *dev)
1773 {
1774         struct drm_i915_private *dev_priv = to_i915(dev);
1775         uint32_t ecochk, gab_ctl, ecobits;
1776
1777         ecobits = I915_READ(GAC_ECO_BITS);
1778         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1779                    ECOBITS_PPGTT_CACHE64B);
1780
1781         gab_ctl = I915_READ(GAB_CTL);
1782         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1783
1784         ecochk = I915_READ(GAM_ECOCHK);
1785         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1786
1787         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1788 }
1789
1790 /* PPGTT support for Sandybridge/Gen6 and later */
1791 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1792                                    uint64_t start,
1793                                    uint64_t length,
1794                                    bool use_scratch)
1795 {
1796         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1797         gen6_pte_t *pt_vaddr, scratch_pte;
1798         unsigned first_entry = start >> PAGE_SHIFT;
1799         unsigned num_entries = length >> PAGE_SHIFT;
1800         unsigned act_pt = first_entry / GEN6_PTES;
1801         unsigned first_pte = first_entry % GEN6_PTES;
1802         unsigned last_pte, i;
1803
1804         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1805                                      I915_CACHE_LLC, true, 0);
1806
1807         while (num_entries) {
1808                 last_pte = first_pte + num_entries;
1809                 if (last_pte > GEN6_PTES)
1810                         last_pte = GEN6_PTES;
1811
1812                 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1813
1814                 for (i = first_pte; i < last_pte; i++)
1815                         pt_vaddr[i] = scratch_pte;
1816
1817                 kunmap_px(ppgtt, pt_vaddr);
1818
1819                 num_entries -= last_pte - first_pte;
1820                 first_pte = 0;
1821                 act_pt++;
1822         }
1823 }
1824
1825 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1826                                       struct sg_table *pages,
1827                                       uint64_t start,
1828                                       enum i915_cache_level cache_level, u32 flags)
1829 {
1830         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1831         unsigned first_entry = start >> PAGE_SHIFT;
1832         unsigned act_pt = first_entry / GEN6_PTES;
1833         unsigned act_pte = first_entry % GEN6_PTES;
1834         gen6_pte_t *pt_vaddr = NULL;
1835         struct sgt_iter sgt_iter;
1836         dma_addr_t addr;
1837
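        /* Walk the scatterlist one page at a time, mapping each page table
         * lazily and moving on to the next table once all GEN6_PTES entries
         * of the current one have been written.
         */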
1838         for_each_sgt_dma(addr, sgt_iter, pages) {
1839                 if (pt_vaddr == NULL)
1840                         pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1841
1842                 pt_vaddr[act_pte] =
1843                         vm->pte_encode(addr, cache_level, true, flags);
1844
1845                 if (++act_pte == GEN6_PTES) {
1846                         kunmap_px(ppgtt, pt_vaddr);
1847                         pt_vaddr = NULL;
1848                         act_pt++;
1849                         act_pte = 0;
1850                 }
1851         }
1852
1853         if (pt_vaddr)
1854                 kunmap_px(ppgtt, pt_vaddr);
1855 }
1856
1857 static int gen6_alloc_va_range(struct i915_address_space *vm,
1858                                uint64_t start_in, uint64_t length_in)
1859 {
1860         DECLARE_BITMAP(new_page_tables, I915_PDES);
1861         struct drm_device *dev = vm->dev;
1862         struct drm_i915_private *dev_priv = to_i915(dev);
1863         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1864         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1865         struct i915_page_table *pt;
1866         uint32_t start, length, start_save, length_save;
1867         uint32_t pde;
1868         int ret;
1869
1870         if (WARN_ON(start_in + length_in > ppgtt->base.total))
1871                 return -ENODEV;
1872
1873         start = start_save = start_in;
1874         length = length_save = length_in;
1875
1876         bitmap_zero(new_page_tables, I915_PDES);
1877
1878         /* The allocation is done in two stages so that we can bail out with
1879          * a minimal amount of pain. The first stage finds new page tables that
1880          * need allocation. The second stage marks the used PTEs within the
1881          * page tables.
1882          */
1883         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1884                 if (pt != vm->scratch_pt) {
1885                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1886                         continue;
1887                 }
1888
1889                 /* We've already allocated a page table */
1890                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1891
1892                 pt = alloc_pt(dev);
1893                 if (IS_ERR(pt)) {
1894                         ret = PTR_ERR(pt);
1895                         goto unwind_out;
1896                 }
1897
1898                 gen6_initialize_pt(vm, pt);
1899
1900                 ppgtt->pd.page_table[pde] = pt;
1901                 __set_bit(pde, new_page_tables);
1902                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1903         }
1904
1905         start = start_save;
1906         length = length_save;
1907
1908         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1909                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1910
1911                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1912                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1913                            gen6_pte_count(start, length));
1914
1915                 if (__test_and_clear_bit(pde, new_page_tables))
1916                         gen6_write_pde(&ppgtt->pd, pde, pt);
1917
1918                 trace_i915_page_table_entry_map(vm, pde, pt,
1919                                          gen6_pte_index(start),
1920                                          gen6_pte_count(start, length),
1921                                          GEN6_PTES);
1922                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1923                                 GEN6_PTES);
1924         }
1925
1926         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1927
1928         /* Make sure write is complete before other code can use this page
1929          * table. Also required for WC mapped PTEs. */
1930         readl(ggtt->gsm);
1931
1932         mark_tlbs_dirty(ppgtt);
1933         return 0;
1934
1935 unwind_out:
1936         for_each_set_bit(pde, new_page_tables, I915_PDES) {
1937                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1938
1939                 ppgtt->pd.page_table[pde] = vm->scratch_pt;
1940                 free_pt(vm->dev, pt);
1941         }
1942
1943         mark_tlbs_dirty(ppgtt);
1944         return ret;
1945 }
1946
1947 static int gen6_init_scratch(struct i915_address_space *vm)
1948 {
1949         struct drm_device *dev = vm->dev;
1950
1951         vm->scratch_page = alloc_scratch_page(dev);
1952         if (IS_ERR(vm->scratch_page))
1953                 return PTR_ERR(vm->scratch_page);
1954
1955         vm->scratch_pt = alloc_pt(dev);
1956         if (IS_ERR(vm->scratch_pt)) {
1957                 free_scratch_page(dev, vm->scratch_page);
1958                 return PTR_ERR(vm->scratch_pt);
1959         }
1960
1961         gen6_initialize_pt(vm, vm->scratch_pt);
1962
1963         return 0;
1964 }
1965
1966 static void gen6_free_scratch(struct i915_address_space *vm)
1967 {
1968         struct drm_device *dev = vm->dev;
1969
1970         free_pt(dev, vm->scratch_pt);
1971         free_scratch_page(dev, vm->scratch_page);
1972 }
1973
1974 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1975 {
1976         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1977         struct i915_page_directory *pd = &ppgtt->pd;
1978         struct drm_device *dev = vm->dev;
1979         struct i915_page_table *pt;
1980         uint32_t pde;
1981
1982         drm_mm_remove_node(&ppgtt->node);
1983
1984         gen6_for_all_pdes(pt, pd, pde)
1985                 if (pt != vm->scratch_pt)
1986                         free_pt(dev, pt);
1987
1988         gen6_free_scratch(vm);
1989 }
1990
1991 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1992 {
1993         struct i915_address_space *vm = &ppgtt->base;
1994         struct drm_device *dev = ppgtt->base.dev;
1995         struct drm_i915_private *dev_priv = to_i915(dev);
1996         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1997         bool retried = false;
1998         int ret;
1999
2000         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2001          * allocator works in address space sizes, so it's multiplied by page
2002          * size. We allocate at the top of the GTT to avoid fragmentation.
2003          */
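        /* Assuming GEN6_PD_SIZE == I915_PDES * PAGE_SIZE: reserving one page
         * of GGTT address space per PDE leaves the directory with one GGTT
         * PTE slot per entry inside the GSM, which is what gen6_write_pde()
         * later writes through.
         */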
2004         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2005
2006         ret = gen6_init_scratch(vm);
2007         if (ret)
2008                 return ret;
2009
2010 alloc:
2011         ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2012                                                   &ppgtt->node, GEN6_PD_SIZE,
2013                                                   GEN6_PD_ALIGN, 0,
2014                                                   0, ggtt->base.total,
2015                                                   DRM_MM_TOPDOWN);
2016         if (ret == -ENOSPC && !retried) {
2017                 ret = i915_gem_evict_something(dev, &ggtt->base,
2018                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
2019                                                I915_CACHE_NONE,
2020                                                0, ggtt->base.total,
2021                                                0);
2022                 if (ret)
2023                         goto err_out;
2024
2025                 retried = true;
2026                 goto alloc;
2027         }
2028
2029         if (ret)
2030                 goto err_out;
2031
2032
2033         if (ppgtt->node.start < ggtt->mappable_end)
2034                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2035
2036         return 0;
2037
2038 err_out:
2039         gen6_free_scratch(vm);
2040         return ret;
2041 }
2042
2043 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2044 {
2045         return gen6_ppgtt_allocate_page_directories(ppgtt);
2046 }
2047
2048 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2049                                   uint64_t start, uint64_t length)
2050 {
2051         struct i915_page_table *unused;
2052         uint32_t pde;
2053
2054         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2055                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2056 }
2057
2058 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2059 {
2060         struct drm_device *dev = ppgtt->base.dev;
2061         struct drm_i915_private *dev_priv = to_i915(dev);
2062         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2063         int ret;
2064
2065         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2066         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev))
2067                 ppgtt->switch_mm = gen6_mm_switch;
2068         else if (IS_HASWELL(dev))
2069                 ppgtt->switch_mm = hsw_mm_switch;
2070         else if (IS_GEN7(dev))
2071                 ppgtt->switch_mm = gen7_mm_switch;
2072         else
2073                 BUG();
2074
2075         ret = gen6_ppgtt_alloc(ppgtt);
2076         if (ret)
2077                 return ret;
2078
2079         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2080         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2081         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2082         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2083         ppgtt->base.bind_vma = ppgtt_bind_vma;
2084         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2085         ppgtt->base.start = 0;
2086         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2087         ppgtt->debug_dump = gen6_dump_ppgtt;
2088
2089         ppgtt->pd.base.ggtt_offset =
2090                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2091
2092         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2093                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
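        /* node.start / PAGE_SIZE is the index of the GGTT PTE backing the
         * start of the reservation above; scaling by sizeof(gen6_pte_t) turns
         * it into a byte offset into the GSM, so pd_addr points at the first
         * PDE slot. Assuming I915_PDES = 512 and GEN6_PTES = 1024, the total
         * set above works out to 512 * 1024 * 4 KiB = 2 GiB of address space.
         */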
2094
2095         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2096
2097         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2098
2099         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2100                          ppgtt->node.size >> 20,
2101                          ppgtt->node.start / PAGE_SIZE);
2102
2103         DRM_DEBUG("Adding PPGTT at offset %x\n",
2104                   ppgtt->pd.base.ggtt_offset << 10);
2105
2106         return 0;
2107 }
2108
2109 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2110 {
2111         ppgtt->base.dev = dev;
2112
2113         if (INTEL_INFO(dev)->gen < 8)
2114                 return gen6_ppgtt_init(ppgtt);
2115         else
2116                 return gen8_ppgtt_init(ppgtt);
2117 }
2118
2119 static void i915_address_space_init(struct i915_address_space *vm,
2120                                     struct drm_i915_private *dev_priv)
2121 {
2122         drm_mm_init(&vm->mm, vm->start, vm->total);
2123         vm->dev = &dev_priv->drm;
2124         INIT_LIST_HEAD(&vm->active_list);
2125         INIT_LIST_HEAD(&vm->inactive_list);
2126         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2127 }
2128
2129 static void gtt_write_workarounds(struct drm_device *dev)
2130 {
2131         struct drm_i915_private *dev_priv = to_i915(dev);
2132
2133         /* This function is for GTT-related workarounds. It is called on
2134          * driver load and after a GPU reset, so you can place workarounds
2135          * here even if they get overwritten by a GPU reset.
2136          */
2137         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2138         if (IS_BROADWELL(dev))
2139                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2140         else if (IS_CHERRYVIEW(dev))
2141                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2142         else if (IS_SKYLAKE(dev))
2143                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2144         else if (IS_BROXTON(dev))
2145                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2146 }
2147
2148 static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2149 {
2150         struct drm_i915_private *dev_priv = to_i915(dev);
2151         int ret = 0;
2152
2153         ret = __hw_ppgtt_init(dev, ppgtt);
2154         if (ret == 0) {
2155                 kref_init(&ppgtt->ref);
2156                 i915_address_space_init(&ppgtt->base, dev_priv);
2157         }
2158
2159         return ret;
2160 }
2161
2162 int i915_ppgtt_init_hw(struct drm_device *dev)
2163 {
2164         gtt_write_workarounds(dev);
2165
2166         /* In the case of execlists, PPGTT is enabled by the context descriptor
2167          * and the PDPs are contained within the context itself.  We don't
2168          * need to do anything here. */
2169         if (i915.enable_execlists)
2170                 return 0;
2171
2172         if (!USES_PPGTT(dev))
2173                 return 0;
2174
2175         if (IS_GEN6(dev))
2176                 gen6_ppgtt_enable(dev);
2177         else if (IS_GEN7(dev))
2178                 gen7_ppgtt_enable(dev);
2179         else if (INTEL_INFO(dev)->gen >= 8)
2180                 gen8_ppgtt_enable(dev);
2181         else
2182                 MISSING_CASE(INTEL_INFO(dev)->gen);
2183
2184         return 0;
2185 }
2186
2187 struct i915_hw_ppgtt *
2188 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2189 {
2190         struct i915_hw_ppgtt *ppgtt;
2191         int ret;
2192
2193         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2194         if (!ppgtt)
2195                 return ERR_PTR(-ENOMEM);
2196
2197         ret = i915_ppgtt_init(dev, ppgtt);
2198         if (ret) {
2199                 kfree(ppgtt);
2200                 return ERR_PTR(ret);
2201         }
2202
2203         ppgtt->file_priv = fpriv;
2204
2205         trace_i915_ppgtt_create(&ppgtt->base);
2206
2207         return ppgtt;
2208 }
2209
2210 void  i915_ppgtt_release(struct kref *kref)
2211 {
2212         struct i915_hw_ppgtt *ppgtt =
2213                 container_of(kref, struct i915_hw_ppgtt, ref);
2214
2215         trace_i915_ppgtt_release(&ppgtt->base);
2216
2217         /* vmas should already be unbound */
2218         WARN_ON(!list_empty(&ppgtt->base.active_list));
2219         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2220
2221         list_del(&ppgtt->base.global_link);
2222         drm_mm_takedown(&ppgtt->base.mm);
2223
2224         ppgtt->base.cleanup(&ppgtt->base);
2225         kfree(ppgtt);
2226 }
2227
2228 extern int intel_iommu_gfx_mapped;
2229 /* Certain Gen5 chipsets require idling the GPU before
2230  * unmapping anything from the GTT when VT-d is enabled.
2231  */
2232 static bool needs_idle_maps(struct drm_device *dev)
2233 {
2234 #ifdef CONFIG_INTEL_IOMMU
2235         /* Query intel_iommu to see if we need the workaround. Presumably that
2236          * was loaded first.
2237          */
2238         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2239                 return true;
2240 #endif
2241         return false;
2242 }
2243
2244 static bool do_idling(struct drm_i915_private *dev_priv)
2245 {
2246         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2247         bool ret = dev_priv->mm.interruptible;
2248
2249         if (unlikely(ggtt->do_idle_maps)) {
2250                 dev_priv->mm.interruptible = false;
2251                 if (i915_gem_wait_for_idle(dev_priv)) {
2252                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2253                         /* Wait a bit, in hopes it avoids the hang */
2254                         udelay(10);
2255                 }
2256         }
2257
2258         return ret;
2259 }
2260
2261 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2262 {
2263         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2264
2265         if (unlikely(ggtt->do_idle_maps))
2266                 dev_priv->mm.interruptible = interruptible;
2267 }
2268
2269 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2270 {
2271         struct intel_engine_cs *engine;
2272
2273         if (INTEL_INFO(dev_priv)->gen < 6)
2274                 return;
2275
2276         for_each_engine(engine, dev_priv) {
2277                 u32 fault_reg;
2278                 fault_reg = I915_READ(RING_FAULT_REG(engine));
2279                 if (fault_reg & RING_FAULT_VALID) {
2280                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2281                                          "\tAddr: 0x%08x\n"
2282                                          "\tAddress space: %s\n"
2283                                          "\tSource ID: %d\n"
2284                                          "\tType: %d\n",
2285                                          fault_reg & LINUX_PAGE_MASK,
2286                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2287                                          RING_FAULT_SRCID(fault_reg),
2288                                          RING_FAULT_FAULT_TYPE(fault_reg));
2289                         I915_WRITE(RING_FAULT_REG(engine),
2290                                    fault_reg & ~RING_FAULT_VALID);
2291                 }
2292         }
2293         POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
2294 }
2295
2296 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2297 {
2298         if (INTEL_INFO(dev_priv)->gen < 6) {
2299                 intel_gtt_chipset_flush();
2300         } else {
2301                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2302                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2303         }
2304 }
2305
2306 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2307 {
2308         struct drm_i915_private *dev_priv = to_i915(dev);
2309         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2310
2311         /* Don't bother messing with faults pre GEN6 as we have little
2312          * documentation supporting that it's a good idea.
2313          */
2314         if (INTEL_INFO(dev)->gen < 6)
2315                 return;
2316
2317         i915_check_and_clear_faults(dev_priv);
2318
2319         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
2320                              true);
2321
2322         i915_ggtt_flush(dev_priv);
2323 }
2324
2325 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2326 {
2327         if (!dma_map_sg(&obj->base.dev->pdev->dev,
2328                         obj->pages->sgl, obj->pages->nents,
2329                         PCI_DMA_BIDIRECTIONAL))
2330                 return -ENOSPC;
2331
2332         return 0;
2333 }
2334
2335 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2336 {
2337 #ifdef writeq
2338         writeq(pte, addr);
2339 #else
2340         iowrite32((u32)pte, addr);
2341         iowrite32(pte >> 32, addr + 4);
2342 #endif
2343 }
2344
2345 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2346                                   dma_addr_t addr,
2347                                   uint64_t offset,
2348                                   enum i915_cache_level level,
2349                                   u32 unused)
2350 {
2351         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2352         gen8_pte_t __iomem *pte =
2353                 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
2354                 (offset >> PAGE_SHIFT);
2355         int rpm_atomic_seq;
2356
2357         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2358
2359         gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
2360
2361         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2362         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2363
2364         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2365 }
2366
2367 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2368                                      struct sg_table *st,
2369                                      uint64_t start,
2370                                      enum i915_cache_level level, u32 unused)
2371 {
2372         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2373         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2374         struct sgt_iter sgt_iter;
2375         gen8_pte_t __iomem *gtt_entries;
2376         gen8_pte_t gtt_entry;
2377         dma_addr_t addr;
2378         int rpm_atomic_seq;
2379         int i = 0;
2380
2381         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2382
2383         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2384
2385         for_each_sgt_dma(addr, sgt_iter, st) {
2386                 gtt_entry = gen8_pte_encode(addr, level, true);
2387                 gen8_set_pte(&gtt_entries[i++], gtt_entry);
2388         }
2389
2390         /*
2391          * XXX: This serves as a posting read to make sure that the PTE has
2392          * actually been updated. There is some concern that even though
2393          * registers and PTEs are within the same BAR, they may be subject
2394          * to different (e.g. NUMA) access patterns. Therefore, even with the
2395          * way we assume hardware should work, we keep this posting read for paranoia.
2396          */
2397         if (i != 0)
2398                 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2399
2400         /* This next bit makes the above posting read even more important. We
2401          * want to flush the TLBs only after we're certain all the PTE updates
2402          * have finished.
2403          */
2404         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2405         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2406
2407         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2408 }
2409
2410 struct insert_entries {
2411         struct i915_address_space *vm;
2412         struct sg_table *st;
2413         uint64_t start;
2414         enum i915_cache_level level;
2415         u32 flags;
2416 };
2417
2418 static int gen8_ggtt_insert_entries__cb(void *_arg)
2419 {
2420         struct insert_entries *arg = _arg;
2421         gen8_ggtt_insert_entries(arg->vm, arg->st,
2422                                  arg->start, arg->level, arg->flags);
2423         return 0;
2424 }
2425
2426 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2427                                           struct sg_table *st,
2428                                           uint64_t start,
2429                                           enum i915_cache_level level,
2430                                           u32 flags)
2431 {
2432         struct insert_entries arg = { vm, st, start, level, flags };
2433 #ifndef __DragonFly__
2434         stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2435 #else
2436         /* XXX: is this enough ?
2437          * See Linux commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 */
2438         get_mplock();
2439         gen8_ggtt_insert_entries__cb(&arg);
2440         rel_mplock();
2441 #endif
2442 }
2443
2444 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2445                                   dma_addr_t addr,
2446                                   uint64_t offset,
2447                                   enum i915_cache_level level,
2448                                   u32 flags)
2449 {
2450         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2451         gen6_pte_t __iomem *pte =
2452                 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
2453                 (offset >> PAGE_SHIFT);
2454         int rpm_atomic_seq;
2455
2456         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2457
2458         iowrite32(vm->pte_encode(addr, level, true, flags), pte);
2459
2460         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2461         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2462
2463         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2464 }
2465
2466 /*
2467  * Binds an object into the global gtt with the specified cache level. The object
2468  * will be accessible to the GPU via commands whose operands reference offsets
2469  * within the global GTT as well as accessible by the GPU through the GMADR
2470  * mapped BAR (dev_priv->mm.gtt->gtt).
2471  */
2472 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2473                                      struct sg_table *st,
2474                                      uint64_t start,
2475                                      enum i915_cache_level level, u32 flags)
2476 {
2477         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2478         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2479         struct sgt_iter sgt_iter;
2480         gen6_pte_t __iomem *gtt_entries;
2481         gen6_pte_t gtt_entry;
2482         dma_addr_t addr;
2483         int rpm_atomic_seq;
2484         int i = 0;
2485
2486         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2487
2488         gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2489
2490         for_each_sgt_dma(addr, sgt_iter, st) {
2491                 gtt_entry = vm->pte_encode(addr, level, true, flags);
2492                 iowrite32(gtt_entry, &gtt_entries[i++]);
2493         }
2494
2495         /* XXX: This serves as a posting read to make sure that the PTE has
2496          * actually been updated. There is some concern that even though
2497          * registers and PTEs are within the same BAR, they may be subject
2498          * to different (e.g. NUMA) access patterns. Therefore, even with the
2499          * way we assume hardware should work, we keep this posting read for paranoia.
2500          */
2501         if (i != 0)
2502                 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2503
2504         /* This next bit makes the above posting read even more important. We
2505          * want to flush the TLBs only after we're certain all the PTE updates
2506          * have finished.
2507          */
2508         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2509         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2510
2511         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2512 }
2513
2514 static void nop_clear_range(struct i915_address_space *vm,
2515                             uint64_t start,
2516                             uint64_t length,
2517                             bool use_scratch)
2518 {
2519 }
2520
2521 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2522                                   uint64_t start,
2523                                   uint64_t length,
2524                                   bool use_scratch)
2525 {
2526         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2527         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2528         unsigned first_entry = start >> PAGE_SHIFT;
2529         unsigned num_entries = length >> PAGE_SHIFT;
2530         gen8_pte_t scratch_pte, __iomem *gtt_base =
2531                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2532         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2533         int i;
2534         int rpm_atomic_seq;
2535
2536         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2537
2538         if (WARN(num_entries > max_entries,
2539                  "First entry = %d; Num entries = %d (max=%d)\n",
2540                  first_entry, num_entries, max_entries))
2541                 num_entries = max_entries;
2542
2543         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2544                                       I915_CACHE_LLC,
2545                                       use_scratch);
2546         for (i = 0; i < num_entries; i++)
2547                 gen8_set_pte(&gtt_base[i], scratch_pte);
2548         readl(gtt_base);
2549
2550         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2551 }
2552
2553 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2554                                   uint64_t start,
2555                                   uint64_t length,
2556                                   bool use_scratch)
2557 {
2558         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2559         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2560         unsigned first_entry = start >> PAGE_SHIFT;
2561         unsigned num_entries = length >> PAGE_SHIFT;
2562         gen6_pte_t scratch_pte, __iomem *gtt_base =
2563                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2564         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2565         int i;
2566         int rpm_atomic_seq;
2567
2568         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2569
2570         if (WARN(num_entries > max_entries,
2571                  "First entry = %d; Num entries = %d (max=%d)\n",
2572                  first_entry, num_entries, max_entries))
2573                 num_entries = max_entries;
2574
2575         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2576                                      I915_CACHE_LLC, use_scratch, 0);
2577
2578         for (i = 0; i < num_entries; i++)
2579                 iowrite32(scratch_pte, &gtt_base[i]);
2580         readl(gtt_base);
2581
2582         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2583 }
2584
2585 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2586                                   dma_addr_t addr,
2587                                   uint64_t offset,
2588                                   enum i915_cache_level cache_level,
2589                                   u32 unused)
2590 {
2591         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2592         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2593                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2594         int rpm_atomic_seq;
2595
2596         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2597
2598         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2599
2600         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2601 }
2602
2603 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2604                                      struct sg_table *pages,
2605                                      uint64_t start,
2606                                      enum i915_cache_level cache_level, u32 unused)
2607 {
2608         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2609         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2610                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2611         int rpm_atomic_seq;
2612
2613         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2614
2615         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2616
2617         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2618
2619 }
2620
2621 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2622                                   uint64_t start,
2623                                   uint64_t length,
2624                                   bool unused)
2625 {
2626         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2627         unsigned first_entry = start >> PAGE_SHIFT;
2628         unsigned num_entries = length >> PAGE_SHIFT;
2629         int rpm_atomic_seq;
2630
2631         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2632
2633         intel_gtt_clear_range(first_entry, num_entries);
2634
2635         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2636 }
2637
2638 static int ggtt_bind_vma(struct i915_vma *vma,
2639                          enum i915_cache_level cache_level,
2640                          u32 flags)
2641 {
2642         struct drm_i915_gem_object *obj = vma->obj;
2643         u32 pte_flags = 0;
2644         int ret;
2645
2646         ret = i915_get_ggtt_vma_pages(vma);
2647         if (ret)
2648                 return ret;
2649
2650         /* Currently applicable only to VLV */
2651         if (obj->gt_ro)
2652                 pte_flags |= PTE_READ_ONLY;
2653
2654         vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2655                                 vma->node.start,
2656                                 cache_level, pte_flags);
2657
2658         /*
2659          * Without aliasing PPGTT there's no difference between
2660          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2661          * upgrade to both bound if we bind either to avoid double-binding.
2662          */
2663         vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2664
2665         return 0;
2666 }
2667
2668 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2669                                  enum i915_cache_level cache_level,
2670                                  u32 flags)
2671 {
2672         u32 pte_flags;
2673         int ret;
2674
2675         ret = i915_get_ggtt_vma_pages(vma);
2676         if (ret)
2677                 return ret;
2678
2679         /* Currently applicable only to VLV */
2680         pte_flags = 0;
2681         if (vma->obj->gt_ro)
2682                 pte_flags |= PTE_READ_ONLY;
2683
2684
2685         if (flags & GLOBAL_BIND) {
2686                 vma->vm->insert_entries(vma->vm,
2687                                         vma->ggtt_view.pages,
2688                                         vma->node.start,
2689                                         cache_level, pte_flags);
2690         }
2691
2692         if (flags & LOCAL_BIND) {
2693                 struct i915_hw_ppgtt *appgtt =
2694                         to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2695                 appgtt->base.insert_entries(&appgtt->base,
2696                                             vma->ggtt_view.pages,
2697                                             vma->node.start,
2698                                             cache_level, pte_flags);
2699         }
2700
2701         return 0;
2702 }
2703
2704 static void ggtt_unbind_vma(struct i915_vma *vma)
2705 {
2706         struct drm_device *dev = vma->vm->dev;
2707         struct drm_i915_private *dev_priv = to_i915(dev);
2708         struct drm_i915_gem_object *obj = vma->obj;
2709         const uint64_t size = min_t(uint64_t,
2710                                     obj->base.size,
2711                                     vma->node.size);
2712
2713         if (vma->bound & GLOBAL_BIND) {
2714                 vma->vm->clear_range(vma->vm,
2715                                      vma->node.start,
2716                                      size,
2717                                      true);
2718         }
2719
2720         if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2721                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2722
2723                 appgtt->base.clear_range(&appgtt->base,
2724                                          vma->node.start,
2725                                          size,
2726                                          true);
2727         }
2728 }
2729
2730 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2731 {
2732         struct drm_device *dev = obj->base.dev;
2733         struct drm_i915_private *dev_priv = to_i915(dev);
2734         bool interruptible;
2735
2736         interruptible = do_idling(dev_priv);
2737
2738         dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2739                      PCI_DMA_BIDIRECTIONAL);
2740
2741         undo_idling(dev_priv, interruptible);
2742 }
2743
2744 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2745                                   unsigned long color,
2746                                   u64 *start,
2747                                   u64 *end)
2748 {
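        /* Keep a one-page gap between nodes of differing cache "color"
         * (cache level), presumably so prefetching past the end of one object
         * never touches PTEs with conflicting cacheability; this hook is only
         * installed on !HAS_LLC platforms, see i915_gem_setup_global_gtt().
         */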
2749         if (node->color != color)
2750                 *start += 4096;
2751
2752         if (!list_empty(&node->node_list)) {
2753                 node = list_entry(node->node_list.next,
2754                                   struct drm_mm_node,
2755                                   node_list);
2756                 if (node->allocated && node->color != color)
2757                         *end -= 4096;
2758         }
2759 }
2760
2761 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2762                                      u64 start,
2763                                      u64 mappable_end,
2764                                      u64 end)
2765 {
2766         /* Let GEM Manage all of the aperture.
2767          *
2768          * However, leave one page at the end still bound to the scratch page.
2769          * There are a number of places where the hardware apparently prefetches
2770          * past the end of the object, and we've seen multiple hangs with the
2771          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2772          * aperture.  One page should be enough to keep any prefetching inside
2773          * of the aperture.
2774          */
2775         struct drm_i915_private *dev_priv = to_i915(dev);
2776         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2777         struct drm_mm_node *entry;
2778         struct drm_i915_gem_object *obj;
2779         unsigned long hole_start, hole_end;
2780         int ret;
2781         unsigned long mappable = min(end, mappable_end) - start;
2782         int error;
2783
2784         BUG_ON(mappable_end > end);
2785
2786         ggtt->base.start = start;
2787
2788         /* Subtract the guard page before address space initialization to
2789          * shrink the range used by drm_mm */
2790         ggtt->base.total = end - start - PAGE_SIZE;
2791         i915_address_space_init(&ggtt->base, dev_priv);
2792         ggtt->base.total += PAGE_SIZE;
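        /* drm_mm now manages everything except the final guard page, while
         * ggtt->base.total again reports the full size; the guard page itself
         * is pointed at scratch by the clear_range() call further down.
         */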
2793
2794         ret = intel_vgt_balloon(dev_priv);
2795         if (ret)
2796                 return ret;
2797
2798         if (!HAS_LLC(dev))
2799                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
2800
2801         /* Mark any preallocated objects as occupied */
2802         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2803                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base);
2804
2805                 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2806                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
2807
2808                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2809                 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
2810                 if (ret) {
2811                         DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2812                         return ret;
2813                 }
2814                 vma->bound |= GLOBAL_BIND;
2815                 __i915_vma_set_map_and_fenceable(vma);
2816                 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list);
2817         }
2818
2819         /* Clear any non-preallocated blocks */
2820         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2821                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2822                               hole_start, hole_end);
2823                 ggtt->base.clear_range(&ggtt->base, hole_start,
2824                                      hole_end - hole_start, true);
2825         }
2826
2827 #ifdef __DragonFly__
2828         device_printf(dev->dev->bsddev,
2829             "taking over the fictitious range 0x%llx-0x%llx\n",
2830             dev_priv->ggtt.mappable_base + start, dev_priv->ggtt.mappable_base + start + mappable);
2831         error = -vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base + start,
2832             dev_priv->ggtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
2833 #endif
2834
2835         /* And finally clear the reserved guard page */
2836         ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true);
2837
2838         if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2839                 struct i915_hw_ppgtt *ppgtt;
2840
2841                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2842                 if (!ppgtt)
2843                         return -ENOMEM;
2844
2845                 ret = __hw_ppgtt_init(dev, ppgtt);
2846                 if (ret) {
2847                         ppgtt->base.cleanup(&ppgtt->base);
2848                         kfree(ppgtt);
2849                         return ret;
2850                 }
2851
2852                 if (ppgtt->base.allocate_va_range)
2853                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2854                                                             ppgtt->base.total);
2855                 if (ret) {
2856                         ppgtt->base.cleanup(&ppgtt->base);
2857                         kfree(ppgtt);
2858                         return ret;
2859                 }
2860
2861                 ppgtt->base.clear_range(&ppgtt->base,
2862                                         ppgtt->base.start,
2863                                         ppgtt->base.total,
2864                                         true);
2865
2866                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2867                 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2868                 ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2869         }
2870
2871         return 0;
2872 }
2873
2874 /**
2875  * i915_gem_init_ggtt - Initialize GEM for Global GTT
2876  * @dev: DRM device
2877  */
2878 void i915_gem_init_ggtt(struct drm_device *dev)
2879 {
2880         struct drm_i915_private *dev_priv = to_i915(dev);
2881         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2882
2883         i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total);
2884 }
2885
2886 /**
2887  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2888  * @dev: DRM device
2889  */
2890 void i915_ggtt_cleanup_hw(struct drm_device *dev)
2891 {
2892         struct drm_i915_private *dev_priv = to_i915(dev);
2893         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2894
2895         if (dev_priv->mm.aliasing_ppgtt) {
2896                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2897
2898                 ppgtt->base.cleanup(&ppgtt->base);
2899                 kfree(ppgtt);
2900         }
2901
2902         i915_gem_cleanup_stolen(dev);
2903
2904         if (drm_mm_initialized(&ggtt->base.mm)) {
2905                 intel_vgt_deballoon(dev_priv);
2906
2907                 drm_mm_takedown(&ggtt->base.mm);
2908                 list_del(&ggtt->base.global_link);
2909         }
2910
2911         ggtt->base.cleanup(&ggtt->base);
2912 }
2913
2914 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2915 {
2916         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2917         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2918         return snb_gmch_ctl << 20;
2919 }
2920
2921 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2922 {
2923         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2924         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2925         if (bdw_gmch_ctl)
2926                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2927
2928 #ifdef CONFIG_X86_32
2929         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2930         if (bdw_gmch_ctl > 4)
2931                 bdw_gmch_ctl = 4;
2932 #endif
2933
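        /* The returned value is the size of the PTE array; at 8 bytes per
         * gen8 PTE this covers (size / 8) * 4096 bytes of GGTT address space,
         * e.g. GGMS = 3 -> 8 MB of PTEs -> 4 GB of GGTT (cf. the 32b comment
         * above).
         */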
2934         return bdw_gmch_ctl << 20;
2935 }
2936
2937 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2938 {
2939         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2940         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2941
2942         if (gmch_ctrl)
2943                 return 1 << (20 + gmch_ctrl);
2944
2945         return 0;
2946 }
2947
2948 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2949 {
2950         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2951         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2952         return snb_gmch_ctl << 25; /* 32 MB units */
2953 }
2954
2955 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2956 {
2957         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2958         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2959         return bdw_gmch_ctl << 25; /* 32 MB units */
2960 }
2961
2962 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2963 {
2964         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2965         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2966
2967         /*
2968          * 0x0  to 0x10: 32MB increments starting at 0MB
2969          * 0x11 to 0x16: 4MB increments starting at 8MB
2970          * 0x17 to 0x1d: 4MB increments starting at 36MB
2971          */
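        /* e.g. 0x11 -> (0x11 - 0x11 + 2) << 22 = 8MB,
         *      0x17 -> (0x17 - 0x17 + 9) << 22 = 36MB */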
2972         if (gmch_ctrl < 0x11)
2973                 return gmch_ctrl << 25;
2974         else if (gmch_ctrl < 0x17)
2975                 return (gmch_ctrl - 0x11 + 2) << 22;
2976         else
2977                 return (gmch_ctrl - 0x17 + 9) << 22;
2978 }
2979
2980 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2981 {
2982         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2983         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2984
2985         if (gen9_gmch_ctl < 0xf0)
2986                 return gen9_gmch_ctl << 25; /* 32 MB units */
2987         else
2988                 /* 4MB increments for 0xf0 and above, i.e. 0xf0 -> 4MB */
2989                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2990 }
2991
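/*
 * Map the GGTT page table (the "GSM", located in the upper half of the
 * gen6+ GTTMMADR BAR) and allocate the scratch page that unbound GGTT
 * ranges are pointed at.
 */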
2992 static int ggtt_probe_common(struct drm_device *dev,
2993                              size_t gtt_size)
2994 {
2995         struct drm_i915_private *dev_priv = to_i915(dev);
2996         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2997         struct i915_page_scratch *scratch_page;
2998         phys_addr_t ggtt_phys_addr;
2999
3000         /* For modern GENs, the PTEs and register space are split in the BAR */
3001         ggtt_phys_addr = pci_resource_start(dev->pdev, 0) +
3002                          (pci_resource_len(dev->pdev, 0) / 2);
3003
3004         /*
3005          * On BXT writes larger than 64 bit to the GTT pagetable range will be
3006          * dropped. For WC mappings in general we have 64 byte burst writes
3007          * when the WC buffer is flushed, so we can't use it, but have to
3008          * resort to an uncached mapping. The WC issue is easily caught by the
3009          * readback check when writing GTT PTE entries.
3010          */
3011         if (IS_BROXTON(dev))
3012                 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size);
3013         else
3014                 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size);
3015         if (!ggtt->gsm) {
3016                 DRM_ERROR("Failed to map the gtt page table\n");
3017                 return -ENOMEM;
3018         }
3019
3020         scratch_page = alloc_scratch_page(dev);
3021         if (IS_ERR(scratch_page)) {
3022                 DRM_ERROR("Scratch setup failed\n");
3023                 /* iounmap will also get called at remove, but meh */
3024                 iounmap(ggtt->gsm);
3025                 return PTR_ERR(scratch_page);
3026         }
3027
3028         ggtt->base.scratch_page = scratch_page;
3029
3030         return 0;
3031 }
3032
3033 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3034  * bits. When using advanced contexts each context stores its own PAT, but
3035  * writing this data shouldn't be harmful even in those cases. */
3036 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3037 {
3038         uint64_t pat;
3039
3040         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
3041               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3042               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3043               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
3044               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3045               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3046               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3047               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3048
3049         if (!USES_PPGTT(dev_priv))
3050                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3051                  * so RTL will always use the value corresponding to
3052                  * pat_sel = 000".
3053                  * So let's disable cache for GGTT to avoid screen corruptions.
3054                  * MOCS still can be used though.
3055                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3056                  * before this patch, i.e. the same uncached + snooping access
3057                  * like on gen6/7 seems to be in effect.
3058                  * - So this just fixes blitter/render access. Again it looks
3059                  * like it's not just uncached access, but uncached + snooping.
3060                  * So we can still hold onto all our assumptions wrt cpu
3061                  * clflushing on LLC machines.
3062                  */
3063                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3064
3065         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3066          * write would work. */
3067         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3068         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3069 }
3070
3071 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3072 {
3073         uint64_t pat;
3074
3075         /*
3076          * Map WB on BDW to snooped on CHV.
3077          *
3078          * Only the snoop bit has meaning for CHV, the rest is
3079          * ignored.
3080          *
3081          * The hardware will never snoop for certain types of accesses:
3082          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3083          * - PPGTT page tables
3084          * - some other special cycles
3085          *
3086          * As with BDW, we also need to consider the following for GT accesses:
3087          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3088          * so RTL will always use the value corresponding to
3089          * pat_sel = 000".
3090          * Which means we must set the snoop bit in PAT entry 0
3091          * in order to keep the global status page working.
3092          */
3093         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3094               GEN8_PPAT(1, 0) |
3095               GEN8_PPAT(2, 0) |
3096               GEN8_PPAT(3, 0) |
3097               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3098               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3099               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3100               GEN8_PPAT(7, CHV_PPAT_SNOOP);
3101
3102         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3103         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3104 }
3105
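/*
 * Probe the gen8+ GGTT: read the aperture from BAR 2, decode the GGTT and
 * stolen sizes from the GMCH control word, program the private PPAT and
 * install the gen8 PTE insert/clear callbacks.
 */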
3106 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3107 {
3108         struct drm_device *dev = ggtt->base.dev;
3109         struct drm_i915_private *dev_priv = to_i915(dev);
3110         u16 snb_gmch_ctl;
3111         int ret;
3112
3113         /* TODO: We're not aware of mappable constraints on gen8 yet */
3114         ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3115         ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3116
3117 #if 0
3118         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3119                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3120 #endif
3121
3122         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3123
3124         if (INTEL_INFO(dev)->gen >= 9) {
3125                 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3126                 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3127         } else if (IS_CHERRYVIEW(dev)) {
3128                 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3129                 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl);
3130         } else {
3131                 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3132                 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3133         }
3134
3135         ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3136
3137         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3138                 chv_setup_private_ppat(dev_priv);
3139         else
3140                 bdw_setup_private_ppat(dev_priv);
3141
3142         ret = ggtt_probe_common(dev, ggtt->size);
3143
3144         ggtt->base.bind_vma = ggtt_bind_vma;
3145         ggtt->base.unbind_vma = ggtt_unbind_vma;
3146         ggtt->base.insert_page = gen8_ggtt_insert_page;
3147         ggtt->base.clear_range = nop_clear_range;
3148         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3149                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3150
3151         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3152         if (IS_CHERRYVIEW(dev_priv))
3153                 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3154
3155         return ret;
3156 }
3157
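/*
 * Probe the gen6/7 GGTT: same flow as gen8, but with the gen6 size decode
 * and PTE callbacks, plus a sanity check on the reported GMADR size.
 */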
3158 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3159 {
3160         struct drm_device *dev = ggtt->base.dev;
3161         u16 snb_gmch_ctl;
3162         int ret;
3163
3164         ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3165         ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3166
3167         /* 64/512MB is the current min/max we actually know of, but this is just
3168          * a coarse sanity check.
3169          */
3170         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3171                 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3172                 return -ENXIO;
3173         }
3174
3175 #if 0
3176         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3177                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3178 #endif
3179         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3180
3181         ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3182         ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl);
3183         ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3184
3185         ret = ggtt_probe_common(dev, ggtt->size);
3186
3187         ggtt->base.clear_range = gen6_ggtt_clear_range;
3188         ggtt->base.insert_page = gen6_ggtt_insert_page;
3189         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3190         ggtt->base.bind_vma = ggtt_bind_vma;
3191         ggtt->base.unbind_vma = ggtt_unbind_vma;
3192
3193         return ret;
3194 }
3195
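/* Undo ggtt_probe_common(): unmap the GSM and release the scratch page. */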
3196 static void gen6_gmch_remove(struct i915_address_space *vm)
3197 {
3198         struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base);
3199
3200         iounmap(ggtt->gsm);
3201         free_scratch_page(vm->dev, vm->scratch_page);
3202 }
3203
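/*
 * Pre-gen6: the GMCH/intel-gtt layer owns the GTT, so the geometry is
 * queried via intel_gtt_get() and the i915_ggtt_* PTE helpers are used.
 */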
3204 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3205 {
3206         struct drm_device *dev = ggtt->base.dev;
3207         struct drm_i915_private *dev_priv = to_i915(dev);
3208 #if 0
3209         int ret;
3210
3211         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3212         if (!ret) {
3213                 DRM_ERROR("failed to set up gmch\n");
3214                 return -EIO;
3215         }
3216 #endif
3217
3218         intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3219                       &ggtt->mappable_base, &ggtt->mappable_end);
3220
3221         ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm);
3222         ggtt->base.insert_page = i915_ggtt_insert_page;
3223         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3224         ggtt->base.clear_range = i915_ggtt_clear_range;
3225         ggtt->base.bind_vma = ggtt_bind_vma;
3226         ggtt->base.unbind_vma = ggtt_unbind_vma;
3227
3228         if (unlikely(ggtt->do_idle_maps))
3229                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3230
3231         return 0;
3232 }
3233
3234 static void i915_gmch_remove(struct i915_address_space *vm)
3235 {
3236         intel_gmch_remove();
3237 }
3238
3239 /**
3240  * i915_ggtt_init_hw - Initialize GGTT hardware
3241  * @dev: DRM device
3242  */
3243 int i915_ggtt_init_hw(struct drm_device *dev)
3244 {
3245         struct drm_i915_private *dev_priv = to_i915(dev);
3246         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3247         int ret;
3248
3249         if (INTEL_INFO(dev)->gen <= 5) {
3250                 ggtt->probe = i915_gmch_probe;
3251                 ggtt->base.cleanup = i915_gmch_remove;
3252         } else if (INTEL_INFO(dev)->gen < 8) {
3253                 ggtt->probe = gen6_gmch_probe;
3254                 ggtt->base.cleanup = gen6_gmch_remove;
3255
3256                 if (HAS_EDRAM(dev))
3257                         ggtt->base.pte_encode = iris_pte_encode;
3258                 else if (IS_HASWELL(dev))
3259                         ggtt->base.pte_encode = hsw_pte_encode;
3260                 else if (IS_VALLEYVIEW(dev))
3261                         ggtt->base.pte_encode = byt_pte_encode;
3262                 else if (INTEL_INFO(dev)->gen >= 7)
3263                         ggtt->base.pte_encode = ivb_pte_encode;
3264                 else
3265                         ggtt->base.pte_encode = snb_pte_encode;
3266         } else {
3267                 ggtt->probe = gen8_gmch_probe;
3268                 ggtt->base.cleanup = gen6_gmch_remove;
3269         }
3270
3271         ggtt->base.dev = dev;
3272         ggtt->base.is_ggtt = true;
3273
3274         ret = ggtt->probe(ggtt);
3275         if (ret)
3276                 return ret;
3277
3278         if ((ggtt->base.total - 1) >> 32) {
3279                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3280                           " of address space! Found %lldM!\n",
3281                           ggtt->base.total >> 20);
3282                 ggtt->base.total = 1ULL << 32;
3283                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3284         }
3285
3286         /*
3287          * Initialise stolen early so that we may reserve preallocated
3288          * objects for the BIOS to KMS transition.
3289          */
3290         ret = i915_gem_init_stolen(dev);
3291         if (ret)
3292                 goto out_gtt_cleanup;
3293
3294         /* GMADR is the PCI mmio aperture into the global GTT. */
3295         DRM_INFO("Memory usable by graphics device = %lluM\n",
3296                  ggtt->base.total >> 20);
3297         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3298         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3299 #ifdef CONFIG_INTEL_IOMMU
3300         if (intel_iommu_gfx_mapped)
3301                 DRM_INFO("VT-d active for gfx access\n");
3302 #endif
3303
3304         return 0;
3305
3306 out_gtt_cleanup:
3307         ggtt->base.cleanup(&ggtt->base);
3308
3309         return ret;
3310 }
3311
3312 int i915_ggtt_enable_hw(struct drm_device *dev)
3313 {
3314         if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3315                 return -EIO;
3316
3317         return 0;
3318 }
3319
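/*
 * Rebuild the GGTT contents (e.g. after resume): clear stale faults, scrub
 * the whole range with scratch pages, rebind every GGTT VMA and reprogram
 * the PPAT (gen8+) or rewrite the PPGTT page directories (gen6/7).
 */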
3320 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3321 {
3322         struct drm_i915_private *dev_priv = to_i915(dev);
3323         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3324         struct drm_i915_gem_object *obj;
3325         struct i915_vma *vma;
3326
3327         i915_check_and_clear_faults(dev_priv);
3328
3329         /* First fill our portion of the GTT with scratch pages */
3330         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
3331                                true);
3332
3333         /* Cache flush objects bound into GGTT and rebind them. */
3334         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3335                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3336                         if (vma->vm != &ggtt->base)
3337                                 continue;
3338
3339                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3340                                               PIN_UPDATE));
3341                 }
3342
3343                 if (obj->pin_display)
3344                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3345         }
3346
3347         if (INTEL_INFO(dev)->gen >= 8) {
3348                 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3349                         chv_setup_private_ppat(dev_priv);
3350                 else
3351                         bdw_setup_private_ppat(dev_priv);
3352
3353                 return;
3354         }
3355
3356         if (USES_PPGTT(dev)) {
3357                 struct i915_address_space *vm;
3358
3359                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3360                         /* TODO: Perhaps it shouldn't be gen6 specific */
3361
3362                         struct i915_hw_ppgtt *ppgtt;
3363
3364                         if (vm->is_ggtt)
3365                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3366                         else
3367                                 ppgtt = i915_vm_to_ppgtt(vm);
3368
3369                         gen6_write_page_range(dev_priv, &ppgtt->pd,
3370                                               0, ppgtt->base.total);
3371                 }
3372         }
3373
3374         i915_ggtt_flush(dev_priv);
3375 }
3376
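/*
 * Allocate and initialise a VMA linking @obj into @vm. GGTT VMAs record the
 * requested view; PPGTT VMAs take a reference on their PPGTT instead.
 */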
3377 static struct i915_vma *
3378 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3379                       struct i915_address_space *vm,
3380                       const struct i915_ggtt_view *ggtt_view)
3381 {
3382         struct i915_vma *vma;
3383
3384         if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3385                 return ERR_PTR(-EINVAL);
3386
3387         vma = kzalloc(sizeof(*vma), GFP_KERNEL);
3388         if (vma == NULL)
3389                 return ERR_PTR(-ENOMEM);
3390
3391         INIT_LIST_HEAD(&vma->vm_link);
3392         INIT_LIST_HEAD(&vma->obj_link);
3393         INIT_LIST_HEAD(&vma->exec_list);
3394         vma->vm = vm;
3395         vma->obj = obj;
3396         vma->is_ggtt = i915_is_ggtt(vm);
3397
3398         if (i915_is_ggtt(vm))
3399                 vma->ggtt_view = *ggtt_view;
3400         else
3401                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3402
3403         list_add_tail(&vma->obj_link, &obj->vma_list);
3404
3405         return vma;
3406 }
3407
3408 struct i915_vma *
3409 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3410                                   struct i915_address_space *vm)
3411 {
3412         struct i915_vma *vma;
3413
3414         vma = i915_gem_obj_to_vma(obj, vm);
3415         if (!vma)
3416                 vma = __i915_gem_vma_create(obj, vm,
3417                                             i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3418
3419         return vma;
3420 }
3421
3422 struct i915_vma *
3423 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3424                                        const struct i915_ggtt_view *view)
3425 {
3426         struct drm_device *dev = obj->base.dev;
3427         struct drm_i915_private *dev_priv = to_i915(dev);
3428         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3429         struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3430
3431         if (!vma)
3432                 vma = __i915_gem_vma_create(obj, &ggtt->base, view);
3433
3434         return vma;
3435
3436 }
3437
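/*
 * Emit the source pages into @st column by column, walking each source
 * column from the bottom row upwards, which produces a 90 degree rotated
 * layout of a width x height page grid with the given stride.
 */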
3438 static struct scatterlist *
3439 rotate_pages(const dma_addr_t *in, unsigned int offset,
3440              unsigned int width, unsigned int height,
3441              unsigned int stride,
3442              struct sg_table *st, struct scatterlist *sg)
3443 {
3444         unsigned int column, row;
3445         unsigned int src_idx;
3446
3447         for (column = 0; column < width; column++) {
3448                 src_idx = stride * (height - 1) + column;
3449                 for (row = 0; row < height; row++) {
3450                         st->nents++;
3451                         /* We don't need the pages, but need to initialize
3452                          * the entries so the sg list can be happily traversed.
3453                          * All we need are the DMA addresses.
3454                          */
3455                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3456                         sg_dma_address(sg) = in[offset + src_idx];
3457                         sg_dma_len(sg) = PAGE_SIZE;
3458                         sg = sg_next(sg);
3459                         src_idx -= stride;
3460                 }
3461         }
3462
3463         return sg;
3464 }
3465
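/*
 * Build the sg table backing a rotated GGTT view: snapshot the object's
 * DMA addresses, then emit the rotated Y plane and, for NV12, the rotated
 * UV plane into a freshly allocated sg list.
 */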
3466 static struct sg_table *
3467 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3468                           struct drm_i915_gem_object *obj)
3469 {
3470         const size_t n_pages = obj->base.size / PAGE_SIZE;
3471         unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
3472         unsigned int size_pages_uv;
3473         struct sgt_iter sgt_iter;
3474         dma_addr_t dma_addr;
3475         unsigned long i;
3476         dma_addr_t *page_addr_list;
3477         struct sg_table *st;
3478         unsigned int uv_start_page;
3479         struct scatterlist *sg;
3480         int ret = -ENOMEM;
3481
3482         /* Allocate a temporary list of source pages for random access. */
3483         page_addr_list = drm_malloc_gfp(n_pages,
3484                                         sizeof(dma_addr_t),
3485                                         GFP_TEMPORARY);
3486         if (!page_addr_list)
3487                 return ERR_PTR(ret);
3488
3489         /* Account for UV plane with NV12. */
3490         if (rot_info->pixel_format == DRM_FORMAT_NV12)
3491                 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
3492         else
3493                 size_pages_uv = 0;
3494
3495         /* Allocate target SG list. */
3496         st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3497         if (!st)
3498                 goto err_st_alloc;
3499
3500         ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3501         if (ret)
3502                 goto err_sg_alloc;
3503
3504         /* Populate source page list from the object. */
3505         i = 0;
3506         for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
3507                 page_addr_list[i++] = dma_addr;
3508
3509         GEM_BUG_ON(i != n_pages);
3510         st->nents = 0;
3511         sg = st->sgl;
3512
3513         /* Rotate the pages. */
3514         sg = rotate_pages(page_addr_list, 0,
3515                           rot_info->plane[0].width, rot_info->plane[0].height,
3516                           rot_info->plane[0].width,
3517                           st, sg);
3518
3519         /* Append the UV plane if NV12. */
3520         if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3521                 uv_start_page = size_pages;
3522
3523                 /* Check for tile-row un-alignment. */
3524                 /* Check for tile-row misalignment. */
3525                         uv_start_page--;
3526
3527                 rot_info->uv_start_page = uv_start_page;
3528
3529                 sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
3530                                   rot_info->plane[1].width, rot_info->plane[1].height,
3531                                   rot_info->plane[1].width,
3532                                   st, sg);
3533         }
3534
3535         DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
3536                       obj->base.size, rot_info->plane[0].width,
3537                       rot_info->plane[0].height, size_pages + size_pages_uv,
3538                       size_pages);
3539
3540         drm_free_large(page_addr_list);
3541
3542         return st;
3543
3544 err_sg_alloc:
3545         kfree(st);
3546 err_st_alloc:
3547         drm_free_large(page_addr_list);
3548
3549         DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
3550                       obj->base.size, ret, rot_info->plane[0].width,
3551                       rot_info->plane[0].height, size_pages + size_pages_uv,
3552                       size_pages);
3553         return ERR_PTR(ret);
3554 }
3555
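/*
 * Build an sg table covering only the page range described by the partial
 * view (params.partial.offset / params.partial.size) of the object.
 */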
3556 static struct sg_table *
3557 intel_partial_pages(const struct i915_ggtt_view *view,
3558                     struct drm_i915_gem_object *obj)
3559 {
3560         struct sg_table *st;
3561         struct scatterlist *sg;
3562         struct sg_page_iter obj_sg_iter;
3563         int ret = -ENOMEM;
3564
3565         st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3566         if (!st)
3567                 goto err_st_alloc;
3568
3569         ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3570         if (ret)
3571                 goto err_sg_alloc;
3572
3573         sg = st->sgl;
3574         st->nents = 0;
3575         for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3576                 view->params.partial.offset)
3577         {
3578                 if (st->nents >= view->params.partial.size)
3579                         break;
3580
3581                 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3582                 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3583                 sg_dma_len(sg) = PAGE_SIZE;
3584
3585                 sg = sg_next(sg);
3586                 st->nents++;
3587         }
3588
3589         return st;
3590
3591 err_sg_alloc:
3592         kfree(st);
3593 err_st_alloc:
3594         return ERR_PTR(ret);
3595 }
3596
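/*
 * Populate vma->ggtt_view.pages for the view type: normal views reuse the
 * object's own page list, while rotated and partial views get a dedicated
 * sg table built for them.
 */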
3597 static int
3598 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3599 {
3600         int ret = 0;
3601
3602         if (vma->ggtt_view.pages)
3603                 return 0;
3604
3605         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3606                 vma->ggtt_view.pages = vma->obj->pages;
3607         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3608                 vma->ggtt_view.pages =
3609                         intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3610         else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3611                 vma->ggtt_view.pages =
3612                         intel_partial_pages(&vma->ggtt_view, vma->obj);
3613         else
3614                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3615                           vma->ggtt_view.type);
3616
3617         if (!vma->ggtt_view.pages) {
3618                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3619                           vma->ggtt_view.type);
3620                 ret = -EINVAL;
3621         } else if (IS_ERR(vma->ggtt_view.pages)) {
3622                 ret = PTR_ERR(vma->ggtt_view.pages);
3623                 vma->ggtt_view.pages = NULL;
3624                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3625                           vma->ggtt_view.type, ret);
3626         }
3627
3628         return ret;
3629 }
3630
3631 /**
3632  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3633  * @vma: VMA to map
3634  * @cache_level: mapping cache level
3635  * @flags: flags like global or local mapping
3636  *
3637  * DMA addresses are taken from the scatter-gather table of this object (or of
3638  * this VMA in case of non-default GGTT views) and PTE entries set up.
3639  * Note that DMA addresses are also the only part of the SG table we care about.
3640  */
3641 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3642                   u32 flags)
3643 {
3644         int ret;
3645         u32 bind_flags;
3646
3647         if (WARN_ON(flags == 0))
3648                 return -EINVAL;
3649
3650         bind_flags = 0;
3651         if (flags & PIN_GLOBAL)
3652                 bind_flags |= GLOBAL_BIND;
3653         if (flags & PIN_USER)
3654                 bind_flags |= LOCAL_BIND;
3655
3656         if (flags & PIN_UPDATE)
3657                 bind_flags |= vma->bound;
3658         else
3659                 bind_flags &= ~vma->bound;
3660
3661         if (bind_flags == 0)
3662                 return 0;
3663
3664         if (vma->bound == 0 && vma->vm->allocate_va_range) {
3665                 /* XXX: i915_vma_pin() will fix this +- hack */
3666                 vma->pin_count++;
3667                 trace_i915_va_alloc(vma);
3668                 ret = vma->vm->allocate_va_range(vma->vm,
3669                                                  vma->node.start,
3670                                                  vma->node.size);
3671                 vma->pin_count--;
3672                 if (ret)
3673                         return ret;
3674         }
3675
3676         ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3677         if (ret)
3678                 return ret;
3679
3680         vma->bound |= bind_flags;
3681
3682         return 0;
3683 }
3684
3685 /**
3686  * i915_ggtt_view_size - Get the size of a GGTT view.
3687  * @obj: Object the view is of.
3688  * @view: The view in question.
3689  *
3690  * Return: The size of the GGTT view in bytes.
3691  */
3692 size_t
3693 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3694                     const struct i915_ggtt_view *view)
3695 {
3696         if (view->type == I915_GGTT_VIEW_NORMAL) {
3697                 return obj->base.size;
3698         } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3699                 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
3700         } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3701                 return view->params.partial.size << PAGE_SHIFT;
3702         } else {
3703                 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3704                 return obj->base.size;
3705         }
3706 }
3707
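/**
 * i915_vma_pin_iomap - map a GGTT-bound VMA through the mappable aperture
 * @vma: VMA to map
 *
 * Returns a WC iomapping of the VMA (cached in vma->iomap) and takes an
 * extra pin reference; callers must hold struct_mutex and should release
 * the pin once they are done with the mapping.
 */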
3708 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3709 {
3710         void __iomem *ptr;
3711
3712         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3713         if (WARN_ON(!vma->obj->map_and_fenceable))
3714                 return IO_ERR_PTR(-ENODEV);
3715
3716         GEM_BUG_ON(!vma->is_ggtt);
3717         GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
3718
3719         ptr = vma->iomap;
3720         if (ptr == NULL) {
3721                 ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable,
3722                                         vma->node.start,
3723                                         vma->node.size);
3724                 if (ptr == NULL)
3725                         return IO_ERR_PTR(-ENOMEM);
3726
3727                 vma->iomap = ptr;
3728         }
3729
3730         vma->pin_count++;
3731         return ptr;
3732 }