drm/i915: Partially sync headers with Linux 3.8
sys/dev/drm/i915/i915_gem_gtt.c
/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_gtt.c,v 1.1 2012/05/22 11:07:44 kib Exp $
 */

#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
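/*
 * Gen6 GTT/PPGTT entries store bits 39:32 of a physical address in bits
 * 11:4 of the entry: shifting the address right by 28 moves bit 32 down
 * to bit 4, and the 0xff0 mask keeps exactly those eight bits.
 */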
#define GEN6_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID                  (1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID                  (1 << 0)
#define GEN6_PTE_UNCACHED               (1 << 1)
#define HSW_PTE_UNCACHED                (0)
#define GEN6_PTE_CACHE_LLC              (2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC          (3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)

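/*
 * Build a gen6+ PTE for the given DMA address and cache level.  Haswell
 * encodes cacheability differently from Sandy Bridge/Ivy Bridge: it has
 * no LLC+MLC mode and marks uncached pages with an all-zero cache field
 * rather than a dedicated bit.
 */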
static inline gtt_pte_t pte_encode(struct drm_device *dev,
                                   dma_addr_t addr,
                                   enum i915_cache_level level)
{
        gtt_pte_t pte = GEN6_PTE_VALID;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_LLC_MLC:
                /* Haswell doesn't set L3 this way */
                if (IS_HASWELL(dev))
                        pte |= GEN6_PTE_CACHE_LLC;
                else
                        pte |= GEN6_PTE_CACHE_LLC_MLC;
                break;
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                if (IS_HASWELL(dev))
                        pte |= HSW_PTE_UNCACHED;
                else
                        pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                BUG();
        }

        return pte;
}

/* PPGTT support for Sandybridge/Gen6 and later */
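/*
 * Overwrite a range of PPGTT PTEs with the scratch-page entry.  Each
 * page-table page holds I915_PPGTT_PT_ENTRIES PTEs, so the loop maps one
 * page-table page at a time through a transient sf_buf kernel mapping
 * and rewrites the slice of entries that falls within it.
 */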
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
                                   unsigned first_entry,
                                   unsigned num_entries)
{
        gtt_pte_t *pt_vaddr;
        gtt_pte_t scratch_pte;
        struct sf_buf *sf;
        unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;

        scratch_pte = GEN6_GTT_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
        scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
                pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

                for (i = first_pte; i < last_pte; i++)
                        pt_vaddr[i] = scratch_pte;

                sf_buf_free(sf);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

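/*
 * Set up the aliasing PPGTT: allocate one page-table page per page
 * directory entry and point every PTE at the scratch page.  The PDEs
 * themselves live in a block stolen from the top of the global GTT and
 * are written to hardware in i915_gem_init_ppgtt().
 */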
int
i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv;
        struct i915_hw_ppgtt *ppgtt;
        u_int first_pd_entry_in_global_pt, i;

        dev_priv = dev->dev_private;

        /*
         * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
         * entries. For aliasing ppgtt support we just steal them at the end for
         * now.
         */
        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

        ppgtt = kmalloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO);

        ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
        ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
            DRM_I915_GEM, M_WAITOK | M_ZERO);

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
                    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
                if (ppgtt->pt_pages[i] == NULL) {
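                        /*
                         * Publish the partially built ppgtt so the
                         * cleanup path can find and free what was
                         * allocated so far.
                         */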
                        dev_priv->mm.aliasing_ppgtt = ppgtt;
                        i915_gem_cleanup_aliasing_ppgtt(dev);
                        return (-ENOMEM);
                }
        }

        ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

        i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries *
            I915_PPGTT_PT_ENTRIES);
        ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t);
        dev_priv->mm.aliasing_ppgtt = ppgtt;
        return (0);
}

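/*
 * Write PTEs for a run of backing pages into the PPGTT, walking one
 * page-table page at a time exactly as i915_ppgtt_clear_range() does.
 */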
static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, uint32_t pte_flags)
{
        uint32_t *pt_vaddr, pte;
        struct sf_buf *sf;
        unsigned act_pd, first_pte;
        unsigned last_pte, i;
        vm_paddr_t page_addr;

        act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
        first_pte = first_entry % I915_PPGTT_PT_ENTRIES;

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
                pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

                for (i = first_pte; i < last_pte; i++) {
                        page_addr = VM_PAGE_TO_PHYS(*pages);
                        pte = GEN6_PTE_ADDR_ENCODE(page_addr);
                        pt_vaddr[i] = pte | pte_flags;

                        pages++;
                }

                sf_buf_free(sf);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pd++;
        }
}

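/*
 * Bind an object into the aliasing PPGTT at its current GTT offset,
 * translating the requested cache level into gen6 PTE flags.
 */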
void
i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
    struct drm_i915_gem_object *obj, enum i915_cache_level cache_level)
{
        uint32_t pte_flags;

        pte_flags = GEN6_PTE_VALID;

        switch (cache_level) {
        case I915_CACHE_LLC_MLC:
                pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
                break;
        case I915_CACHE_LLC:
                pte_flags |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                pte_flags |= GEN6_PTE_UNCACHED;
                break;
        default:
                panic("cache mode");
        }

        i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, pte_flags);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
                              struct drm_i915_gem_object *obj)
{
        i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT);
}

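/*
 * Tear down the aliasing PPGTT, freeing every page-table page that was
 * successfully allocated.  Safe to call on a partially constructed
 * ppgtt, which is how the init error path uses it.
 */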
void
i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv;
        struct i915_hw_ppgtt *ppgtt;
        vm_page_t m;
        int i;

        dev_priv = dev->dev_private;
        ppgtt = dev_priv->mm.aliasing_ppgtt;
        if (ppgtt == NULL)
                return;
        dev_priv->mm.aliasing_ppgtt = NULL;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                m = ppgtt->pt_pages[i];
                if (m != NULL) {
                        vm_page_busy_wait(m, FALSE, "i915gem");
                        vm_page_unwire(m, 0);
                        vm_page_free(m);
                }
        }
        drm_free(ppgtt->pt_pages, DRM_I915_GEM);
        drm_free(ppgtt, DRM_I915_GEM);
}

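/*
 * Map an i915 cache level onto the AGP memory type the intel_gtt layer
 * uses for global-GTT bindings.
 */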
static unsigned int
cache_level_to_agp_type(struct drm_device *dev, enum i915_cache_level
    cache_level)
{

        switch (cache_level) {
        case I915_CACHE_LLC_MLC:
                if (INTEL_INFO(dev)->gen >= 6)
                        return (AGP_USER_CACHED_MEMORY_LLC_MLC);
                /*
                 * Older chipsets do not have this extra level of CPU
                 * caching, so fall through and request the PTE simply
                 * as cached.
                 */
        case I915_CACHE_LLC:
                return (AGP_USER_CACHED_MEMORY);

        default:
        case I915_CACHE_NONE:
                return (AGP_USER_MEMORY);
        }
}

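/*
 * Some machines (e.g. Ironlake with VT-d enabled) require the GPU to be
 * fully idle before GTT entries may be unmapped.  do_idling() and
 * undo_idling() bracket the unmap and temporarily force waits to be
 * non-interruptible.
 */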
static bool
do_idling(struct drm_i915_private *dev_priv)
{
        bool ret = dev_priv->mm.interruptible;

        if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
                dev_priv->mm.interruptible = false;
                if (i915_gpu_idle(dev_priv->dev, false)) {
                        DRM_ERROR("Couldn't idle GPU\n");
                        /* Wait a bit, in hopes it avoids the hang */
                        DELAY(10);
                }
        }

        return ret;
}

static void
undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{

        if (unlikely(dev_priv->mm.gtt->do_idle_maps))
                dev_priv->mm.interruptible = interruptible;
}

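/*
 * Repopulate the global GTT after its contents have been lost, e.g.
 * across suspend/resume: fill our range with scratch pages, then rebind
 * every object currently resident in the GTT.
 */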
void
i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv;
        struct drm_i915_gem_object *obj;

        dev_priv = dev->dev_private;

        /* First fill our portion of the GTT with scratch pages */
        intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
            (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

        list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
                i915_gem_clflush_object(obj);
                i915_gem_gtt_rebind_object(obj, obj->cache_level);
        }

        intel_gtt_chipset_flush();
}

int
i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
{
        unsigned int agp_type;

        agp_type = cache_level_to_agp_type(obj->base.dev, obj->cache_level);
        intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, agp_type);
        return (0);
}

void
i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
    enum i915_cache_level cache_level)
{
        struct drm_device *dev;
        unsigned int agp_type;

        dev = obj->base.dev;
        agp_type = cache_level_to_agp_type(dev, cache_level);

        intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT, obj->pages, agp_type);
}

void
i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

        intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
            obj->base.size >> PAGE_SHIFT);

        undo_idling(dev_priv, interruptible);
}

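/*
 * GFX_MODE is a masked register: the high 16 bits select which of the
 * low 16 bits a write actually updates, so enabling a bit requires it
 * set in both halves.
 */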
#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit))

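/*
 * Enable the aliasing PPGTT in hardware: write the page-directory
 * entries into the slots stolen from the top of the global GTT, then
 * point each ring's PP_DIR_BASE at the directory (programmed in
 * cachelines, in bits 31:16) and flip the PPGTT enable bits.
 * PP_DIR_DCLV_2G marks all 512 PDEs valid, i.e. the full 2GB PPGTT
 * address space.
 */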
void i915_gem_init_ppgtt(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t pd_offset;
        struct intel_ring_buffer *ring;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
        uint32_t pd_entry;
        vm_paddr_t pt_addr;
        u_int first_pd_entry_in_global_pt, i;

        if (ppgtt == NULL)
                return;

        first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
                pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
                pd_entry |= GEN6_PDE_VALID;
                intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
        }
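        /* Posting read to make sure the PDE writes have reached the GTT. */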
        intel_gtt_read_pte(first_pd_entry_in_global_pt);

        pd_offset = ppgtt->pd_offset;
        pd_offset /= 64; /* in cachelines */
        pd_offset <<= 16;

        if (INTEL_INFO(dev)->gen == 6) {
                uint32_t ecochk = I915_READ(GAM_ECOCHK);
                I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
                                       ECOCHK_PPGTT_CACHE64B);
                I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));
        } else if (INTEL_INFO(dev)->gen >= 7) {
                I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
                /* GFX_MODE is per-ring on gen7+ */
        }

        for (i = 0; i < I915_NUM_RINGS; i++) {
                ring = &dev_priv->rings[i];

                if (INTEL_INFO(dev)->gen >= 7)
                        I915_WRITE(RING_MODE_GEN7(ring),
                                   GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));

                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
                I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
        }
}