2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
26 * Copyright (c) 2011 The FreeBSD Foundation
27 * All rights reserved.
29 * This software was developed by Konstantin Belousov under sponsorship from
30 * the FreeBSD Foundation.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 #include <machine/md_var.h>
58 #include <drm/drm_vma_manager.h>
59 #include <drm/i915_drm.h>
61 #include "i915_trace.h"
62 #include "intel_drv.h"
63 #include <linux/shmem_fs.h>
64 #include <linux/slab.h>
65 #include <linux/swap.h>
66 #include <linux/pci.h>
68 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
69 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
70 static __must_check int
71 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
74 i915_gem_object_retire(struct drm_i915_gem_object *obj);
76 static void i915_gem_write_fence(struct drm_device *dev, int reg,
77 struct drm_i915_gem_object *obj);
78 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
79 struct drm_i915_fence_reg *fence,
82 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
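/* CPU reads are coherent with the GPU either when the platform has an LLC
 * that the GPU snoops, or when the object itself is kept in a snooped
 * cache level (anything other than I915_CACHE_NONE).
 */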
84 static bool cpu_cache_is_coherent(struct drm_device *dev,
85 enum i915_cache_level level)
87 return HAS_LLC(dev) || level != I915_CACHE_NONE;
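/* CPU writes always need a clflush when the object is not cache coherent.
 * Even on coherent (LLC) platforms, objects pinned for display scanout
 * (pin_display) still report the need to flush here.
 */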
90 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
92 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
95 return obj->pin_display;
98 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
100 if (obj->tiling_mode)
101 i915_gem_release_mmap(obj);
103 /* As we do not have an associated fence register, we will force
104 * a tiling change if we ever need to acquire one.
106 obj->fence_dirty = false;
107 obj->fence_reg = I915_FENCE_REG_NONE;
110 /* some bookkeeping */
111 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
114 spin_lock(&dev_priv->mm.object_stat_lock);
115 dev_priv->mm.object_count++;
116 dev_priv->mm.object_memory += size;
117 spin_unlock(&dev_priv->mm.object_stat_lock);
120 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
123 spin_lock(&dev_priv->mm.object_stat_lock);
124 dev_priv->mm.object_count--;
125 dev_priv->mm.object_memory -= size;
126 spin_unlock(&dev_priv->mm.object_stat_lock);
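/* Block until any in-progress GPU reset has finished so the caller can
 * safely take struct_mutex; give up after a timeout rather than hanging
 * userspace forever.
 */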
130 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 #define EXIT_COND (!i915_reset_in_progress(error) || \
135 i915_terminally_wedged(error))
140 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
141 * userspace. If it takes that long something really bad is going on and
142 * we should simply try to bail out and fail as gracefully as possible.
144 ret = wait_event_interruptible_timeout(error->reset_queue,
148 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
150 } else if (ret < 0) {
158 int i915_mutex_lock_interruptible(struct drm_device *dev)
160 struct drm_i915_private *dev_priv = dev->dev_private;
163 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
167 ret = mutex_lock_interruptible(&dev->struct_mutex);
171 WARN_ON(i915_verify_lists(dev));
176 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
177 struct drm_file *file)
179 struct drm_i915_private *dev_priv = dev->dev_private;
180 struct drm_i915_gem_get_aperture *args = data;
181 struct drm_i915_gem_object *obj;
185 mutex_lock(&dev->struct_mutex);
186 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
187 if (i915_gem_obj_is_pinned(obj))
188 pinned += i915_gem_obj_ggtt_size(obj);
189 mutex_unlock(&dev->struct_mutex);
191 args->aper_size = dev_priv->gtt.base.total;
192 args->aper_available_size = args->aper_size - pinned;
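/* Phys objects are backed by a single contiguous DMA allocation. Populate
 * it by copying each shmem-backed page into the handle's vaddr and flushing
 * the CPU cache, then describe the whole region with a one-entry sg table.
 */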
199 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
201 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
202 char *vaddr = obj->phys_handle->vaddr;
204 struct scatterlist *sg;
207 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
210 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
211 struct vm_page *page;
214 page = shmem_read_mapping_page(mapping, i);
216 return PTR_ERR(page);
218 src = kmap_atomic(page);
219 memcpy(vaddr, src, PAGE_SIZE);
220 drm_clflush_virt_range(vaddr, PAGE_SIZE);
223 page_cache_release(page);
227 i915_gem_chipset_flush(obj->base.dev);
229 st = kmalloc(sizeof(*st), GFP_KERNEL);
233 if (sg_alloc_table(st, 1, GFP_KERNEL)) {
240 sg->length = obj->base.size;
242 sg_dma_address(sg) = obj->phys_handle->busaddr;
243 sg_dma_len(sg) = obj->base.size;
246 obj->has_dma_mapping = true;
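/* Tear-down counterpart of the above: copy the phys allocation's contents
 * back into the shmem pages while the object is still needed (dirty and
 * not marked DONTNEED), then release the sg table.
 */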
251 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
255 BUG_ON(obj->madv == __I915_MADV_PURGED);
257 ret = i915_gem_object_set_to_cpu_domain(obj, true);
259 /* In the event of a disaster, abandon all caches and
262 WARN_ON(ret != -EIO);
263 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
266 if (obj->madv == I915_MADV_DONTNEED)
270 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
271 char *vaddr = obj->phys_handle->vaddr;
274 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
278 page = shmem_read_mapping_page(mapping, i);
282 dst = kmap_atomic(page);
283 drm_clflush_virt_range(vaddr, PAGE_SIZE);
284 memcpy(dst, vaddr, PAGE_SIZE);
287 set_page_dirty(page);
288 if (obj->madv == I915_MADV_WILLNEED)
289 mark_page_accessed(page);
290 page_cache_release(page);
296 sg_free_table(obj->pages);
299 obj->has_dma_mapping = false;
303 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
305 drm_pci_free(obj->base.dev, obj->phys_handle);
308 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
309 .get_pages = i915_gem_object_get_pages_phys,
310 .put_pages = i915_gem_object_put_pages_phys,
311 .release = i915_gem_object_release_phys,
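/* Unbind every VMA and release the backing pages, holding a temporary
 * reference so the object cannot be freed underneath us.
 */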
316 drop_pages(struct drm_i915_gem_object *obj)
318 struct i915_vma *vma, *next;
321 drm_gem_object_reference(&obj->base);
322 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
323 if (i915_vma_unbind(vma))
326 ret = i915_gem_object_put_pages(obj);
327 drm_gem_object_unreference(&obj->base);
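/* Convert an object to a contiguous phys backing store with the requested
 * alignment, dropping any existing shmem pages first.
 */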
333 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
336 drm_dma_handle_t *phys;
339 if (obj->phys_handle) {
340 if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
346 if (obj->madv != I915_MADV_WILLNEED)
350 if (obj->base.filp == NULL)
354 ret = drop_pages(obj);
358 /* create a new object */
359 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
363 obj->phys_handle = phys;
365 obj->ops = &i915_gem_phys_ops;
368 return i915_gem_object_get_pages(obj);
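/* pwrite fast path for phys objects: copy straight into the contiguous
 * allocation, retrying with a plain copy_from_user (struct_mutex dropped)
 * if the atomic copy faults on the user pages.
 */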
372 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
373 struct drm_i915_gem_pwrite *args,
374 struct drm_file *file_priv)
376 struct drm_device *dev = obj->base.dev;
377 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
378 char __user *user_data = to_user_ptr(args->data_ptr);
381 /* We manually control the domain here and pretend that it
382 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
384 ret = i915_gem_object_wait_rendering(obj, false);
388 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
389 unsigned long unwritten;
391 /* The physical object once assigned is fixed for the lifetime
392 * of the obj, so we can safely drop the lock and continue
395 mutex_unlock(&dev->struct_mutex);
396 unwritten = copy_from_user(vaddr, user_data, args->size);
397 mutex_lock(&dev->struct_mutex);
402 drm_clflush_virt_range(vaddr, args->size);
403 i915_gem_chipset_flush(dev);
407 void *i915_gem_object_alloc(struct drm_device *dev)
409 return kmalloc(sizeof(struct drm_i915_gem_object),
410 M_DRM, M_WAITOK | M_ZERO);
413 void i915_gem_object_free(struct drm_i915_gem_object *obj)
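/* Common backend for the create and dumb_create ioctls: allocate a GEM
 * object of the page-rounded size and return a handle to it.
 */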
419 i915_gem_create(struct drm_file *file,
420 struct drm_device *dev,
424 struct drm_i915_gem_object *obj;
428 size = roundup(size, PAGE_SIZE);
432 /* Allocate the new object */
433 obj = i915_gem_alloc_object(dev, size);
437 ret = drm_gem_handle_create(file, &obj->base, &handle);
438 /* drop reference from allocate - handle holds it now */
439 drm_gem_object_unreference_unlocked(&obj->base);
448 i915_gem_dumb_create(struct drm_file *file,
449 struct drm_device *dev,
450 struct drm_mode_create_dumb *args)
452 /* have to work out size/pitch and return them */
453 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
454 args->size = args->pitch * args->height;
455 return i915_gem_create(file, dev,
456 args->size, &args->handle);
460 * Creates a new mm object and returns a handle to it.
463 i915_gem_create_ioctl(struct drm_device *dev, void *data,
464 struct drm_file *file)
466 struct drm_i915_gem_create *args = data;
468 return i915_gem_create(file, dev,
469 args->size, &args->handle);
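/* Copy helpers that compensate for bit-17 swizzling by XORing the GPU
 * offset with 64 on the affected cachelines while copying to or from
 * userspace, one 64-byte cacheline at a time.
 */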
473 __copy_to_user_swizzled(char __user *cpu_vaddr,
474 const char *gpu_vaddr, int gpu_offset,
477 int ret, cpu_offset = 0;
480 int cacheline_end = ALIGN(gpu_offset + 1, 64);
481 int this_length = min(cacheline_end - gpu_offset, length);
482 int swizzled_gpu_offset = gpu_offset ^ 64;
484 ret = __copy_to_user(cpu_vaddr + cpu_offset,
485 gpu_vaddr + swizzled_gpu_offset,
490 cpu_offset += this_length;
491 gpu_offset += this_length;
492 length -= this_length;
499 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
500 const char __user *cpu_vaddr,
503 int ret, cpu_offset = 0;
506 int cacheline_end = ALIGN(gpu_offset + 1, 64);
507 int this_length = min(cacheline_end - gpu_offset, length);
508 int swizzled_gpu_offset = gpu_offset ^ 64;
510 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
511 cpu_vaddr + cpu_offset,
516 cpu_offset += this_length;
517 gpu_offset += this_length;
518 length -= this_length;
525 * Pins the specified object's pages and synchronizes the object with
526 * GPU accesses. Sets needs_clflush to non-zero if the caller should
527 * flush the object from the CPU cache.
529 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
541 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
542 /* If we're not in the cpu read domain, set ourselves into the gtt
543 * read domain and manually flush cachelines (if required). This
544 * optimizes for the case when the gpu will dirty the data
545 * anyway again before the next pread happens. */
546 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
548 ret = i915_gem_object_wait_rendering(obj, true);
552 i915_gem_object_retire(obj);
555 ret = i915_gem_object_get_pages(obj);
559 i915_gem_object_pin_pages(obj);
564 /* Per-page copy function for the shmem pread fastpath.
565 * Flushes invalid cachelines before reading the target if
566 * needs_clflush is set. */
568 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
569 char __user *user_data,
570 bool page_do_bit17_swizzling, bool needs_clflush)
575 if (unlikely(page_do_bit17_swizzling))
578 vaddr = kmap_atomic(page);
580 drm_clflush_virt_range(vaddr + shmem_page_offset,
582 ret = __copy_to_user_inatomic(user_data,
583 vaddr + shmem_page_offset,
585 kunmap_atomic(vaddr);
587 return ret ? -EFAULT : 0;
591 shmem_clflush_swizzled_range(char *addr, unsigned long length,
594 if (unlikely(swizzled)) {
595 unsigned long start = (unsigned long) addr;
596 unsigned long end = (unsigned long) addr + length;
598 /* For swizzling simply ensure that we always flush both
599 * channels. Lame, but simple and it works. Swizzled
600 * pwrite/pread is far from a hotpath - current userspace
601 * doesn't use it at all. */
602 start = round_down(start, 128);
603 end = round_up(end, 128);
605 drm_clflush_virt_range((void *)start, end - start);
607 drm_clflush_virt_range(addr, length);
612 /* The only difference from the fast-path function is that this one can
613 * handle bit17 and uses non-atomic copy and kmap functions. */
615 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
616 char __user *user_data,
617 bool page_do_bit17_swizzling, bool needs_clflush)
624 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
626 page_do_bit17_swizzling);
628 if (page_do_bit17_swizzling)
629 ret = __copy_to_user_swizzled(user_data,
630 vaddr, shmem_page_offset,
633 ret = __copy_to_user(user_data,
634 vaddr + shmem_page_offset,
638 return ret ? -EFAULT : 0;
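/* Copy object contents out to userspace page by page: try the atomic
 * fast path first, and only drop struct_mutex for the prefaulting,
 * non-atomic slow path when that fails.
 */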
642 i915_gem_shmem_pread(struct drm_device *dev,
643 struct drm_i915_gem_object *obj,
644 struct drm_i915_gem_pread *args,
645 struct drm_file *file)
647 char __user *user_data;
650 int shmem_page_offset, page_length, ret = 0;
651 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
653 int needs_clflush = 0;
656 user_data = to_user_ptr(args->data_ptr);
659 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
661 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
665 offset = args->offset;
667 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
668 struct vm_page *page = obj->pages[i];
673 /* Operation in this page
675 * shmem_page_offset = offset within page in shmem file
676 * page_length = bytes to copy for this page
678 shmem_page_offset = offset_in_page(offset);
679 page_length = remain;
680 if ((shmem_page_offset + page_length) > PAGE_SIZE)
681 page_length = PAGE_SIZE - shmem_page_offset;
683 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
684 (page_to_phys(page) & (1 << 17)) != 0;
686 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
687 user_data, page_do_bit17_swizzling,
692 mutex_unlock(&dev->struct_mutex);
694 if (likely(!i915.prefault_disable) && !prefaulted) {
695 ret = fault_in_multipages_writeable(user_data, remain);
696 /* Userspace is tricking us, but we've already clobbered
697 * its pages with the prefault and promised to write the
698 * data up to the first fault. Hence ignore any errors
699 * and just continue. */
704 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
705 user_data, page_do_bit17_swizzling,
708 mutex_lock(&dev->struct_mutex);
714 remain -= page_length;
715 user_data += page_length;
716 offset += page_length;
720 i915_gem_object_unpin_pages(obj);
726 * Reads data from the object referenced by handle.
728 * On error, the contents of *data are undefined.
731 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
732 struct drm_file *file)
734 struct drm_i915_gem_pread *args = data;
735 struct drm_i915_gem_object *obj;
741 ret = i915_mutex_lock_interruptible(dev);
745 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
746 if (&obj->base == NULL) {
751 /* Bounds check source. */
752 if (args->offset > obj->base.size ||
753 args->size > obj->base.size - args->offset) {
758 trace_i915_gem_object_pread(obj, args->offset, args->size);
760 ret = i915_gem_shmem_pread(dev, obj, args, file);
763 drm_gem_object_unreference(&obj->base);
765 mutex_unlock(&dev->struct_mutex);
769 /* This is the fast write path which cannot handle
770 * page faults in the source data
774 fast_user_write(struct io_mapping *mapping,
775 loff_t page_base, int page_offset,
776 char __user *user_data,
779 void __iomem *vaddr_atomic;
781 unsigned long unwritten;
783 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
784 /* We can use the cpu mem copy function because this is X86. */
785 vaddr = (char __force*)vaddr_atomic + page_offset;
786 unwritten = __copy_from_user_inatomic_nocache(vaddr,
788 io_mapping_unmap_atomic(vaddr_atomic);
793 * This is the fast pwrite path, where we copy the data directly from the
794 * user into the GTT, uncached.
797 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
798 struct drm_i915_gem_object *obj,
799 struct drm_i915_gem_pwrite *args,
800 struct drm_file *file)
802 struct drm_i915_private *dev_priv = dev->dev_private;
804 loff_t offset, page_base;
805 char __user *user_data;
806 int page_offset, page_length, ret;
808 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
812 ret = i915_gem_object_set_to_gtt_domain(obj, true);
816 ret = i915_gem_object_put_fence(obj);
820 user_data = to_user_ptr(args->data_ptr);
823 offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
826 /* Operation in this page
828 * page_base = page offset within aperture
829 * page_offset = offset within page
830 * page_length = bytes to copy for this page
832 page_base = offset & ~PAGE_MASK;
833 page_offset = offset_in_page(offset);
834 page_length = remain;
835 if ((page_offset + remain) > PAGE_SIZE)
836 page_length = PAGE_SIZE - page_offset;
838 /* If we get a fault while copying data, then (presumably) our
839 * source page isn't available. Return the error and we'll
840 * retry in the slow path.
842 if (fast_user_write(dev_priv->gtt.mappable, page_base,
843 page_offset, user_data, page_length)) {
848 remain -= page_length;
849 user_data += page_length;
850 offset += page_length;
854 i915_gem_object_ggtt_unpin(obj);
859 /* Per-page copy function for the shmem pwrite fastpath.
860 * Flushes invalid cachelines before writing to the target if
861 * needs_clflush_before is set and flushes out any written cachelines after
862 * writing if needs_clflush_after is set. */
864 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
865 char __user *user_data,
866 bool page_do_bit17_swizzling,
867 bool needs_clflush_before,
868 bool needs_clflush_after)
873 if (unlikely(page_do_bit17_swizzling))
876 vaddr = kmap_atomic(page);
877 if (needs_clflush_before)
878 drm_clflush_virt_range(vaddr + shmem_page_offset,
880 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
881 user_data, page_length);
882 if (needs_clflush_after)
883 drm_clflush_virt_range(vaddr + shmem_page_offset,
885 kunmap_atomic(vaddr);
887 return ret ? -EFAULT : 0;
890 /* The only difference from the fast-path function is that this one can
891 * handle bit17 and uses non-atomic copy and kmap functions. */
893 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
894 char __user *user_data,
895 bool page_do_bit17_swizzling,
896 bool needs_clflush_before,
897 bool needs_clflush_after)
903 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
904 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
906 page_do_bit17_swizzling);
907 if (page_do_bit17_swizzling)
908 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
912 ret = __copy_from_user(vaddr + shmem_page_offset,
915 if (needs_clflush_after)
916 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
918 page_do_bit17_swizzling);
921 return ret ? -EFAULT : 0;
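/* Write side of the shmem copy loop: clflush partially written cachelines
 * where required, attempt the atomic per-page copy first, and fall back to
 * the non-atomic path with struct_mutex dropped.
 */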
925 i915_gem_shmem_pwrite(struct drm_device *dev,
926 struct drm_i915_gem_object *obj,
927 struct drm_i915_gem_pwrite *args,
928 struct drm_file *file)
932 char __user *user_data;
933 int shmem_page_offset, page_length, ret = 0;
934 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
935 int hit_slowpath = 0;
936 int needs_clflush_after = 0;
937 int needs_clflush_before = 0;
940 user_data = to_user_ptr(args->data_ptr);
943 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
945 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
946 /* If we're not in the cpu write domain, set ourselves into the gtt
947 * write domain and manually flush cachelines (if required). This
948 * optimizes for the case when the gpu will use the data
949 * right away and we therefore have to clflush anyway. */
950 needs_clflush_after = cpu_write_needs_clflush(obj);
951 ret = i915_gem_object_wait_rendering(obj, false);
955 i915_gem_object_retire(obj);
957 /* Same trick applies to invalidate partially written cachelines read
959 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
960 needs_clflush_before =
961 !cpu_cache_is_coherent(dev, obj->cache_level);
963 ret = i915_gem_object_get_pages(obj);
967 i915_gem_object_pin_pages(obj);
969 offset = args->offset;
972 VM_OBJECT_LOCK(obj->base.vm_obj);
973 vm_object_pip_add(obj->base.vm_obj, 1);
974 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
975 struct vm_page *page = obj->pages[i];
976 int partial_cacheline_write;
978 if (i < offset >> PAGE_SHIFT)
984 /* Operation in this page
986 * shmem_page_offset = offset within page in shmem file
987 * page_length = bytes to copy for this page
989 shmem_page_offset = offset_in_page(offset);
991 page_length = remain;
992 if ((shmem_page_offset + page_length) > PAGE_SIZE)
993 page_length = PAGE_SIZE - shmem_page_offset;
995 /* If we don't overwrite a cacheline completely we need to be
996 * careful to have up-to-date data by first clflushing. Don't
997 * overcomplicate things and flush the entire page. */
998 partial_cacheline_write = needs_clflush_before &&
999 ((shmem_page_offset | page_length)
1000 & (cpu_clflush_line_size - 1));
1002 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1003 (page_to_phys(page) & (1 << 17)) != 0;
1005 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1006 user_data, page_do_bit17_swizzling,
1007 partial_cacheline_write,
1008 needs_clflush_after);
1013 mutex_unlock(&dev->struct_mutex);
1014 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1015 user_data, page_do_bit17_swizzling,
1016 partial_cacheline_write,
1017 needs_clflush_after);
1019 mutex_lock(&dev->struct_mutex);
1025 remain -= page_length;
1026 user_data += page_length;
1027 offset += page_length;
1029 vm_object_pip_wakeup(obj->base.vm_obj);
1030 VM_OBJECT_UNLOCK(obj->base.vm_obj);
1033 i915_gem_object_unpin_pages(obj);
1037 * Fixup: Flush cpu caches in case we didn't flush the dirty
1038 * cachelines in-line while writing and the object moved
1039 * out of the cpu write domain while we've dropped the lock.
1041 if (!needs_clflush_after &&
1042 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1043 if (i915_gem_clflush_object(obj, obj->pin_display))
1044 i915_gem_chipset_flush(dev);
1048 if (needs_clflush_after)
1049 i915_gem_chipset_flush(dev);
1055 * Writes data to the object referenced by handle.
1057 * On error, the contents of the buffer that were to be modified are undefined.
1060 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1061 struct drm_file *file)
1063 struct drm_i915_private *dev_priv = dev->dev_private;
1064 struct drm_i915_gem_pwrite *args = data;
1065 struct drm_i915_gem_object *obj;
1068 if (args->size == 0)
1071 if (likely(!i915.prefault_disable)) {
1072 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1078 intel_runtime_pm_get(dev_priv);
1080 ret = i915_mutex_lock_interruptible(dev);
1084 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1085 if (&obj->base == NULL) {
1090 /* Bounds check destination. */
1091 if (args->offset > obj->base.size ||
1092 args->size > obj->base.size - args->offset) {
1097 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1100 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1101 * it would end up going through the fenced access, and we'll get
1102 * different detiling behavior between reading and writing.
1103 * pread/pwrite currently are reading and writing from the CPU
1104 * perspective, requiring manual detiling by the client.
1107 if (obj->tiling_mode == I915_TILING_NONE &&
1108 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1109 cpu_write_needs_clflush(obj)) {
1110 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1111 /* Note that the gtt paths might fail with non-page-backed user
1112 * pointers (e.g. gtt mappings when moving data between
1113 * textures). Fallback to the shmem path in that case. */
1116 if (ret == -EFAULT || ret == -ENOSPC) {
1117 if (obj->phys_handle)
1118 ret = i915_gem_phys_pwrite(obj, args, file);
1120 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1124 drm_gem_object_unreference(&obj->base);
1126 mutex_unlock(&dev->struct_mutex);
1128 intel_runtime_pm_put(dev_priv);
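/* Translate the current reset state into an errno: -EAGAIN while a reset
 * is still pending (so interruptible callers retry), -EIO if the GPU is
 * terminally wedged or the caller cannot handle -EAGAIN.
 */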
1134 i915_gem_check_wedge(struct i915_gpu_error *error,
1137 if (i915_reset_in_progress(error)) {
1138 /* Non-interruptible callers can't handle -EAGAIN, hence return
1139 * -EIO unconditionally for these. */
1143 /* Recovery complete, but the reset failed ... */
1144 if (i915_terminally_wedged(error))
1148 * Check if GPU Reset is in progress - we need intel_ring_begin
1149 * to work properly to reinit the hw state while the gpu is
1150 * still marked as reset-in-progress. Handle this with a flag.
1152 if (!error->reload_in_reset)
1160 * Compare arbitrary request against outstanding lazy request. Emit on match.
1163 i915_gem_check_olr(struct drm_i915_gem_request *req)
1167 WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
1170 if (req == req->ring->outstanding_lazy_request)
1171 ret = i915_add_request(req->ring);
1177 static void fake_irq(unsigned long data)
1179 wake_up_process((struct task_struct *)data);
1182 static bool missed_irq(struct drm_i915_private *dev_priv,
1183 struct intel_engine_cs *ring)
1185 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1189 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1191 if (file_priv == NULL)
1194 return !atomic_xchg(&file_priv->rps_wait_boost, true);
1198 * __i915_wait_request - wait until execution of request has finished
1200 * @reset_counter: reset sequence associated with the given request
1201 * @interruptible: do an interruptible wait (normally yes)
1202 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1204 * Note: It is of utmost importance that the passed in seqno and reset_counter
1205 * values have been read by the caller in an smp safe manner. Where read-side
1206 * locks are involved, it is sufficient to read the reset_counter before
1207 * unlocking the lock that protects the seqno. For lockless tricks, the
1208 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1211 * Returns 0 if the request was found within the allotted time. Else returns the
1212 * errno with remaining time filled in timeout argument.
1214 int __i915_wait_request(struct drm_i915_gem_request *req,
1215 unsigned reset_counter,
1218 struct drm_i915_file_private *file_priv)
1220 struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1221 struct drm_device *dev = ring->dev;
1222 struct drm_i915_private *dev_priv = dev->dev_private;
1223 const bool irq_test_in_progress =
1224 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1225 unsigned long timeout_expire;
1227 bool wait_forever = true;
1231 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
1233 if (i915_gem_request_completed(req, true))
1236 if (timeout != NULL)
1237 wait_forever = false;
1239 timeout_expire = timeout ?
1240 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
1242 if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) {
1243 gen6_rps_boost(dev_priv);
1245 mod_delayed_work(dev_priv->wq,
1246 &file_priv->mm.idle_work,
1247 msecs_to_jiffies(100));
1250 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1253 /* Record current time in case interrupted by signal, or wedged */
1254 trace_i915_gem_request_wait_begin(req);
1255 before = ktime_get_raw_ns();
1258 (i915_seqno_passed(ring->get_seqno(ring, false), i915_gem_request_get_seqno(req)) || \
1259 i915_reset_in_progress(&dev_priv->gpu_error) || \
1260 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
1263 end = wait_event_interruptible_timeout(ring->irq_queue,
1267 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1270 /* We need to check whether any gpu reset happened in between
1271 * the caller grabbing the seqno and now ... */
1272 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
1275 * ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
1277 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1280 } while (end == 0 && wait_forever);
1282 now = ktime_get_raw_ns();
1283 trace_i915_gem_request_wait_end(req);
1285 ring->irq_put(ring);
1289 s64 tres = *timeout - (now - before);
1291 *timeout = tres < 0 ? 0 : tres;
1296 case -EAGAIN: /* Wedged */
1297 case -ERESTARTSYS: /* Signal */
1299 case 0: /* Timeout */
1300 return -ETIMEDOUT; /* -ETIME on Linux */
1301 default: /* Completed */
1302 WARN_ON(end < 0); /* We're not aware of other errors */
1308 * Waits for a request to be signaled, and cleans up the
1309 * request and object lists appropriately for that event.
1312 i915_wait_request(struct drm_i915_gem_request *req)
1314 struct drm_device *dev;
1315 struct drm_i915_private *dev_priv;
1317 unsigned reset_counter;
1320 BUG_ON(req == NULL);
1322 dev = req->ring->dev;
1323 dev_priv = dev->dev_private;
1324 interruptible = dev_priv->mm.interruptible;
1326 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1328 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1332 ret = i915_gem_check_olr(req);
1336 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1337 i915_gem_request_reference(req);
1338 ret = __i915_wait_request(req, reset_counter,
1339 interruptible, NULL, NULL);
1340 i915_gem_request_unreference(req);
1345 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
1350 /* Manually manage the write flush as we may have not yet
1351 * retired the buffer.
1353 * Note that the last_write_req is always the earlier of
1354 * the two (read/write) requests, so if we have successfully waited,
1355 * we know we have passed the last write.
1357 i915_gem_request_assign(&obj->last_write_req, NULL);
1363 * Ensures that all rendering to the object has completed and the object is
1364 * safe to unbind from the GTT or access from the CPU.
1366 static __must_check int
1367 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1370 struct drm_i915_gem_request *req;
1373 req = readonly ? obj->last_write_req : obj->last_read_req;
1377 ret = i915_wait_request(req);
1381 return i915_gem_object_wait_rendering__tail(obj);
1384 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1385 * as the object state may change during this call.
1387 static __must_check int
1388 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1389 struct drm_i915_file_private *file_priv,
1392 struct drm_i915_gem_request *req;
1393 struct drm_device *dev = obj->base.dev;
1394 struct drm_i915_private *dev_priv = dev->dev_private;
1395 unsigned reset_counter;
1398 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1399 BUG_ON(!dev_priv->mm.interruptible);
1401 req = readonly ? obj->last_write_req : obj->last_read_req;
1405 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1409 ret = i915_gem_check_olr(req);
1413 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1414 i915_gem_request_reference(req);
1415 mutex_unlock(&dev->struct_mutex);
1416 ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv);
1417 mutex_lock(&dev->struct_mutex);
1418 i915_gem_request_unreference(req);
1422 return i915_gem_object_wait_rendering__tail(obj);
1426 * Called when user space prepares to use an object with the CPU, either
1427 * through the mmap ioctl's mapping or a GTT mapping.
1430 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1431 struct drm_file *file)
1433 struct drm_i915_gem_set_domain *args = data;
1434 struct drm_i915_gem_object *obj;
1435 uint32_t read_domains = args->read_domains;
1436 uint32_t write_domain = args->write_domain;
1439 /* Only handle setting domains to types used by the CPU. */
1440 if (write_domain & I915_GEM_GPU_DOMAINS)
1443 if (read_domains & I915_GEM_GPU_DOMAINS)
1446 /* Having something in the write domain implies it's in the read
1447 * domain, and only that read domain. Enforce that in the request.
1449 if (write_domain != 0 && read_domains != write_domain)
1452 ret = i915_mutex_lock_interruptible(dev);
1456 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1457 if (&obj->base == NULL) {
1462 /* Try to flush the object off the GPU without holding the lock.
1463 * We will repeat the flush holding the lock in the normal manner
1464 * to catch cases where we are gazumped.
1466 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1472 if (read_domains & I915_GEM_DOMAIN_GTT)
1473 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1475 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1478 drm_gem_object_unreference(&obj->base);
1480 mutex_unlock(&dev->struct_mutex);
1485 * Called when user space has done writes to this buffer
1488 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1489 struct drm_file *file)
1491 struct drm_i915_gem_sw_finish *args = data;
1492 struct drm_i915_gem_object *obj;
1495 ret = i915_mutex_lock_interruptible(dev);
1499 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1500 if (&obj->base == NULL) {
1505 /* Pinned buffers may be scanout, so flush the cache */
1506 if (obj->pin_display)
1507 i915_gem_object_flush_cpu_write_domain(obj);
1509 drm_gem_object_unreference(&obj->base);
1511 mutex_unlock(&dev->struct_mutex);
1516 * Maps the contents of an object, returning the address it is mapped
1519 * While the mapping holds a reference on the contents of the object, it doesn't
1520 * imply a ref on the object itself.
1524 * DRM driver writers who look at this function as an example for how to do GEM
1525 * mmap support, please don't implement mmap support like here. The modern way
1526 * to implement DRM mmap support is with an mmap offset ioctl (like
1527 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1528 * That way debug tooling like valgrind will understand what's going on; hiding
1529 * the mmap call in a driver-private ioctl will break that. The i915 driver only
1530 * does cpu mmaps this way because we didn't know better.
1533 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1534 struct drm_file *file)
1536 struct drm_i915_gem_mmap *args = data;
1537 struct drm_gem_object *obj;
1539 struct proc *p = curproc;
1540 vm_map_t map = &p->p_vmspace->vm_map;
1544 obj = drm_gem_object_lookup(dev, file, args->handle);
1548 if (args->size == 0)
1551 size = round_page(args->size);
1552 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1558 * Call hint to ensure that NULL is not returned as a valid address
1559 * and to reduce vm_map traversals. XXX causes instability, use a
1560 * fixed low address as the start point instead to avoid the NULL
1566 * Use 256KB alignment. It is unclear why this matters for a
1567 * virtual address but it appears to fix a number of application/X
1568 * crashes and kms console switching is much faster.
1570 vm_object_hold(obj->vm_obj);
1571 vm_object_reference_locked(obj->vm_obj);
1572 vm_object_drop(obj->vm_obj);
1574 rv = vm_map_find(map, obj->vm_obj, NULL,
1575 args->offset, &addr, args->size,
1576 256 * 1024, /* align */
1578 VM_MAPTYPE_NORMAL, /* maptype */
1579 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1580 VM_PROT_READ | VM_PROT_WRITE, /* max */
1581 MAP_SHARED /* cow */);
1582 if (rv != KERN_SUCCESS) {
1583 vm_object_deallocate(obj->vm_obj);
1584 error = -vm_mmap_to_errno(rv);
1586 args->addr_ptr = (uint64_t)addr;
1589 drm_gem_object_unreference(obj);
1594 * i915_gem_fault - fault a page into the GTT
1596 * vm_obj is locked on entry and expected to be locked on return.
1598 * The vm_pager has placemarked the object with an anonymous memory page
1599 * which we must replace atomically to avoid races against concurrent faults
1600 * on the same page. XXX we currently are unable to do this atomically.
1602 * If we are to return an error we should not touch the anonymous page,
1603 * the caller will deallocate it.
1605 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
1606 * in that case. Release all resources and wait 1 tick before retrying.
1607 * This is a huge problem which needs to be fixed by getting rid of most
1608 * of the interruptibility. The Linux code does not retry but does appear
1609 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1610 * to be able to retry.
1614 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1615 * from userspace. The fault handler takes care of binding the object to
1616 * the GTT (if needed), allocating and programming a fence register (again,
1617 * only if needed based on whether the old reg is still valid or the object
1618 * is tiled) and inserting a new PTE into the faulting process.
1620 * Note that the faulting process may involve evicting existing objects
1621 * from the GTT and/or fence registers to make room. So performance may
1622 * suffer if the GTT working set is large or there are few fence registers
1625 * vm_obj is locked on entry and expected to be locked on return. The VM
1626 * pager has placed an anonymous memory page at (obj,offset) which we have
1629 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1631 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1632 struct drm_device *dev = obj->base.dev;
1633 struct drm_i915_private *dev_priv = dev->dev_private;
1634 unsigned long page_offset;
1635 vm_page_t m, oldm = NULL;
1638 bool write = !!(prot & VM_PROT_WRITE);
1640 intel_runtime_pm_get(dev_priv);
1642 /* We don't use vmf->pgoff since that has the fake offset */
1643 page_offset = (unsigned long)offset;
1646 ret = i915_mutex_lock_interruptible(dev);
1650 trace_i915_gem_object_fault(obj, page_offset, true, write);
1652 /* Try to flush the object off the GPU first without holding the lock.
1653 * Upon reacquiring the lock, we will perform our sanity checks and then
1654 * repeat the flush holding the lock in the normal manner to catch cases
1655 * where we are gazumped.
1657 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1661 /* Access to snoopable pages through the GTT is incoherent. */
1662 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1667 /* Now bind it into the GTT if needed */
1668 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1672 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1676 ret = i915_gem_object_get_fence(obj);
1681 * START FREEBSD MAGIC
1683 * Add a pip count to avoid destruction and certain other
1684 * complex operations (such as collapses?) while unlocked.
1687 vm_object_pip_add(vm_obj, 1);
1692 * XXX We must currently remove the placeholder page now to avoid
1693 * a deadlock against a concurrent i915_gem_release_mmap().
1694 * Otherwise concurrent operation will block on the busy page
1695 * while holding locks which we need to obtain.
1697 if (*mres != NULL) {
1699 if ((oldm->flags & PG_BUSY) == 0)
1700 kprintf("i915_gem_fault: Page was not busy\n");
1702 vm_page_remove(oldm);
1712 * Since the object lock was dropped, another thread might have
1713 * faulted on the same GTT address and instantiated the mapping.
1716 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1719 * Try to busy the page, retry on failure (non-zero ret).
1721 if (vm_page_busy_try(m, false)) {
1722 kprintf("i915_gem_fault: PG_BUSY\n");
1732 obj->fault_mappable = true;
1734 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
1735 i915_gem_obj_ggtt_offset(obj) +
1741 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
1742 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
1745 * Try to busy the page. Fails on non-zero return.
1747 if (vm_page_busy_try(m, false)) {
1748 kprintf("i915_gem_fault: PG_BUSY(2)\n");
1752 m->valid = VM_PAGE_BITS_ALL;
1755 * Finally, remap it using the new GTT offset.
1757 * (object expected to be in a locked state)
1759 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1763 i915_gem_object_ggtt_unpin(obj);
1764 mutex_unlock(&dev->struct_mutex);
1769 * ALTERNATIVE ERROR RETURN.
1771 * OBJECT EXPECTED TO BE LOCKED.
1774 i915_gem_object_ggtt_unpin(obj);
1776 mutex_unlock(&dev->struct_mutex);
1781 * We eat errors when the gpu is terminally wedged to avoid
1782 * userspace unduly crashing (gl has no provisions for mmaps to
1783 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1784 * and so needs to be reported.
1786 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1787 // ret = VM_FAULT_SIGBUS;
1793 * EAGAIN means the gpu is hung and we'll wait for the error
1794 * handler to reset everything when re-faulting in
1795 * i915_mutex_lock_interruptible.
1800 VM_OBJECT_UNLOCK(vm_obj);
1802 tsleep(&dummy, 0, "delay", 1); /* XXX */
1803 VM_OBJECT_LOCK(vm_obj);
1806 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1807 ret = VM_PAGER_ERROR;
1815 vm_object_pip_wakeup(vm_obj);
1817 intel_runtime_pm_put(dev_priv);
1822 * i915_gem_release_mmap - remove physical page mappings
1823 * @obj: obj in question
1825 * Preserve the reservation of the mmapping with the DRM core code, but
1826 * relinquish ownership of the pages back to the system.
1828 * It is vital that we remove the page mapping if we have mapped a tiled
1829 * object through the GTT and then lose the fence register due to
1830 * resource pressure. Similarly if the object has been moved out of the
1831 * aperture, then pages mapped into userspace must be revoked. Removing the
1832 * mapping will then trigger a page fault on the next user access, allowing
1833 * fixup by i915_gem_fault().
1836 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1842 if (!obj->fault_mappable)
1845 devobj = cdev_pager_lookup(obj);
1846 if (devobj != NULL) {
1847 page_count = OFF_TO_IDX(obj->base.size);
1849 VM_OBJECT_LOCK(devobj);
1850 for (i = 0; i < page_count; i++) {
1851 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
1854 cdev_pager_free_page(devobj, m);
1856 VM_OBJECT_UNLOCK(devobj);
1857 vm_object_deallocate(devobj);
1860 obj->fault_mappable = false;
1864 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1866 struct drm_i915_gem_object *obj;
1868 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1869 i915_gem_release_mmap(obj);
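/* Return the size of the GTT range a fence register would have to cover
 * for an object of the given size and tiling mode.
 */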
1873 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1877 if (INTEL_INFO(dev)->gen >= 4 ||
1878 tiling_mode == I915_TILING_NONE)
1881 /* Previous chips need a power-of-two fence region when tiling */
1882 if (INTEL_INFO(dev)->gen == 3)
1883 gtt_size = 1024*1024;
1885 gtt_size = 512*1024;
1887 while (gtt_size < size)
1894 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1895 * @obj: object to check
1897 * Return the required GTT alignment for an object, taking into account
1898 * potential fence register mapping.
1901 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1902 int tiling_mode, bool fenced)
1905 * Minimum alignment is 4k (GTT page size), but might be greater
1906 * if a fence register is needed for the object.
1908 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
1909 tiling_mode == I915_TILING_NONE)
1913 * Previous chips need to be aligned to the size of the smallest
1914 * fence register that can contain the object.
1916 return i915_gem_get_gtt_size(dev, size, tiling_mode);
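/* Reserve a fake mmap offset in the DRM vma manager for this object,
 * shrinking our own caches if the mmap space is too fragmented to find
 * room.
 */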
1919 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1921 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1925 if (drm_vma_node_has_offset(&obj->base.vma_node))
1929 dev_priv->mm.shrinker_no_lock_stealing = true;
1931 ret = drm_gem_create_mmap_offset(&obj->base);
1935 /* Badly fragmented mmap space? The only way we can recover
1936 * space is by destroying unwanted objects. We can't randomly release
1937 * mmap_offsets as userspace expects them to be persistent for the
1938 * lifetime of the objects. The closest we can do is to release the
1939 * offsets on purgeable objects by truncating them and marking them purged,
1940 * which prevents userspace from ever using that object again.
1942 i915_gem_shrink(dev_priv,
1943 obj->base.size >> PAGE_SHIFT,
1945 I915_SHRINK_UNBOUND |
1946 I915_SHRINK_PURGEABLE);
1947 ret = drm_gem_create_mmap_offset(&obj->base);
1951 i915_gem_shrink_all(dev_priv);
1952 ret = drm_gem_create_mmap_offset(&obj->base);
1954 dev_priv->mm.shrinker_no_lock_stealing = false;
1959 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1961 drm_gem_free_mmap_offset(&obj->base);
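/* Look up the object and report the fake offset that userspace must pass
 * to mmap() in order to map it through the GTT aperture.
 */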
1965 i915_gem_mmap_gtt(struct drm_file *file,
1966 struct drm_device *dev,
1970 struct drm_i915_private *dev_priv = dev->dev_private;
1971 struct drm_i915_gem_object *obj;
1974 ret = i915_mutex_lock_interruptible(dev);
1978 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1979 if (&obj->base == NULL) {
1984 if (obj->base.size > dev_priv->gtt.mappable_end) {
1989 if (obj->madv != I915_MADV_WILLNEED) {
1990 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1995 ret = i915_gem_object_create_mmap_offset(obj);
1999 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2000 DRM_GEM_MAPPING_KEY;
2003 drm_gem_object_unreference(&obj->base);
2005 mutex_unlock(&dev->struct_mutex);
2010 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2012 * @data: GTT mapping ioctl data
2013 * @file: GEM object info
2015 * Simply returns the fake offset to userspace so it can mmap it.
2016 * The mmap call will end up in drm_gem_mmap(), which will set things
2017 * up so we can get faults in the handler above.
2019 * The fault handler will take care of binding the object into the GTT
2020 * (since it may have been evicted to make room for something), allocating
2021 * a fence register, and mapping the appropriate aperture address into
2025 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2026 struct drm_file *file)
2028 struct drm_i915_gem_mmap_gtt *args = data;
2030 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2034 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2036 return obj->madv == I915_MADV_DONTNEED;
2039 /* Immediately discard the backing storage */
2041 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2045 vm_obj = obj->base.vm_obj;
2046 VM_OBJECT_LOCK(vm_obj);
2047 vm_object_page_remove(vm_obj, 0, 0, false);
2048 VM_OBJECT_UNLOCK(vm_obj);
2050 obj->madv = __I915_MADV_PURGED;
2053 /* Try to discard unwanted pages */
2055 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2058 struct address_space *mapping;
2061 switch (obj->madv) {
2062 case I915_MADV_DONTNEED:
2063 i915_gem_object_truncate(obj);
2064 case __I915_MADV_PURGED:
2069 if (obj->base.filp == NULL)
2072 mapping = file_inode(obj->base.filp)->i_mapping,
2073 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2078 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2080 int page_count = obj->base.size / PAGE_SIZE;
2086 BUG_ON(obj->madv == __I915_MADV_PURGED);
2088 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2090 /* In the event of a disaster, abandon all caches and
2091 * hope for the best.
2093 WARN_ON(ret != -EIO);
2094 i915_gem_clflush_object(obj, true);
2095 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2098 if (i915_gem_object_needs_bit17_swizzle(obj))
2099 i915_gem_object_save_bit_17_swizzle(obj);
2101 if (obj->madv == I915_MADV_DONTNEED)
2104 for (i = 0; i < page_count; i++) {
2105 struct vm_page *page = obj->pages[i];
2108 set_page_dirty(page);
2110 if (obj->madv == I915_MADV_WILLNEED)
2111 mark_page_accessed(page);
2113 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
2114 vm_page_unwire(obj->pages[i], 1);
2115 vm_page_wakeup(obj->pages[i]);
2124 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2126 const struct drm_i915_gem_object_ops *ops = obj->ops;
2128 if (obj->pages == NULL)
2131 if (obj->pages_pin_count)
2134 BUG_ON(i915_gem_obj_bound_any(obj));
2136 /* ->put_pages might need to allocate memory for the bit17 swizzle
2137 * array, hence protect them from being reaped by removing them from gtt
2139 list_del(&obj->global_list);
2141 ops->put_pages(obj);
2144 i915_gem_object_invalidate(obj);
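/* Walk the unbound and/or bound object lists (as selected by flags) and
 * release backing pages until roughly 'target' pages have been reclaimed.
 */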
2150 i915_gem_shrink(struct drm_i915_private *dev_priv,
2151 long target, unsigned flags)
2154 struct list_head *list;
2157 { &dev_priv->mm.unbound_list, I915_SHRINK_UNBOUND },
2158 { &dev_priv->mm.bound_list, I915_SHRINK_BOUND },
2161 unsigned long count = 0;
2164 * As we may completely rewrite the (un)bound list whilst unbinding
2165 * (due to retiring requests) we have to strictly process only
2166 * one element of the list at a time, and recheck the list
2167 * on every iteration.
2169 * In particular, we must hold a reference whilst removing the
2170 * object as we may end up waiting for and/or retiring the objects.
2171 * This might release the final reference (held by the active list)
2172 * and result in the object being freed from under us. This is
2173 * similar to the precautions the eviction code must take whilst
2176 * Also note that although these lists do not hold a reference to
2177 * the object we can safely grab one here: The final object
2178 * unreferencing and the bound_list are both protected by the
2179 * dev->struct_mutex and so we won't ever be able to observe an
2180 * object on the bound_list with a reference count equal to 0.
2182 for (phase = phases; phase->list; phase++) {
2183 struct list_head still_in_list;
2185 if ((flags & phase->bit) == 0)
2188 INIT_LIST_HEAD(&still_in_list);
2189 while (count < target && !list_empty(phase->list)) {
2190 struct drm_i915_gem_object *obj;
2191 struct i915_vma *vma, *v;
2193 obj = list_first_entry(phase->list,
2194 typeof(*obj), global_list);
2195 list_move_tail(&obj->global_list, &still_in_list);
2197 if (flags & I915_SHRINK_PURGEABLE &&
2198 !i915_gem_object_is_purgeable(obj))
2201 drm_gem_object_reference(&obj->base);
2203 /* For the unbound phase, this should be a no-op! */
2204 list_for_each_entry_safe(vma, v,
2205 &obj->vma_list, vma_link)
2206 if (i915_vma_unbind(vma))
2209 if (i915_gem_object_put_pages(obj) == 0)
2210 count += obj->base.size >> PAGE_SHIFT;
2212 drm_gem_object_unreference(&obj->base);
2214 list_splice(&still_in_list, phase->list);
2220 static unsigned long
2221 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2223 i915_gem_evict_everything(dev_priv->dev);
2224 return i915_gem_shrink(dev_priv, LONG_MAX,
2225 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND);
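/* Allocate the page array and populate it from the object's VM backing
 * store, leaning on our shrinker (and ultimately shrink_all) when the
 * system is short on memory.
 */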
2229 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2231 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2232 int page_count, i, j;
2234 struct vm_page *page;
2236 /* Assert that the object is not currently in any GPU domain. As it
2237 * wasn't in the GTT, there shouldn't be any way it could have been in
2240 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2241 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2243 page_count = obj->base.size / PAGE_SIZE;
2244 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
2247 /* Get the list of pages out of our struct file. They'll be pinned
2248 * at this point until we release them.
2250 * Fail silently without starting the shrinker
2252 vm_obj = obj->base.vm_obj;
2253 VM_OBJECT_LOCK(vm_obj);
2254 for (i = 0; i < page_count; i++) {
2255 page = shmem_read_mapping_page(vm_obj, i);
2257 i915_gem_shrink(dev_priv,
2260 I915_SHRINK_UNBOUND |
2261 I915_SHRINK_PURGEABLE);
2262 page = shmem_read_mapping_page(vm_obj, i);
2265 /* We've tried hard to allocate the memory by reaping
2266 * our own buffer, now let the real VM do its job and
2267 * go down in flames if truly OOM.
2270 i915_gem_shrink_all(dev_priv);
2271 page = shmem_read_mapping_page(vm_obj, i);
2275 #ifdef CONFIG_SWIOTLB
2276 if (swiotlb_nr_tbl()) {
2278 sg_set_page(sg, page, PAGE_SIZE, 0);
2283 obj->pages[i] = page;
2285 #ifdef CONFIG_SWIOTLB
2286 if (!swiotlb_nr_tbl())
2288 VM_OBJECT_UNLOCK(vm_obj);
2290 if (i915_gem_object_needs_bit17_swizzle(obj))
2291 i915_gem_object_do_bit_17_swizzle(obj);
2293 if (obj->tiling_mode != I915_TILING_NONE &&
2294 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2295 i915_gem_object_pin_pages(obj);
2300 for (j = 0; j < i; j++) {
2301 page = obj->pages[j];
2302 vm_page_busy_wait(page, FALSE, "i915gem");
2303 vm_page_unwire(page, 0);
2304 vm_page_wakeup(page);
2306 VM_OBJECT_UNLOCK(vm_obj);
2312 /* Ensure that the associated pages are gathered from the backing storage
2313 * and pinned into our object. i915_gem_object_get_pages() may be called
2314 * multiple times before they are released by a single call to
2315 * i915_gem_object_put_pages() - once the pages are no longer referenced
2316 * either as a result of memory pressure (reaping pages under the shrinker)
2317 * or as the object is itself released.
2320 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2322 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2323 const struct drm_i915_gem_object_ops *ops = obj->ops;
2329 if (obj->madv != I915_MADV_WILLNEED) {
2330 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2334 BUG_ON(obj->pages_pin_count);
2336 ret = ops->get_pages(obj);
2340 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
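/* Move the object onto the ring's active list and record the request that
 * must complete before the object can be considered idle again.
 */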
2345 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2346 struct intel_engine_cs *ring)
2348 struct drm_i915_gem_request *req;
2349 struct intel_engine_cs *old_ring;
2351 BUG_ON(ring == NULL);
2353 req = intel_ring_get_request(ring);
2354 old_ring = i915_gem_request_get_ring(obj->last_read_req);
2356 if (old_ring != ring && obj->last_write_req) {
2357 /* Keep the request relative to the current ring */
2358 i915_gem_request_assign(&obj->last_write_req, req);
2361 /* Add a reference if we're newly entering the active list. */
2363 drm_gem_object_reference(&obj->base);
2367 list_move_tail(&obj->ring_list, &ring->active_list);
2369 i915_gem_request_assign(&obj->last_read_req, req);
2372 void i915_vma_move_to_active(struct i915_vma *vma,
2373 struct intel_engine_cs *ring)
2375 list_move_tail(&vma->mm_list, &vma->vm->active_list);
2376 return i915_gem_object_move_to_active(vma->obj, ring);
2380 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2382 struct i915_vma *vma;
2384 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2385 BUG_ON(!obj->active);
2387 list_for_each_entry(vma, &obj->vma_list, vma_link) {
2388 if (!list_empty(&vma->mm_list))
2389 list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
2392 intel_fb_obj_flush(obj, true);
2394 list_del_init(&obj->ring_list);
2396 i915_gem_request_assign(&obj->last_read_req, NULL);
2397 i915_gem_request_assign(&obj->last_write_req, NULL);
2398 obj->base.write_domain = 0;
2400 i915_gem_request_assign(&obj->last_fenced_req, NULL);
2403 drm_gem_object_unreference(&obj->base);
2405 WARN_ON(i915_verify_lists(dev));
2409 i915_gem_object_retire(struct drm_i915_gem_object *obj)
2411 if (obj->last_read_req == NULL)
2414 if (i915_gem_request_completed(obj->last_read_req, true))
2415 i915_gem_object_move_to_inactive(obj);
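/* Idle every ring and retire outstanding requests so the hardware seqno
 * can be rewound to the requested value without confusing in-flight work.
 */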
2419 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2421 struct drm_i915_private *dev_priv = dev->dev_private;
2422 struct intel_engine_cs *ring;
2425 /* Carefully retire all requests without writing to the rings */
2426 for_each_ring(ring, dev_priv, i) {
2427 ret = intel_ring_idle(ring);
2431 i915_gem_retire_requests(dev);
2433 /* Finally reset hw state */
2434 for_each_ring(ring, dev_priv, i) {
2435 intel_ring_init_seqno(ring, seqno);
2437 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2438 ring->semaphore.sync_seqno[j] = 0;
2444 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2446 struct drm_i915_private *dev_priv = dev->dev_private;
2452 /* HWS page needs to be set less than what we
2453 * will inject to the ring
2455 ret = i915_gem_init_seqno(dev, seqno - 1);
2459 /* Carefully set the last_seqno value so that wrap
2460 * detection still works
2462 dev_priv->next_seqno = seqno;
2463 dev_priv->last_seqno = seqno - 1;
2464 if (dev_priv->last_seqno == 0)
2465 dev_priv->last_seqno--;
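/*
 * Illustrative example of the wrap handling above: i915_gem_set_seqno(dev, 1)
 * leaves next_seqno = 1 and last_seqno = 0, and since seqno 0 is reserved the
 * decrement wraps last_seqno around to 0xffffffff, so the "have we passed
 * this seqno yet?" comparisons stay consistent across the wrap point.
 */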
2471 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2473 struct drm_i915_private *dev_priv = dev->dev_private;
2475 /* reserve 0 for non-seqno */
2476 if (dev_priv->next_seqno == 0) {
2477 int ret = i915_gem_init_seqno(dev, 0);
2481 dev_priv->next_seqno = 1;
2484 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2488 int __i915_add_request(struct intel_engine_cs *ring,
2489 struct drm_file *file,
2490 struct drm_i915_gem_object *obj)
2492 struct drm_i915_private *dev_priv = ring->dev->dev_private;
2493 struct drm_i915_gem_request *request;
2494 struct intel_ringbuffer *ringbuf;
2498 request = ring->outstanding_lazy_request;
2499 if (WARN_ON(request == NULL))
2502 if (i915.enable_execlists) {
2503 ringbuf = request->ctx->engine[ring->id].ringbuf;
2505 ringbuf = ring->buffer;
2507 request_start = intel_ring_get_tail(ringbuf);
2509 * Emit any outstanding flushes - execbuf can fail to emit the flush
2510 * after having emitted the batchbuffer command. Hence we need to fix
2511 * things up similar to emitting the lazy request. The difference here
2512 * is that the flush _must_ happen before the next request, no matter
2515 if (i915.enable_execlists) {
2516 ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
2520 ret = intel_ring_flush_all_caches(ring);
2525 /* Record the position of the start of the request so that
2526 * should we detect the updated seqno part-way through the
2527 * GPU processing the request, we never over-estimate the
2528 * position of the head.
2530 request->postfix = intel_ring_get_tail(ringbuf);
2532 if (i915.enable_execlists) {
2533 ret = ring->emit_request(ringbuf, request);
2537 ret = ring->add_request(ring);
2542 request->head = request_start;
2543 request->tail = intel_ring_get_tail(ringbuf);
2545 /* Whilst this request exists, batch_obj will be on the
2546 * active_list, and so will hold the active reference. Only when this
2547 * request is retired will the batch_obj be moved onto the
2548 * inactive_list and lose its active reference. Hence we do not need
2549 * to explicitly hold another reference here.
2551 request->batch_obj = obj;
2553 if (!i915.enable_execlists) {
2554 /* Hold a reference to the current context so that we can inspect
2555 * it later in case a hangcheck error event fires.
2557 request->ctx = ring->last_context;
2559 i915_gem_context_reference(request->ctx);
2562 request->emitted_jiffies = jiffies;
2563 list_add_tail(&request->list, &ring->request_list);
2564 request->file_priv = NULL;
2567 struct drm_i915_file_private *file_priv = file->driver_priv;
2569 spin_lock(&file_priv->mm.lock);
2570 request->file_priv = file_priv;
2571 list_add_tail(&request->client_list,
2572 &file_priv->mm.request_list);
2573 spin_unlock(&file_priv->mm.lock);
2576 trace_i915_gem_request_add(request);
2577 ring->outstanding_lazy_request = NULL;
2579 i915_queue_hangcheck(ring->dev);
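/*
 * Adding a request marks the device as busy: cancel any pending idle
 * handler and (re)arm the retire worker so that completed requests keep
 * being reaped roughly once a second; the retire path re-queues the idle
 * work once every ring's request list has drained.
 */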
2581 cancel_delayed_work_sync(&dev_priv->mm.idle_work);
2582 queue_delayed_work(dev_priv->wq,
2583 &dev_priv->mm.retire_work,
2584 round_jiffies_up_relative(HZ));
2585 intel_mark_busy(dev_priv->dev);
2591 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2593 struct drm_i915_file_private *file_priv = request->file_priv;
2598 spin_lock(&file_priv->mm.lock);
2599 list_del(&request->client_list);
2600 request->file_priv = NULL;
2601 spin_unlock(&file_priv->mm.lock);
2604 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2605 const struct intel_context *ctx)
2607 unsigned long elapsed;
2609 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2611 if (ctx->hang_stats.banned)
2614 if (ctx->hang_stats.ban_period_seconds &&
2615 elapsed <= ctx->hang_stats.ban_period_seconds) {
2616 if (!i915_gem_context_is_default(ctx)) {
2617 DRM_DEBUG("context hanging too fast, banning!\n");
2619 } else if (i915_stop_ring_allow_ban(dev_priv)) {
2620 if (i915_stop_ring_allow_warn(dev_priv))
2621 DRM_ERROR("gpu hanging too fast, banning!\n");
2629 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2630 struct intel_context *ctx,
2633 struct i915_ctx_hang_stats *hs;
2638 hs = &ctx->hang_stats;
2641 hs->banned = i915_context_is_banned(dev_priv, ctx);
2643 hs->guilty_ts = get_seconds();
2645 hs->batch_pending++;
2649 static void i915_gem_free_request(struct drm_i915_gem_request *request)
2651 list_del(&request->list);
2652 i915_gem_request_remove_from_client(request);
2654 i915_gem_request_unreference(request);
2657 void i915_gem_request_free(struct kref *req_ref)
2659 struct drm_i915_gem_request *req = container_of(req_ref,
2661 struct intel_context *ctx = req->ctx;
2664 if (i915.enable_execlists) {
2665 struct intel_engine_cs *ring = req->ring;
2667 if (ctx != ring->default_context)
2668 intel_lr_context_unpin(ring, ctx);
2671 i915_gem_context_unreference(ctx);
2677 struct drm_i915_gem_request *
2678 i915_gem_find_active_request(struct intel_engine_cs *ring)
2680 struct drm_i915_gem_request *request;
2682 list_for_each_entry(request, &ring->request_list, list) {
2683 if (i915_gem_request_completed(request, false))
2692 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2693 struct intel_engine_cs *ring)
2695 struct drm_i915_gem_request *request;
2698 request = i915_gem_find_active_request(ring);
2700 if (request == NULL)
2703 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2705 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
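/*
 * The request found above was the one being executed when the hang was
 * detected, so it takes the blame (if the ring really is hung); every
 * request queued behind it is only recorded as an innocent pending batch.
 */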
2707 list_for_each_entry_continue(request, &ring->request_list, list)
2708 i915_set_reset_status(dev_priv, request->ctx, false);
2711 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
2712 struct intel_engine_cs *ring)
2714 while (!list_empty(&ring->active_list)) {
2715 struct drm_i915_gem_object *obj;
2717 obj = list_first_entry(&ring->active_list,
2718 struct drm_i915_gem_object,
2721 i915_gem_object_move_to_inactive(obj);
2725 * Clean up the execlists queue before freeing the requests, as those
2726 * are the ones that keep the context and ringbuffer backing objects
2729 while (!list_empty(&ring->execlist_queue)) {
2730 struct drm_i915_gem_request *submit_req;
2732 submit_req = list_first_entry(&ring->execlist_queue,
2733 struct drm_i915_gem_request,
2735 list_del(&submit_req->execlist_link);
2736 intel_runtime_pm_put(dev_priv);
2738 if (submit_req->ctx != ring->default_context)
2739 intel_lr_context_unpin(ring, submit_req->ctx);
2741 i915_gem_request_unreference(submit_req);
2745 * We must free the requests after all the corresponding objects have
2746 * been moved off active lists. Which is the same order as the normal
2747 * retire_requests function does. This is important if objects hold
2748 * implicit references to things like e.g. ppgtt address spaces through
2751 while (!list_empty(&ring->request_list)) {
2752 struct drm_i915_gem_request *request;
2754 request = list_first_entry(&ring->request_list,
2755 struct drm_i915_gem_request,
2758 i915_gem_free_request(request);
2761 /* This may not have been flushed before the reset, so clean it now */
2762 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
2765 void i915_gem_restore_fences(struct drm_device *dev)
2767 struct drm_i915_private *dev_priv = dev->dev_private;
2770 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2771 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2774 * Commit delayed tiling changes if we have an object still
2775 * attached to the fence, otherwise just clear the fence.
2778 i915_gem_object_update_fence(reg->obj, reg,
2779 reg->obj->tiling_mode);
2781 i915_gem_write_fence(dev, i, NULL);
2786 void i915_gem_reset(struct drm_device *dev)
2788 struct drm_i915_private *dev_priv = dev->dev_private;
2789 struct intel_engine_cs *ring;
2793 * Before we free the objects from the requests, we need to inspect
2794 * them to find the guilty party. As the requests only borrow
2795 * their reference to the objects, the inspection must be done first.
2797 for_each_ring(ring, dev_priv, i)
2798 i915_gem_reset_ring_status(dev_priv, ring);
2800 for_each_ring(ring, dev_priv, i)
2801 i915_gem_reset_ring_cleanup(dev_priv, ring);
2803 i915_gem_context_reset(dev);
2805 i915_gem_restore_fences(dev);
2809 * This function clears the request list as sequence numbers are passed.
2812 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2814 if (list_empty(&ring->request_list))
2817 WARN_ON(i915_verify_lists(ring->dev));
2819 /* Retire requests first as we use it above for the early return.
2820 * If we retire requests last, we may use a later seqno and so clear
2821 * the requests lists without clearing the active list, leading to
2824 while (!list_empty(&ring->request_list)) {
2825 struct drm_i915_gem_request *request;
2826 struct intel_ringbuffer *ringbuf;
2828 request = list_first_entry(&ring->request_list,
2829 struct drm_i915_gem_request,
2832 if (!i915_gem_request_completed(request, true))
2835 trace_i915_gem_request_retire(request);
2837 /* This is one of the few common intersection points
2838 * between legacy ringbuffer submission and execlists:
2839 * we need to tell them apart in order to find the correct
2840 * ringbuffer to which the request belongs.
2842 if (i915.enable_execlists) {
2843 struct intel_context *ctx = request->ctx;
2844 ringbuf = ctx->engine[ring->id].ringbuf;
2846 ringbuf = ring->buffer;
2848 /* We know the GPU must have read the request to have
2849 * sent us the seqno + interrupt, so use the position
2850 * of the tail of the request to update the last known position
2853 ringbuf->last_retired_head = request->postfix;
2855 i915_gem_free_request(request);
2858 /* Move any buffers on the active list that are no longer referenced
2859 * by the ringbuffer to the flushing/inactive lists as appropriate,
2860 * before we free the context associated with the requests.
2862 while (!list_empty(&ring->active_list)) {
2863 struct drm_i915_gem_object *obj;
2865 obj = list_first_entry(&ring->active_list,
2866 struct drm_i915_gem_object,
2869 if (!i915_gem_request_completed(obj->last_read_req, true))
2872 i915_gem_object_move_to_inactive(obj);
2875 if (unlikely(ring->trace_irq_req &&
2876 i915_gem_request_completed(ring->trace_irq_req, true))) {
2877 ring->irq_put(ring);
2878 i915_gem_request_assign(&ring->trace_irq_req, NULL);
2881 WARN_ON(i915_verify_lists(ring->dev));
2885 i915_gem_retire_requests(struct drm_device *dev)
2887 struct drm_i915_private *dev_priv = dev->dev_private;
2888 struct intel_engine_cs *ring;
2892 for_each_ring(ring, dev_priv, i) {
2893 i915_gem_retire_requests_ring(ring);
2894 idle &= list_empty(&ring->request_list);
2895 if (i915.enable_execlists) {
2897 lockmgr(&ring->execlist_lock, LK_EXCLUSIVE);
2898 idle &= list_empty(&ring->execlist_queue);
2899 lockmgr(&ring->execlist_lock, LK_RELEASE);
2901 intel_execlists_retire_requests(ring);
2906 mod_delayed_work(dev_priv->wq,
2907 &dev_priv->mm.idle_work,
2908 msecs_to_jiffies(100));
2914 i915_gem_retire_work_handler(struct work_struct *work)
2916 struct drm_i915_private *dev_priv =
2917 container_of(work, typeof(*dev_priv), mm.retire_work.work);
2918 struct drm_device *dev = dev_priv->dev;
2921 /* Come back later if the device is busy... */
2923 if (mutex_trylock(&dev->struct_mutex)) {
2924 idle = i915_gem_retire_requests(dev);
2925 mutex_unlock(&dev->struct_mutex);
2928 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2929 round_jiffies_up_relative(HZ));
2933 i915_gem_idle_work_handler(struct work_struct *work)
2935 struct drm_i915_private *dev_priv =
2936 container_of(work, typeof(*dev_priv), mm.idle_work.work);
2938 intel_mark_idle(dev_priv->dev);
2942 * Ensures that an object will eventually get non-busy by flushing any required
2943 * write domains, emitting any outstanding lazy request and retiring any
2944 * completed requests.
2947 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2949 struct intel_engine_cs *ring;
2953 ring = i915_gem_request_get_ring(obj->last_read_req);
2955 ret = i915_gem_check_olr(obj->last_read_req);
2959 i915_gem_retire_requests_ring(ring);
2966 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2967 * @DRM_IOCTL_ARGS: standard ioctl arguments
2969 * Returns 0 if successful, else an error is returned with the remaining time in
2970 * the timeout parameter.
2971 * -ETIME: object is still busy after timeout
2972 * -ERESTARTSYS: signal interrupted the wait
2973 * -ENOENT: object doesn't exist
2974 * Also possible, but rare:
2975 * -EAGAIN: GPU wedged
2977 * -ENODEV: Internal IRQ fail
2978 * -E?: The add request failed
2980 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2981 * non-zero timeout parameter the wait ioctl will wait for the given number of
2982 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2983 * without holding struct_mutex the object may become re-busied before this
2984 * function completes. A similar but shorter race condition exists in the busy
2988 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2990 struct drm_i915_private *dev_priv = dev->dev_private;
2991 struct drm_i915_gem_wait *args = data;
2992 struct drm_i915_gem_object *obj;
2993 struct drm_i915_gem_request *req;
2994 unsigned reset_counter;
2997 if (args->flags != 0)
3000 ret = i915_mutex_lock_interruptible(dev);
3004 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3005 if (&obj->base == NULL) {
3006 mutex_unlock(&dev->struct_mutex);
3010 /* Need to make sure the object gets inactive eventually. */
3011 ret = i915_gem_object_flush_active(obj);
3015 if (!obj->active || !obj->last_read_req)
3018 req = obj->last_read_req;
3020 /* Do this after OLR check to make sure we make forward progress polling
3021 * on this IOCTL with a timeout == 0 (like busy ioctl)
3023 if (args->timeout_ns == 0) {
3028 drm_gem_object_unreference(&obj->base);
3029 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3030 i915_gem_request_reference(req);
3031 mutex_unlock(&dev->struct_mutex);
3033 ret = __i915_wait_request(req, reset_counter, true,
3034 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3036 mutex_lock(&dev->struct_mutex);
3037 i915_gem_request_unreference(req);
3038 mutex_unlock(&dev->struct_mutex);
3042 drm_gem_object_unreference(&obj->base);
3043 mutex_unlock(&dev->struct_mutex);
3048 * i915_gem_object_sync - sync an object to a ring.
3050 * @obj: object which may be in use on another ring.
3051 * @to: ring we wish to use the object on. May be NULL.
3053 * This code is meant to abstract object synchronization with the GPU.
3054 * Calling with NULL implies synchronizing the object with the CPU
3055 * rather than a particular GPU ring.
3057 * Returns 0 if successful, else propagates up the lower layer error.
3060 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3061 struct intel_engine_cs *to)
3063 struct intel_engine_cs *from;
3067 from = i915_gem_request_get_ring(obj->last_read_req);
3069 if (from == NULL || to == from)
3072 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3073 return i915_gem_object_wait_rendering(obj, false);
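/*
 * Otherwise a GPU-side semaphore wait can be queued on 'to'.  The
 * sync_seqno slot below tracks the newest seqno of 'from' that 'to' has
 * already waited for, so the semaphore is only emitted when this object's
 * last read is newer than that.
 */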
3075 idx = intel_ring_sync_index(from, to);
3077 seqno = i915_gem_request_get_seqno(obj->last_read_req);
3078 /* Optimization: Avoid semaphore sync when we are sure we already
3079 * waited for an object with higher seqno */
3080 if (seqno <= from->semaphore.sync_seqno[idx])
3083 ret = i915_gem_check_olr(obj->last_read_req);
3087 trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
3088 ret = to->semaphore.sync_to(to, from, seqno);
3090 /* We use last_read_req because sync_to()
3091 * might have just caused seqno wrap under
3094 from->semaphore.sync_seqno[idx] =
3095 i915_gem_request_get_seqno(obj->last_read_req);
3100 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3102 u32 old_write_domain, old_read_domains;
3104 /* Force a pagefault for domain tracking on next user access */
3105 i915_gem_release_mmap(obj);
3107 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3110 /* Wait for any direct GTT access to complete */
3113 old_read_domains = obj->base.read_domains;
3114 old_write_domain = obj->base.write_domain;
3116 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3117 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3119 trace_i915_gem_object_change_domain(obj,
3124 int i915_vma_unbind(struct i915_vma *vma)
3126 struct drm_i915_gem_object *obj = vma->obj;
3127 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3130 if (list_empty(&vma->vma_link))
3133 if (!drm_mm_node_allocated(&vma->node)) {
3134 i915_gem_vma_destroy(vma);
3141 BUG_ON(obj->pages == NULL);
3143 ret = i915_gem_object_finish_gpu(obj);
3146 /* Continue on if we fail due to EIO; the GPU is hung, so we
3147 * should be safe and we need to clean up or else we might
3148 * cause memory corruption through use-after-free.
3151 if (i915_is_ggtt(vma->vm) &&
3152 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3153 i915_gem_object_finish_gtt(obj);
3155 /* release the fence reg _after_ flushing */
3156 ret = i915_gem_object_put_fence(obj);
3161 trace_i915_vma_unbind(vma);
3163 vma->unbind_vma(vma);
3165 list_del_init(&vma->mm_list);
3166 if (i915_is_ggtt(vma->vm)) {
3167 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3168 obj->map_and_fenceable = false;
3169 } else if (vma->ggtt_view.pages) {
3170 kfree(vma->ggtt_view.pages);
3171 vma->ggtt_view.pages = NULL;
3175 drm_mm_remove_node(&vma->node);
3176 i915_gem_vma_destroy(vma);
3178 /* Since the unbound list is global, only move to that list if
3179 * no more VMAs exist. */
3180 if (list_empty(&obj->vma_list)) {
3181 /* Throw away the active reference before
3182 * moving to the unbound list. */
3183 i915_gem_object_retire(obj);
3185 i915_gem_gtt_finish_object(obj);
3186 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3189 /* And finally now the object is completely decoupled from this vma,
3190 * we can drop its hold on the backing storage and allow it to be
3191 * reaped by the shrinker.
3193 i915_gem_object_unpin_pages(obj);
3198 int i915_gpu_idle(struct drm_device *dev)
3200 struct drm_i915_private *dev_priv = dev->dev_private;
3201 struct intel_engine_cs *ring;
3204 /* Flush everything onto the inactive list. */
3205 for_each_ring(ring, dev_priv, i) {
3206 if (!i915.enable_execlists) {
3207 ret = i915_switch_context(ring, ring->default_context);
3212 ret = intel_ring_idle(ring);
3220 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3221 struct drm_i915_gem_object *obj)
3223 struct drm_i915_private *dev_priv = dev->dev_private;
3225 int fence_pitch_shift;
3227 if (INTEL_INFO(dev)->gen >= 6) {
3228 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3229 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3231 fence_reg = FENCE_REG_965_0;
3232 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3235 fence_reg += reg * 8;
3237 /* To w/a incoherency with non-atomic 64-bit register updates,
3238 * we split the 64-bit update into two 32-bit writes. In order
3239 * for a partial fence not to be evaluated between writes, we
3240 * precede the update with write to turn off the fence register,
3241 * and only enable the fence as the last step.
3243 * For extra levels of paranoia, we make sure each step lands
3244 * before applying the next step.
3246 I915_WRITE(fence_reg, 0);
3247 POSTING_READ(fence_reg);
3250 u32 size = i915_gem_obj_ggtt_size(obj);
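/*
 * Rough sketch of the 64-bit fence value assembled below: the upper dword
 * holds the last page of the fenced GGTT range, the lower dword its start,
 * plus the pitch in 128-byte units minus one (at fence_pitch_shift), the
 * Y-tiling flag and the valid bit.
 */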
3253 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3255 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3256 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3257 if (obj->tiling_mode == I915_TILING_Y)
3258 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3259 val |= I965_FENCE_REG_VALID;
3261 I915_WRITE(fence_reg + 4, val >> 32);
3262 POSTING_READ(fence_reg + 4);
3264 I915_WRITE(fence_reg + 0, val);
3265 POSTING_READ(fence_reg);
3267 I915_WRITE(fence_reg + 4, 0);
3268 POSTING_READ(fence_reg + 4);
3272 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3273 struct drm_i915_gem_object *obj)
3275 struct drm_i915_private *dev_priv = dev->dev_private;
3279 u32 size = i915_gem_obj_ggtt_size(obj);
3283 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3284 (size & -size) != size ||
3285 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3286 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3287 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3289 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3294 /* Note: pitch better be a power of two tile widths */
3295 pitch_val = obj->stride / tile_width;
3296 pitch_val = ffs(pitch_val) - 1;
3298 val = i915_gem_obj_ggtt_offset(obj);
3299 if (obj->tiling_mode == I915_TILING_Y)
3300 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3301 val |= I915_FENCE_SIZE_BITS(size);
3302 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3303 val |= I830_FENCE_REG_VALID;
3308 reg = FENCE_REG_830_0 + reg * 4;
3310 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3312 I915_WRITE(reg, val);
3316 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3317 struct drm_i915_gem_object *obj)
3319 struct drm_i915_private *dev_priv = dev->dev_private;
3323 u32 size = i915_gem_obj_ggtt_size(obj);
3326 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3327 (size & -size) != size ||
3328 (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3329 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3330 i915_gem_obj_ggtt_offset(obj), size);
3332 pitch_val = obj->stride / 128;
3333 pitch_val = ffs(pitch_val) - 1;
3335 val = i915_gem_obj_ggtt_offset(obj);
3336 if (obj->tiling_mode == I915_TILING_Y)
3337 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3338 val |= I830_FENCE_SIZE_BITS(size);
3339 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3340 val |= I830_FENCE_REG_VALID;
3344 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3345 POSTING_READ(FENCE_REG_830_0 + reg * 4);
3348 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3350 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3353 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3354 struct drm_i915_gem_object *obj)
3356 struct drm_i915_private *dev_priv = dev->dev_private;
3358 /* Ensure that all CPU reads are completed before installing a fence
3359 * and all writes before removing the fence.
3361 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3364 WARN(obj && (!obj->stride || !obj->tiling_mode),
3365 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3366 obj->stride, obj->tiling_mode);
3369 i830_write_fence_reg(dev, reg, obj);
3370 else if (IS_GEN3(dev))
3371 i915_write_fence_reg(dev, reg, obj);
3372 else if (INTEL_INFO(dev)->gen >= 4)
3373 i965_write_fence_reg(dev, reg, obj);
3375 /* And similarly be paranoid that no direct access to this region
3376 * is reordered to before the fence is installed.
3378 if (i915_gem_object_needs_mb(obj))
3382 static inline int fence_number(struct drm_i915_private *dev_priv,
3383 struct drm_i915_fence_reg *fence)
3385 return fence - dev_priv->fence_regs;
3388 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3389 struct drm_i915_fence_reg *fence,
3392 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3393 int reg = fence_number(dev_priv, fence);
3395 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3398 obj->fence_reg = reg;
3400 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3402 obj->fence_reg = I915_FENCE_REG_NONE;
3404 list_del_init(&fence->lru_list);
3406 obj->fence_dirty = false;
3410 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3412 if (obj->last_fenced_req) {
3413 int ret = i915_wait_request(obj->last_fenced_req);
3417 i915_gem_request_assign(&obj->last_fenced_req, NULL);
3424 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3426 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3427 struct drm_i915_fence_reg *fence;
3430 ret = i915_gem_object_wait_fence(obj);
3434 if (obj->fence_reg == I915_FENCE_REG_NONE)
3437 fence = &dev_priv->fence_regs[obj->fence_reg];
3439 if (WARN_ON(fence->pin_count))
3442 i915_gem_object_fence_lost(obj);
3443 i915_gem_object_update_fence(obj, fence, false);
3448 static struct drm_i915_fence_reg *
3449 i915_find_fence_reg(struct drm_device *dev)
3451 struct drm_i915_private *dev_priv = dev->dev_private;
3452 struct drm_i915_fence_reg *reg, *avail;
3455 /* First try to find a free reg */
3457 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3458 reg = &dev_priv->fence_regs[i];
3462 if (!reg->pin_count)
3469 /* None available, try to steal one or wait for a user to finish */
3470 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3478 /* Wait for completion of pending flips which consume fences */
3479 if (intel_has_pending_fb_unpin(dev))
3480 return ERR_PTR(-EAGAIN);
3482 return ERR_PTR(-EDEADLK);
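/*
 * A -EDEADLK here means every fence register is pinned and no pending page
 * flip is about to release one; the caller cannot make progress without
 * first dropping one of its own fence pins.
 */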
3486 * i915_gem_object_get_fence - set up fencing for an object
3487 * @obj: object to map through a fence reg
3489 * When mapping objects through the GTT, userspace wants to be able to write
3490 * to them without having to worry about swizzling if the object is tiled.
3491 * This function walks the fence regs looking for a free one for @obj,
3492 * stealing one if it can't find any.
3494 * It then sets up the reg based on the object's properties: address, pitch
3495 * and tiling format.
3497 * For an untiled surface, this removes any existing fence.
3500 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3502 struct drm_device *dev = obj->base.dev;
3503 struct drm_i915_private *dev_priv = dev->dev_private;
3504 bool enable = obj->tiling_mode != I915_TILING_NONE;
3505 struct drm_i915_fence_reg *reg;
3508 /* Have we updated the tiling parameters upon the object and so
3509 * will need to serialise the write to the associated fence register?
3511 if (obj->fence_dirty) {
3512 ret = i915_gem_object_wait_fence(obj);
3517 /* Just update our place in the LRU if our fence is getting reused. */
3518 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3519 reg = &dev_priv->fence_regs[obj->fence_reg];
3520 if (!obj->fence_dirty) {
3521 list_move_tail(&reg->lru_list,
3522 &dev_priv->mm.fence_list);
3525 } else if (enable) {
3526 if (WARN_ON(!obj->map_and_fenceable))
3529 reg = i915_find_fence_reg(dev);
3531 return PTR_ERR(reg);
3534 struct drm_i915_gem_object *old = reg->obj;
3536 ret = i915_gem_object_wait_fence(old);
3540 i915_gem_object_fence_lost(old);
3545 i915_gem_object_update_fence(obj, reg, enable);
3550 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3551 unsigned long cache_level)
3553 struct drm_mm_node *gtt_space = &vma->node;
3554 struct drm_mm_node *other;
3557 * On some machines we have to be careful when putting differing types
3558 * of snoopable memory together to avoid the prefetcher crossing memory
3559 * domains and dying. During vm initialisation, we decide whether or not
3560 * these constraints apply and set the drm_mm.color_adjust
3563 if (vma->vm->mm.color_adjust == NULL)
3566 if (!drm_mm_node_allocated(gtt_space))
3569 if (list_empty(&gtt_space->node_list))
3572 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3573 if (other->allocated && !other->hole_follows && other->color != cache_level)
3576 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3577 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3584 * Finds free space in the GTT aperture and binds the object there.
3586 static struct i915_vma *
3587 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3588 struct i915_address_space *vm,
3591 const struct i915_ggtt_view *view)
3593 struct drm_device *dev = obj->base.dev;
3594 struct drm_i915_private *dev_priv = dev->dev_private;
3595 u32 size, fence_size, fence_alignment, unfenced_alignment;
3596 unsigned long start =
3597 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3599 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3600 struct i915_vma *vma;
3603 fence_size = i915_gem_get_gtt_size(dev,
3606 fence_alignment = i915_gem_get_gtt_alignment(dev,
3608 obj->tiling_mode, true);
3609 unfenced_alignment =
3610 i915_gem_get_gtt_alignment(dev,
3612 obj->tiling_mode, false);
3615 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3617 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3618 DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
3619 return ERR_PTR(-EINVAL);
3622 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3624 /* If the object is bigger than the entire aperture, reject it early
3625 * before evicting everything in a vain attempt to find space.
3627 if (obj->base.size > end) {
3628 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
3630 flags & PIN_MAPPABLE ? "mappable" : "total",
3632 return ERR_PTR(-E2BIG);
3635 ret = i915_gem_object_get_pages(obj);
3637 return ERR_PTR(ret);
3639 i915_gem_object_pin_pages(obj);
3641 vma = i915_gem_obj_lookup_or_create_vma_view(obj, vm, view);
3646 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3650 DRM_MM_SEARCH_DEFAULT,
3651 DRM_MM_CREATE_DEFAULT);
3653 ret = i915_gem_evict_something(dev, vm, size, alignment,
3662 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
3664 goto err_remove_node;
3667 ret = i915_gem_gtt_prepare_object(obj);
3669 goto err_remove_node;
3671 trace_i915_vma_bind(vma, flags);
3672 ret = i915_vma_bind(vma, obj->cache_level,
3673 flags & PIN_GLOBAL ? GLOBAL_BIND : 0);
3675 goto err_finish_gtt;
3677 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3678 list_add_tail(&vma->mm_list, &vm->inactive_list);
3683 i915_gem_gtt_finish_object(obj);
3685 drm_mm_remove_node(&vma->node);
3687 i915_gem_vma_destroy(vma);
3690 i915_gem_object_unpin_pages(obj);
3695 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3698 /* If we don't have a page list set up, then we're not pinned
3699 * to the GPU, and we can ignore the cache flush because it'll happen
3700 * again at bind time.
3702 if (obj->pages == NULL)
3706 * Stolen memory is always coherent with the GPU as it is explicitly
3707 * marked as wc by the system, or the system is cache-coherent.
3712 /* If the GPU is snooping the contents of the CPU cache,
3713 * we do not need to manually clear the CPU cache lines. However,
3714 * the caches are only snooped when the render cache is
3715 * flushed/invalidated. As we always have to emit invalidations
3716 * and flushes when moving into and out of the RENDER domain, correct
3717 * snooping behaviour occurs naturally as the result of our domain
3720 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3721 obj->cache_dirty = true;
3725 trace_i915_gem_object_clflush(obj);
3726 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
3727 obj->cache_dirty = false;
3732 /** Flushes the GTT write domain for the object if it's dirty. */
3734 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3736 uint32_t old_write_domain;
3738 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3741 /* No actual flushing is required for the GTT write domain. Writes
3742 * to it immediately go to main memory as far as we know, so there's
3743 * no chipset flush. It also doesn't land in render cache.
3745 * However, we do have to enforce the order so that all writes through
3746 * the GTT land before any writes to the device, such as updates to
3751 old_write_domain = obj->base.write_domain;
3752 obj->base.write_domain = 0;
3754 intel_fb_obj_flush(obj, false);
3758 trace_i915_gem_object_change_domain(obj,
3759 obj->base.read_domains,
3763 /** Flushes the CPU write domain for the object if it's dirty. */
3765 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3767 uint32_t old_write_domain;
3769 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3772 if (i915_gem_clflush_object(obj, obj->pin_display))
3773 i915_gem_chipset_flush(obj->base.dev);
3775 old_write_domain = obj->base.write_domain;
3776 obj->base.write_domain = 0;
3778 trace_i915_gem_object_change_domain(obj,
3779 obj->base.read_domains,
3784 * Moves a single object to the GTT read, and possibly write domain.
3786 * This function returns when the move is complete, including waiting on
3790 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3792 uint32_t old_write_domain, old_read_domains;
3793 struct i915_vma *vma;
3796 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3799 ret = i915_gem_object_wait_rendering(obj, !write);
3803 i915_gem_object_retire(obj);
3805 /* Flush and acquire obj->pages so that we are coherent through
3806 * direct access in memory with previous cached writes through
3807 * shmemfs and that our cache domain tracking remains valid.
3808 * For example, if the obj->filp was moved to swap without us
3809 * being notified and releasing the pages, we would mistakenly
3810 * continue to assume that the obj remained out of the CPU cached
3813 ret = i915_gem_object_get_pages(obj);
3817 i915_gem_object_flush_cpu_write_domain(obj);
3819 /* Serialise direct access to this object with the barriers for
3820 * coherent writes from the GPU, by effectively invalidating the
3821 * GTT domain upon first access.
3823 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3826 old_write_domain = obj->base.write_domain;
3827 old_read_domains = obj->base.read_domains;
3829 /* It should now be out of any other write domains, and we can update
3830 * the domain values for our changes.
3832 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3833 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3835 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3836 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3841 intel_fb_obj_invalidate(obj, NULL);
3843 trace_i915_gem_object_change_domain(obj,
3847 /* And bump the LRU for this access */
3848 vma = i915_gem_obj_to_ggtt(obj);
3849 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3850 list_move_tail(&vma->mm_list,
3851 &to_i915(obj->base.dev)->gtt.base.inactive_list);
3856 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3857 enum i915_cache_level cache_level)
3859 struct drm_device *dev = obj->base.dev;
3860 struct i915_vma *vma, *next;
3863 if (obj->cache_level == cache_level)
3866 if (i915_gem_obj_is_pinned(obj)) {
3867 DRM_DEBUG("can not change the cache level of pinned objects\n");
3871 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3872 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
3873 ret = i915_vma_unbind(vma);
3879 if (i915_gem_obj_bound_any(obj)) {
3880 ret = i915_gem_object_finish_gpu(obj);
3884 i915_gem_object_finish_gtt(obj);
3886 /* Before SandyBridge, you could not use tiling or fence
3887 * registers with snooped memory, so relinquish any fences
3888 * currently pointing to our region in the aperture.
3890 if (INTEL_INFO(dev)->gen < 6) {
3891 ret = i915_gem_object_put_fence(obj);
3896 list_for_each_entry(vma, &obj->vma_list, vma_link)
3897 if (drm_mm_node_allocated(&vma->node)) {
3898 ret = i915_vma_bind(vma, cache_level,
3899 vma->bound & GLOBAL_BIND);
3905 list_for_each_entry(vma, &obj->vma_list, vma_link)
3906 vma->node.color = cache_level;
3907 obj->cache_level = cache_level;
3909 if (obj->cache_dirty &&
3910 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3911 cpu_write_needs_clflush(obj)) {
3912 if (i915_gem_clflush_object(obj, true))
3913 i915_gem_chipset_flush(obj->base.dev);
3919 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3920 struct drm_file *file)
3922 struct drm_i915_gem_caching *args = data;
3923 struct drm_i915_gem_object *obj;
3926 ret = i915_mutex_lock_interruptible(dev);
3930 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3931 if (&obj->base == NULL) {
3936 switch (obj->cache_level) {
3937 case I915_CACHE_LLC:
3938 case I915_CACHE_L3_LLC:
3939 args->caching = I915_CACHING_CACHED;
3943 args->caching = I915_CACHING_DISPLAY;
3947 args->caching = I915_CACHING_NONE;
3951 drm_gem_object_unreference(&obj->base);
3953 mutex_unlock(&dev->struct_mutex);
3957 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3958 struct drm_file *file)
3960 struct drm_i915_gem_caching *args = data;
3961 struct drm_i915_gem_object *obj;
3962 enum i915_cache_level level;
3965 switch (args->caching) {
3966 case I915_CACHING_NONE:
3967 level = I915_CACHE_NONE;
3969 case I915_CACHING_CACHED:
3970 level = I915_CACHE_LLC;
3972 case I915_CACHING_DISPLAY:
3973 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3979 ret = i915_mutex_lock_interruptible(dev);
3983 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3984 if (&obj->base == NULL) {
3989 ret = i915_gem_object_set_cache_level(obj, level);
3991 drm_gem_object_unreference(&obj->base);
3993 mutex_unlock(&dev->struct_mutex);
3997 static bool is_pin_display(struct drm_i915_gem_object *obj)
3999 struct i915_vma *vma;
4001 vma = i915_gem_obj_to_ggtt(obj);
4005 /* There are 2 sources that pin objects:
4006 * 1. The display engine (scanouts, sprites, cursors);
4007 * 2. Reservations for execbuffer;
4009 * We can ignore reservations as we hold the struct_mutex and
4010 * are only called outside of the reservation path.
4012 return vma->pin_count;
4016 * Prepare buffer for display plane (scanout, cursors, etc).
4017 * Can be called from an uninterruptible phase (modesetting) and allows
4018 * any flushes to be pipelined (for pageflips).
4021 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4023 struct intel_engine_cs *pipelined)
4025 u32 old_read_domains, old_write_domain;
4026 bool was_pin_display;
4029 if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
4030 ret = i915_gem_object_sync(obj, pipelined);
4035 /* Mark the pin_display early so that we account for the
4036 * display coherency whilst setting up the cache domains.
4038 was_pin_display = obj->pin_display;
4039 obj->pin_display = true;
4041 /* The display engine is not coherent with the LLC cache on gen6. As
4042 * a result, we make sure that the pinning that is about to occur is
4043 * done with uncached PTEs. This is the lowest common denominator for all
4046 * However for gen6+, we could do better by using the GFDT bit instead
4047 * of uncaching, which would allow us to flush all the LLC-cached data
4048 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4050 ret = i915_gem_object_set_cache_level(obj,
4051 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4053 goto err_unpin_display;
4055 /* As the user may map the buffer once pinned in the display plane
4056 * (e.g. libkms for the bootup splash), we have to ensure that we
4057 * always use map_and_fenceable for all scanout buffers.
4059 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4061 goto err_unpin_display;
4063 i915_gem_object_flush_cpu_write_domain(obj);
4065 old_write_domain = obj->base.write_domain;
4066 old_read_domains = obj->base.read_domains;
4068 /* It should now be out of any other write domains, and we can update
4069 * the domain values for our changes.
4071 obj->base.write_domain = 0;
4072 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4074 trace_i915_gem_object_change_domain(obj,
4081 WARN_ON(was_pin_display != is_pin_display(obj));
4082 obj->pin_display = was_pin_display;
4087 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4089 i915_gem_object_ggtt_unpin(obj);
4090 obj->pin_display = is_pin_display(obj);
4094 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4098 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4101 ret = i915_gem_object_wait_rendering(obj, false);
4105 /* Ensure that we invalidate the GPU's caches and TLBs. */
4106 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4111 * Moves a single object to the CPU read, and possibly write domain.
4113 * This function returns when the move is complete, including waiting on
4117 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4119 uint32_t old_write_domain, old_read_domains;
4122 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4125 ret = i915_gem_object_wait_rendering(obj, !write);
4129 i915_gem_object_retire(obj);
4130 i915_gem_object_flush_gtt_write_domain(obj);
4132 old_write_domain = obj->base.write_domain;
4133 old_read_domains = obj->base.read_domains;
4135 /* Flush the CPU cache if it's still invalid. */
4136 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4137 i915_gem_clflush_object(obj, false);
4139 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4142 /* It should now be out of any other write domains, and we can update
4143 * the domain values for our changes.
4145 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4147 /* If we're writing through the CPU, then the GPU read domains will
4148 * need to be invalidated at next use.
4151 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4152 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4156 intel_fb_obj_invalidate(obj, NULL);
4158 trace_i915_gem_object_change_domain(obj,
4165 /* Throttle our rendering by waiting until the ring has completed our requests
4166 * emitted over 20 msec ago.
4168 * Note that if we were to use the current jiffies each time around the loop,
4169 * we wouldn't escape the function with any frames outstanding if the time to
4170 * render a frame was over 20ms.
4172 * This should get us reasonable parallelism between CPU and GPU but also
4173 * relatively low latency when blocking on a particular request to finish.
4176 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4178 struct drm_i915_private *dev_priv = dev->dev_private;
4179 struct drm_i915_file_private *file_priv = file->driver_priv;
4180 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4181 struct drm_i915_gem_request *request, *target = NULL;
4182 unsigned reset_counter;
4185 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4189 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4193 spin_lock(&file_priv->mm.lock);
4194 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4195 if (time_after_eq(request->emitted_jiffies, recent_enough))
4200 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4202 i915_gem_request_reference(target);
4203 spin_unlock(&file_priv->mm.lock);
4208 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
4210 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4212 mutex_lock(&dev->struct_mutex);
4213 i915_gem_request_unreference(target);
4214 mutex_unlock(&dev->struct_mutex);
4220 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4222 struct drm_i915_gem_object *obj = vma->obj;
4225 vma->node.start & (alignment - 1))
4228 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4231 if (flags & PIN_OFFSET_BIAS &&
4232 vma->node.start < (flags & PIN_OFFSET_MASK))
4239 i915_gem_object_pin_view(struct drm_i915_gem_object *obj,
4240 struct i915_address_space *vm,
4243 const struct i915_ggtt_view *view)
4245 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4246 struct i915_vma *vma;
4250 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
4253 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4256 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
4259 vma = i915_gem_obj_to_vma_view(obj, vm, view);
4261 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4264 if (i915_vma_misplaced(vma, alignment, flags)) {
4265 WARN(vma->pin_count,
4266 "bo is already pinned with incorrect alignment:"
4267 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4268 " obj->map_and_fenceable=%d\n",
4269 i915_gem_obj_offset_view(obj, vm, view->type),
4271 !!(flags & PIN_MAPPABLE),
4272 obj->map_and_fenceable);
4273 ret = i915_vma_unbind(vma);
4281 bound = vma ? vma->bound : 0;
4282 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4283 vma = i915_gem_object_bind_to_vm(obj, vm, alignment,
4286 return PTR_ERR(vma);
4289 if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
4290 ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
4295 if ((bound ^ vma->bound) & GLOBAL_BIND) {
4296 bool mappable, fenceable;
4297 u32 fence_size, fence_alignment;
4299 fence_size = i915_gem_get_gtt_size(obj->base.dev,
4302 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
4307 fenceable = (vma->node.size == fence_size &&
4308 (vma->node.start & (fence_alignment - 1)) == 0);
4310 mappable = (vma->node.start + obj->base.size <=
4311 dev_priv->gtt.mappable_end);
4313 obj->map_and_fenceable = mappable && fenceable;
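/*
 * In other words, the object is map_and_fenceable only when its GGTT node
 * is exactly fence-sized, fence-aligned and placed entirely below the
 * mappable aperture limit, i.e. a fence register and a CPU GTT mapping can
 * both cover it.
 */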
4316 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4319 if (flags & PIN_MAPPABLE)
4320 obj->pin_mappable |= true;
4326 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4328 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4331 BUG_ON(vma->pin_count == 0);
4332 BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4334 if (--vma->pin_count == 0)
4335 obj->pin_mappable = false;
4339 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
4341 if (obj->fence_reg != I915_FENCE_REG_NONE) {
4342 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4343 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
4345 WARN_ON(!ggtt_vma ||
4346 dev_priv->fence_regs[obj->fence_reg].pin_count >
4347 ggtt_vma->pin_count);
4348 dev_priv->fence_regs[obj->fence_reg].pin_count++;
4355 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
4357 if (obj->fence_reg != I915_FENCE_REG_NONE) {
4358 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4359 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
4360 dev_priv->fence_regs[obj->fence_reg].pin_count--;
4365 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4366 struct drm_file *file)
4368 struct drm_i915_gem_busy *args = data;
4369 struct drm_i915_gem_object *obj;
4372 ret = i915_mutex_lock_interruptible(dev);
4376 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4377 if (&obj->base == NULL) {
4382 /* Count all active objects as busy, even if they are currently not used
4383 * by the gpu. Users of this interface expect objects to eventually
4384 * become non-busy without any further actions, therefore emit any
4385 * necessary flushes here.
4387 ret = i915_gem_object_flush_active(obj);
4389 args->busy = obj->active;
4390 if (obj->last_read_req) {
4391 struct intel_engine_cs *ring;
4392 BUILD_BUG_ON(I915_NUM_RINGS > 16);
4393 ring = i915_gem_request_get_ring(obj->last_read_req);
4394 args->busy |= intel_ring_flag(ring) << 16;
4397 drm_gem_object_unreference(&obj->base);
4399 mutex_unlock(&dev->struct_mutex);
4404 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4405 struct drm_file *file_priv)
4407 return i915_gem_ring_throttle(dev, file_priv);
4411 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4412 struct drm_file *file_priv)
4414 struct drm_i915_private *dev_priv = dev->dev_private;
4415 struct drm_i915_gem_madvise *args = data;
4416 struct drm_i915_gem_object *obj;
4419 switch (args->madv) {
4420 case I915_MADV_DONTNEED:
4421 case I915_MADV_WILLNEED:
4427 ret = i915_mutex_lock_interruptible(dev);
4431 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4432 if (&obj->base == NULL) {
4437 if (i915_gem_obj_is_pinned(obj)) {
4443 obj->tiling_mode != I915_TILING_NONE &&
4444 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4445 if (obj->madv == I915_MADV_WILLNEED)
4446 i915_gem_object_unpin_pages(obj);
4447 if (args->madv == I915_MADV_WILLNEED)
4448 i915_gem_object_pin_pages(obj);
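/*
 * On QUIRK_PIN_SWIZZLED_PAGES platforms tiled objects keep their pages
 * pinned while WILLNEED (see get_pages), so the transitions above drop the
 * extra pin when going to DONTNEED and re-take it when returning to
 * WILLNEED, keeping the pin count balanced with the madvise state.
 */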
4451 if (obj->madv != __I915_MADV_PURGED)
4452 obj->madv = args->madv;
4454 /* if the object is no longer attached, discard its backing storage */
4455 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4456 i915_gem_object_truncate(obj);
4458 args->retained = obj->madv != __I915_MADV_PURGED;
4461 drm_gem_object_unreference(&obj->base);
4463 mutex_unlock(&dev->struct_mutex);
4467 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4468 const struct drm_i915_gem_object_ops *ops)
4470 INIT_LIST_HEAD(&obj->global_list);
4471 INIT_LIST_HEAD(&obj->ring_list);
4472 INIT_LIST_HEAD(&obj->obj_exec_link);
4473 INIT_LIST_HEAD(&obj->vma_list);
4474 INIT_LIST_HEAD(&obj->batch_pool_list);
4478 obj->fence_reg = I915_FENCE_REG_NONE;
4479 obj->madv = I915_MADV_WILLNEED;
4481 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4484 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4485 .get_pages = i915_gem_object_get_pages_gtt,
4486 .put_pages = i915_gem_object_put_pages_gtt,
4489 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4492 struct drm_i915_gem_object *obj;
4494 struct address_space *mapping;
4498 obj = i915_gem_object_alloc(dev);
4502 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4503 i915_gem_object_free(obj);
4508 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4509 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4510 /* 965gm cannot relocate objects above 4GiB. */
4511 mask &= ~__GFP_HIGHMEM;
4512 mask |= __GFP_DMA32;
4515 mapping = file_inode(obj->base.filp)->i_mapping;
4516 mapping_set_gfp_mask(mapping, mask);
4519 i915_gem_object_init(obj, &i915_gem_object_ops);
4521 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4522 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4525 /* On some devices, we can have the GPU use the LLC (the CPU
4526 * cache) for about a 10% performance improvement
4527 * compared to uncached. Graphics requests other than
4528 * display scanout are coherent with the CPU in
4529 * accessing this cache. This means in this mode we
4530 * don't need to clflush on the CPU side, and on the
4531 * GPU side we only need to flush internal caches to
4532 * get data visible to the CPU.
4534 * However, we maintain the display planes as UC, and so
4535 * need to rebind when first used as such.
4537 obj->cache_level = I915_CACHE_LLC;
4539 obj->cache_level = I915_CACHE_NONE;
4541 trace_i915_gem_object_create(obj);
4546 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4548 /* If we are the last user of the backing storage (be it shmemfs
4549 * pages or stolen etc), we know that the pages are going to be
4550 * immediately released. In this case, we can then skip copying
4551 * back the contents from the GPU.
4554 if (obj->madv != I915_MADV_WILLNEED)
4557 if (obj->base.vm_obj == NULL)
4560 /* At first glance, this looks racy, but then again so would be
4561 * userspace racing mmap against close. However, the first external
4562 * reference to the filp can only be obtained through the
4563 * i915_gem_mmap_ioctl() which safeguards us against the user
4564 * acquiring such a reference whilst we are in the middle of
4565 * freeing the object.
4568 return atomic_long_read(&obj->base.filp->f_count) == 1;
4574 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4576 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4577 struct drm_device *dev = obj->base.dev;
4578 struct drm_i915_private *dev_priv = dev->dev_private;
4579 struct i915_vma *vma, *next;
4581 intel_runtime_pm_get(dev_priv);
4583 trace_i915_gem_object_destroy(obj);
4585 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4589 ret = i915_vma_unbind(vma);
4590 if (WARN_ON(ret == -ERESTARTSYS)) {
4591 bool was_interruptible;
4593 was_interruptible = dev_priv->mm.interruptible;
4594 dev_priv->mm.interruptible = false;
4596 WARN_ON(i915_vma_unbind(vma));
4598 dev_priv->mm.interruptible = was_interruptible;
4602 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4603 * before progressing. */
4605 i915_gem_object_unpin_pages(obj);
4607 WARN_ON(obj->frontbuffer_bits);
4609 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4610 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4611 obj->tiling_mode != I915_TILING_NONE)
4612 i915_gem_object_unpin_pages(obj);
4614 if (WARN_ON(obj->pages_pin_count))
4615 obj->pages_pin_count = 0;
4616 if (discard_backing_storage(obj))
4617 obj->madv = I915_MADV_DONTNEED;
4618 i915_gem_object_put_pages(obj);
4619 i915_gem_object_free_mmap_offset(obj);
4624 if (obj->base.import_attach)
4625 drm_prime_gem_destroy(&obj->base, NULL);
4628 if (obj->ops->release)
4629 obj->ops->release(obj);
4631 drm_gem_object_release(&obj->base);
4632 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4635 i915_gem_object_free(obj);
4637 intel_runtime_pm_put(dev_priv);
4640 struct i915_vma *i915_gem_obj_to_vma_view(struct drm_i915_gem_object *obj,
4641 struct i915_address_space *vm,
4642 const struct i915_ggtt_view *view)
4644 struct i915_vma *vma;
4645 list_for_each_entry(vma, &obj->vma_list, vma_link)
4646 if (vma->vm == vm && vma->ggtt_view.type == view->type)
4652 void i915_gem_vma_destroy(struct i915_vma *vma)
4654 struct i915_address_space *vm = NULL;
4655 WARN_ON(vma->node.allocated);
4657 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4658 if (!list_empty(&vma->exec_list))
4663 if (!i915_is_ggtt(vm))
4664 i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4666 list_del(&vma->vma_link);
4672 i915_gem_stop_ringbuffers(struct drm_device *dev)
4674 struct drm_i915_private *dev_priv = dev->dev_private;
4675 struct intel_engine_cs *ring;
4678 for_each_ring(ring, dev_priv, i)
4679 dev_priv->gt.stop_ring(ring);
4683 i915_gem_suspend(struct drm_device *dev)
4685 struct drm_i915_private *dev_priv = dev->dev_private;
4688 mutex_lock(&dev->struct_mutex);
4689 ret = i915_gpu_idle(dev);
4693 i915_gem_retire_requests(dev);
4695 /* Under UMS, be paranoid and evict. */
4696 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4697 i915_gem_evict_everything(dev);
4699 i915_gem_stop_ringbuffers(dev);
4700 mutex_unlock(&dev->struct_mutex);
4702 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4703 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4705 flush_delayed_work(&dev_priv->mm.idle_work);
4708 /* Assert that we successfully flushed all the work and
4709 * reset the GPU back to its idle, low power state.
4711 WARN_ON(dev_priv->mm.busy);
4716 mutex_unlock(&dev->struct_mutex);
4720 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
4722 struct drm_device *dev = ring->dev;
4723 struct drm_i915_private *dev_priv = dev->dev_private;
4724 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4725 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4728 if (!HAS_L3_DPF(dev) || !remap_info)
4731 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
4736 * Note: We do not worry about the concurrent register cacheline hang
4737 * here because no other code should access these registers other than
4738 * at initialization time.
4740 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4741 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4742 intel_ring_emit(ring, reg_base + i);
4743 intel_ring_emit(ring, remap_info[i/4]);
4746 intel_ring_advance(ring);
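/*
 * Note that each remap dword was emitted with its own LOAD_REGISTER_IMM,
 * i.e. three ring dwords (opcode, register offset, value) per 4 bytes of
 * remap data, which is why GEN7_L3LOG_SIZE / 4 * 3 dwords were reserved
 * with intel_ring_begin() above.
 */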
4751 void i915_gem_init_swizzling(struct drm_device *dev)
4753 struct drm_i915_private *dev_priv = dev->dev_private;
4755 if (INTEL_INFO(dev)->gen < 5 ||
4756 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4759 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4760 DISP_TILE_SURFACE_SWIZZLING);
4765 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4767 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4768 else if (IS_GEN7(dev))
4769 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4770 else if (IS_GEN8(dev))
4771 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4777 intel_enable_blt(struct drm_device *dev)
4782 /* The blitter was dysfunctional on early prototypes */
4783 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
4784 DRM_INFO("BLT not supported on this pre-production hardware;"
4785 " graphics performance will be degraded.\n");
static void init_unused_ring(struct drm_device *dev, u32 base)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_device *dev)
{
	if (IS_I830(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
		init_unused_ring(dev, SRB2_BASE);
		init_unused_ring(dev, SRB3_BASE);
	} else if (IS_GEN2(dev)) {
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
	} else if (IS_GEN3(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, PRB2_BASE);
	}
}
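
/*
 * Create the legacy (ringbuffer-mode) engines this device actually has:
 * render always, then BSD, blitter, VEBOX and the second BSD ring where
 * available, tearing everything back down if any step fails.
 */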
int i915_gem_init_rings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}
	if (intel_enable_blt(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}
	if (HAS_VEBOX(dev)) {
		ret = intel_init_vebox_ring_buffer(dev);
		if (ret)
			goto cleanup_blt_ring;
	}
	if (HAS_BSD2(dev)) {
		ret = intel_init_bsd2_ring_buffer(dev);
		if (ret)
			goto cleanup_vebox_ring;
	}

	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
	if (ret)
		goto cleanup_bsd2_ring;

	return 0;

cleanup_bsd2_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
cleanup_vebox_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

	return ret;
}
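
/*
 * Bring the GPU itself back up: workaround registers, swizzling, unused-ring
 * cleanup, per-engine hardware init, L3 remapping, PPGTT and context
 * enabling. Runs both at driver load and when the hardware is reinitialised
 * after reset or resume.
 */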
int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i;

	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	for_each_ring(ring, dev_priv, i) {
		ret = ring->init_hw(ring);
		if (ret)
			goto out;
	}

	for (i = 0; i < NUM_L3_SLICES(dev); i++)
		i915_gem_l3_remap(&dev_priv->ring[RCS], i);

	ret = i915_ppgtt_init_hw(dev);
	if (ret && ret != -EIO) {
		DRM_ERROR("PPGTT enable failed %d\n", ret);
		i915_gem_cleanup_ringbuffer(dev);
	}

	ret = i915_gem_context_enable(dev_priv);
	if (ret && ret != -EIO) {
		DRM_ERROR("Context enable failed %d\n", ret);
		i915_gem_cleanup_ringbuffer(dev);

		goto out;
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}
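
/*
 * Top-level GEM initialisation at driver load: choose execlists or legacy
 * ringbuffer submission, set up the global GTT, contexts and rings, and run
 * the first hardware init while holding a forcewake reference.
 */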
int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (IS_VALLEYVIEW(dev)) {
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
			      VLV_GTLC_ALLOWWAKEACK), 10))
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
	}

	if (!i915.enable_execlists) {
		dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_rings = i915_gem_init_rings;
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
	} else {
		dev_priv->gt.do_execbuf = intel_execlists_submission;
		dev_priv->gt.init_rings = intel_logical_rings_init;
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_global_gtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_rings(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.cleanup_ring(ring);
}

static void
init_ring_lists(struct intel_engine_cs *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

void i915_init_vm(struct drm_i915_private *dev_priv,
		  struct i915_address_space *vm)
{
	if (!i915_is_ggtt(vm))
		drm_mm_init(&vm->mm, vm->start, vm->total);
	vm->dev = dev_priv->dev;
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
	INIT_LIST_HEAD(&vm->global_link);
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
}
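
/*
 * One-time GEM state setup at driver load: address-space and object lists,
 * per-ring and fence-register lists, deferred work handlers, the number of
 * fence registers for this generation, and the shrinker/OOM hooks.
 */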
void
i915_gem_load(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	INIT_LIST_HEAD(&dev_priv->vm_list);
	i915_init_vm(dev_priv, &dev_priv->gtt.base);

	INIT_LIST_HEAD(&dev_priv->context_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.shrinker);

	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	register_oom_notifier(&dev_priv->mm.oom_notifier);

	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);

	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE);
}
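
/*
 * Per-file teardown: cancel the client's idle work and detach any requests
 * it still owns, so that later request retirement does not touch a freed
 * file_priv.
 */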
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	cancel_delayed_work_sync(&file_priv->mm.idle_work);

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static int
i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	*color = 0; /* XXXKIB */
	return (0);
}

static void
i915_gem_pager_dtor(void *handle)
{
	struct drm_gem_object *obj;
	struct drm_device *dev;

	obj = handle;
	dev = obj->dev;

	mutex_lock(&dev->struct_mutex);
	drm_gem_free_mmap_offset(obj);
	i915_gem_release_mmap(to_intel_bo(obj));
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
}

static void
i915_gem_file_idle_work_handler(struct work_struct *work)
{
	struct drm_i915_file_private *file_priv =
		container_of(work, typeof(*file_priv), mm.idle_work.work);

	atomic_set(&file_priv->rps_wait_boost, false);
}

int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG_DRIVER("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = dev->dev_private;
	file_priv->file = file;

	spin_init(&file_priv->mm.lock, "i915_priv");
	INIT_LIST_HEAD(&file_priv->mm.request_list);
	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
			  i915_gem_file_idle_work_handler);

	ret = i915_gem_context_open(dev, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * old: current GEM buffer for the frontbuffer slots
 * new: new GEM buffer for the frontbuffer slots
 * frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	if (old) {
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
		old->frontbuffer_bits &= ~frontbuffer_bits;
	}

	if (new) {
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
		new->frontbuffer_bits |= frontbuffer_bits;
	}
}
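
/*
 * The shrinker and OOM notifier can be invoked while this driver itself is
 * allocating memory and already holds struct_mutex. The helpers below detect
 * that case so the lock can be borrowed instead of deadlocking on it.
 */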
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
#else
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
	return false;
#endif
}

static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
{
	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return false;

		if (to_i915(dev)->mm.shrinker_no_lock_stealing)
			return false;

		*unlock = false;
	} else
		*unlock = true;

	return true;
}

static int num_vma_bound(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	int count = 0;

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (drm_mm_node_allocated(&vma->node))
			count++;

	return count;
}
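
/*
 * Shrinker "count" callback: report, in pages, how much object memory could
 * in principle be reclaimed: unpinned unbound objects plus bound objects
 * whose pages are pinned only by their bindings.
 */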
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	unsigned long count;
	bool unlock;

	if (!i915_gem_shrinker_lock(dev, &unlock))
		return 0;

	count = 0;
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
		if (obj->pages_pin_count == 0)
			count += obj->base.size >> PAGE_SHIFT;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		if (!i915_gem_obj_is_pinned(obj) &&
		    obj->pages_pin_count == num_vma_bound(obj))
			count += obj->base.size >> PAGE_SHIFT;
	}

	if (unlock)
		mutex_unlock(&dev->struct_mutex);

	return count;
}

/* All the new VM stuff */
unsigned long i915_gem_obj_offset_view(struct drm_i915_gem_object *o,
				       struct i915_address_space *vm,
				       enum i915_ggtt_view_type view)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (vma->vm == vm && vma->ggtt_view.type == view)
			return vma->node.start;
	}

	WARN(1, "%s vma for this object not found.\n",
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
	return -1;
}

bool i915_gem_obj_bound_view(struct drm_i915_gem_object *o,
			     struct i915_address_space *vm,
			     enum i915_ggtt_view_type view)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm &&
		    vma->ggtt_view.type == view &&
		    drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
				struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

	BUG_ON(list_empty(&o->vma_list));

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm)
			return vma->node.size;

	return 0;
}
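
/*
 * Shrinker "scan" callback: first try to satisfy the request from purgeable
 * objects only, then fall back to shrinking any unpinned bound or unbound
 * objects until enough pages have been released.
 */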
static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	struct drm_device *dev = dev_priv->dev;
	unsigned long freed;
	bool unlock;

	if (!i915_gem_shrinker_lock(dev, &unlock))
		return SHRINK_STOP;

	freed = i915_gem_shrink(dev_priv,
				sc->nr_to_scan,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_PURGEABLE);
	if (freed < sc->nr_to_scan)
		freed += i915_gem_shrink(dev_priv,
					 sc->nr_to_scan - freed,
					 I915_SHRINK_BOUND |
					 I915_SHRINK_UNBOUND);
	if (unlock)
		mutex_unlock(&dev->struct_mutex);

	return freed;
}
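
/*
 * OOM notifier: drop everything that can be dropped, then report how much
 * object memory remains pinned or otherwise unreclaimable so the log gives
 * some idea of where the graphics memory went.
 */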
static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *dev_priv =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	unsigned long timeout = msecs_to_jiffies(5000) + 1;
	unsigned long pinned, bound, unbound, freed_pages;
	bool was_interruptible;
	bool unlock;

	while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) {
		schedule_timeout_killable(1);
		if (fatal_signal_pending(current))
			return NOTIFY_DONE;
	}
	if (timeout == 0) {
		pr_err("Unable to purge GPU memory due to lock contention.\n");
		return NOTIFY_DONE;
	}

	was_interruptible = dev_priv->mm.interruptible;
	dev_priv->mm.interruptible = false;

	freed_pages = i915_gem_shrink_all(dev_priv);

	dev_priv->mm.interruptible = was_interruptible;

	/* Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	unbound = bound = pinned = 0;
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
		if (!obj->base.filp) /* not backed by a freeable object */
			continue;

		if (obj->pages_pin_count)
			pinned += obj->base.size;
		else
			unbound += obj->base.size;
	}
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		if (!obj->base.filp)
			continue;

		if (obj->pages_pin_count)
			pinned += obj->base.size;
		else
			bound += obj->base.size;
	}

	if (unlock)
		mutex_unlock(&dev->struct_mutex);

	if (freed_pages || unbound || bound)
		pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n",
			freed_pages << PAGE_SHIFT, pinned);
	if (unbound || bound)
		pr_err("%lu and %lu bytes still available in the "
		       "bound and unbound GPU page lists.\n",
		       bound, unbound);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}
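
/*
 * Look up the VMA that maps this object into the global GTT with the normal
 * (untransformed) view, or NULL if it has no such binding.
 */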
struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (vma->vm == ggtt &&
		    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
			return vma;

	return NULL;
}