 * Copyright © 2008 Intel Corporation
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * Eric Anholt <eric@anholt.net>
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#include <sys/resourcevar.h>
#include <sys/sfbuf.h>
#include <drm/i915_drm.h>
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/jiffies.h>
#include <linux/time.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                                                    bool map_and_fenceable,
static int i915_gem_phys_pwrite(struct drm_device *dev,
                                struct drm_i915_gem_object *obj,
                                struct drm_i915_gem_pwrite *args,
                                struct drm_file *file);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
        i915_gem_release_mmap(obj);

        /* As we do not have an associated fence register, we will force
         * a tiling change if we ever need to acquire one.
        obj->fence_dirty = false;
        obj->fence_reg = I915_FENCE_REG_NONE;

static int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj);
static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
static void i915_gem_reset_fences(struct drm_device *dev);
static void i915_gem_lowmem(void *arg);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
        dev_priv->mm.object_count++;
        dev_priv->mm.object_memory += size;

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
        dev_priv->mm.object_count--;
        dev_priv->mm.object_memory -= size;

i915_gem_wait_for_error(struct i915_gpu_error *error)
#define EXIT_COND (!i915_reset_in_progress(error) || \
                   i915_terminally_wedged(error))
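        /* i.e. the wait ends either once no reset is in progress any more or
         * once the GPU has been declared terminally wedged, so a dead GPU
         * cannot keep callers blocked here forever. */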
         * Only wait 10 seconds for the gpu reset to complete to avoid hanging
         * userspace. If it takes that long something really bad is going on and
         * we should simply try to bail out and fail as gracefully as possible.
        ret = wait_event_interruptible_timeout(error->reset_queue,
                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
        } else if (ret < 0) {

int i915_mutex_lock_interruptible(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;

        ret = i915_gem_wait_for_error(&dev_priv->gpu_error);

        ret = lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_SLEEPFAIL);

        WARN_ON(i915_verify_lists(dev));

i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)

i915_gem_init_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
        struct drm_i915_gem_init *args = data;

        if (drm_core_check_feature(dev, DRIVER_MODESET))

        if (args->gtt_start >= args->gtt_end ||
            (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))

        /* GEM with user mode setting was never supported on ilk and later. */
        if (INTEL_INFO(dev)->gen >= 5)

        mutex_lock(&dev->struct_mutex);
        i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
        mutex_unlock(&dev->struct_mutex);

i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_get_aperture *args = data;
        struct drm_i915_gem_object *obj;

        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
                pinned += obj->gtt_space->size;
        mutex_unlock(&dev->struct_mutex);

        args->aper_size = dev_priv->gtt.total;
        args->aper_available_size = args->aper_size - pinned;

i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
        struct drm_i915_gem_object *obj;

        size = roundup(size, PAGE_SIZE);

        /* Allocate the new object */
        obj = i915_gem_alloc_object(dev, size);

        ret = drm_gem_handle_create(file, &obj->base, &handle);
                drm_gem_object_release(&obj->base);
                i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
                drm_free(obj, M_DRM);

        /* drop reference from allocate - handle holds it now */
        drm_gem_object_unreference(&obj->base);

i915_gem_dumb_create(struct drm_file *file,
                     struct drm_device *dev,
                     struct drm_mode_create_dumb *args)
        /* have to work out size/pitch and return them */
        args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
        args->size = args->pitch * args->height;
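        /* For example, a 1920x1080 request at 32 bpp works out to a pitch of
         * roundup2(1920 * 4, 64) = 7680 bytes and a size of 7680 * 1080 =
         * 8294400 bytes, which i915_gem_create() below rounds up to whole
         * pages. */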
        return i915_gem_create(file, dev,
                               args->size, &args->handle);

int i915_gem_dumb_destroy(struct drm_file *file,
                          struct drm_device *dev,
        return drm_gem_handle_delete(file, handle);

 * Creates a new mm object and returns a handle to it.
i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
        struct drm_i915_gem_create *args = data;

        return i915_gem_create(file, dev,
                               args->size, &args->handle);

static inline void vm_page_reference(vm_page_t m)
        vm_page_flag_set(m, PG_REFERENCED);

i915_gem_shmem_pread(struct drm_device *dev,
                     struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pread *args,
                     struct drm_file *file)
        int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;

        do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

        vm_obj = obj->base.vm_obj;

        VM_OBJECT_LOCK(vm_obj);
        vm_object_pip_add(vm_obj, 1);
        while (args->size > 0) {
                obj_pi = OFF_TO_IDX(args->offset);
                obj_po = args->offset & PAGE_MASK;

                m = shmem_read_mapping_page(vm_obj, obj_pi);
                VM_OBJECT_UNLOCK(vm_obj);

                sf = sf_buf_alloc(m);
                mkva = sf_buf_kva(sf);
                length = min(args->size, PAGE_SIZE - obj_po);
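                /* On chipsets that swizzle addresses based on physical bit 17
                 * the CPU view of a tiled page changes per 64-byte chunk, so
                 * copy only up to the next 64-byte boundary and flip bit 6 of
                 * the source offset for pages whose bit 17 is set. */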
                if (do_bit17_swizzling &&
                    (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
                        cnt = roundup2(obj_po + 1, 64);
                        cnt = min(cnt - obj_po, length);
                        swizzled_po = obj_po ^ 64;
                        swizzled_po = obj_po;
                ret = -copyout_nofault(
                    (char *)mkva + swizzled_po,
                    (void *)(uintptr_t)args->data_ptr, cnt);
                args->data_ptr += cnt;

                VM_OBJECT_LOCK(vm_obj);
                vm_page_reference(m);
                vm_page_busy_wait(m, FALSE, "i915gem");
                vm_page_unwire(m, 1);
        vm_object_pip_wakeup(vm_obj);
        VM_OBJECT_UNLOCK(vm_obj);

 * Reads data from the object referenced by handle.
 * On error, the contents of *data are undefined.
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *file)
        struct drm_i915_gem_pread *args = data;
        struct drm_i915_gem_object *obj;

        ret = i915_mutex_lock_interruptible(dev);

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {

        /* Bounds check source. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {

        ret = i915_gem_shmem_pread(dev, obj, args, file);

        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);

/* This is the fast write path which cannot handle
 * page faults in the source data
fast_user_write(struct io_mapping *mapping,
                loff_t page_base, int page_offset,
                char __user *user_data,
        void __iomem *vaddr_atomic;
        unsigned long unwritten;

        vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
        /* We can use the cpu mem copy function because this is X86. */
        vaddr = (void __force*)vaddr_atomic + page_offset;
        unwritten = __copy_from_user_inatomic_nocache(vaddr,
        io_mapping_unmap_atomic(vaddr_atomic);

 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
                         struct drm_i915_gem_object *obj,
                         struct drm_i915_gem_pwrite *args,
                         struct drm_file *file)
        drm_i915_private_t *dev_priv = dev->dev_private;
        loff_t offset, page_base;
        char __user *user_data;
        int page_offset, page_length, ret;

        ret = i915_gem_object_pin(obj, 0, true, true);

        ret = i915_gem_object_set_to_gtt_domain(obj, true);

        ret = i915_gem_object_put_fence(obj);

        user_data = (char __user *) (uintptr_t) args->data_ptr;

        offset = obj->gtt_offset + args->offset;

        /* Operation in this page
         * page_base = page offset within aperture
         * page_offset = offset within page
         * page_length = bytes to copy for this page
        page_base = offset & PAGE_MASK;
        page_offset = offset_in_page(offset);
        page_length = remain;
        if ((page_offset + remain) > PAGE_SIZE)
                page_length = PAGE_SIZE - page_offset;

        /* If we get a fault while copying data, then (presumably) our
         * source page isn't available. Return the error and we'll
         * retry in the slow path.
        if (fast_user_write(dev_priv->gtt.mappable, page_base,
                            page_offset, user_data, page_length)) {

        remain -= page_length;
        user_data += page_length;
        offset += page_length;

        i915_gem_object_unpin(obj);

i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj,
    uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file)
         * Pass the unaligned physical address and size to pmap_mapdev_attr()
         * so it can properly calculate whether an extra page needs to be
         * mapped or not to cover the requested range. The function will
         * add the page offset into the returned mkva for us.
        mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset +
            offset, size, PAT_WRITE_COMBINING);
        ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size);
        pmap_unmapdev(mkva, size);

i915_gem_shmem_pwrite(struct drm_device *dev,
                      struct drm_i915_gem_object *obj,
                      struct drm_i915_gem_pwrite *args,
                      struct drm_file *file)
        int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;

        do_bit17_swizzling = 0;
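        /* Unlike the pread path above, this writer leaves bit-17 swizzling
         * disabled, so the swizzle branch below is never taken as written. */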
        vm_obj = obj->base.vm_obj;

        VM_OBJECT_LOCK(vm_obj);
        vm_object_pip_add(vm_obj, 1);
        while (args->size > 0) {
                obj_pi = OFF_TO_IDX(args->offset);
                obj_po = args->offset & PAGE_MASK;

                m = shmem_read_mapping_page(vm_obj, obj_pi);
                VM_OBJECT_UNLOCK(vm_obj);

                sf = sf_buf_alloc(m);
                mkva = sf_buf_kva(sf);
                length = min(args->size, PAGE_SIZE - obj_po);
                if (do_bit17_swizzling &&
                    (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
                        cnt = roundup2(obj_po + 1, 64);
                        cnt = min(cnt - obj_po, length);
                        swizzled_po = obj_po ^ 64;
                        swizzled_po = obj_po;
                ret = -copyin_nofault(
                    (void *)(uintptr_t)args->data_ptr,
                    (char *)mkva + swizzled_po, cnt);
                args->data_ptr += cnt;

                VM_OBJECT_LOCK(vm_obj);

                vm_page_reference(m);
                vm_page_busy_wait(m, FALSE, "i915gem");
                vm_page_unwire(m, 1);
        vm_object_pip_wakeup(vm_obj);
        VM_OBJECT_UNLOCK(vm_obj);

 * Writes data to the object referenced by handle.
 * On error, the contents of the buffer that were to be modified are undefined.
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
        struct drm_i915_gem_pwrite *args = data;
        struct drm_i915_gem_object *obj;
        vm_offset_t start, end;

        start = trunc_page(args->data_ptr);
        end = round_page(args->data_ptr + args->size);
        npages = howmany(end - start, PAGE_SIZE);
        ma = kmalloc(npages * sizeof(vm_page_t), M_DRM, M_WAITOK |
        npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
            (vm_offset_t)args->data_ptr, args->size,
            VM_PROT_READ, ma, npages);

        ret = i915_mutex_lock_interruptible(dev);

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {

        /* Bounds check destination. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {

                ret = i915_gem_phys_pwrite(dev, obj, args, file);
        } else if (obj->gtt_space &&
            obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                ret = i915_gem_object_pin(obj, 0, true, false);

                ret = i915_gem_object_set_to_gtt_domain(obj, true);

                ret = i915_gem_object_put_fence(obj);

                ret = i915_gem_gtt_write(dev, obj, args->data_ptr, args->size,

                i915_gem_object_unpin(obj);

                ret = i915_gem_object_set_to_cpu_domain(obj, true);

                ret = i915_gem_shmem_pwrite(dev, obj, args, file);

        drm_gem_object_unreference(&obj->base);

        mutex_unlock(&dev->struct_mutex);

        vm_page_unhold_pages(ma, npages);

i915_gem_check_wedge(struct i915_gpu_error *error,
        if (i915_reset_in_progress(error)) {
                /* Non-interruptible callers can't handle -EAGAIN, hence return
                 * -EIO unconditionally for these. */

        /* Recovery complete, but the reset failed ... */
        if (i915_terminally_wedged(error))

 * Compare seqno against outstanding lazy request. Emit a request if they are
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
        DRM_LOCK_ASSERT(ring->dev);

        if (seqno == ring->outstanding_lazy_request)
                ret = i915_add_request(ring, NULL, NULL);
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
                        unsigned reset_counter,
                        bool interruptible, struct timespec *timeout)
        drm_i915_private_t *dev_priv = ring->dev->dev_private;
        struct timespec before, now, wait_time={1,0};
        unsigned long timeout_jiffies;
        bool wait_forever = true;

        if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))

        if (timeout != NULL) {
                wait_time = *timeout;
                wait_forever = false;

        timeout_jiffies = timespec_to_jiffies(&wait_time);

        if (WARN_ON(!ring->irq_get(ring)))

        /* Record current time in case interrupted by signal, or wedged */
        getrawmonotonic(&before);

        (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
         i915_reset_in_progress(&dev_priv->gpu_error) || \
         reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
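        /* The wait below therefore ends once the seqno is signalled, once a
         * reset begins, or once a reset has completed since the caller
         * sampled reset_counter; the reset cases are sorted out afterwards. */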
                end = wait_event_interruptible_timeout(ring->irq_queue,
                end = wait_event_timeout(ring->irq_queue, EXIT_COND,

                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
                if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))

                /* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
                ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
        } while (end == 0 && wait_forever);

        getrawmonotonic(&now);

                struct timespec sleep_time = timespec_sub(now, before);
                *timeout = timespec_sub(*timeout, sleep_time);

        case -EAGAIN: /* Wedged */
        case -ERESTARTSYS: /* Signal */
        case 0: /* Timeout */
                        set_normalized_timespec(timeout, 0, 0);
                return -ETIMEDOUT;      /* -ETIME on Linux */
        default: /* Completed */
                WARN_ON(end < 0); /* We're not aware of other errors */
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible = dev_priv->mm.interruptible;

        DRM_LOCK_ASSERT(dev);

        ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);

        ret = i915_gem_check_olr(ring, seqno);

        return __wait_seqno(ring, seqno,
                            atomic_read(&dev_priv->gpu_error.reset_counter),
                            interruptible, NULL);

 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
        struct intel_ring_buffer *ring = obj->ring;

        seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;

        ret = i915_wait_seqno(ring, seqno);

        i915_gem_retire_requests_ring(ring);

        /* Manually manage the write flush as we may have not yet
         * retired the buffer.
        if (obj->last_write_seqno &&
            i915_seqno_passed(seqno, obj->last_write_seqno)) {
                obj->last_write_seqno = 0;
                obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring = obj->ring;
        unsigned reset_counter;

        DRM_LOCK_ASSERT(dev);
        BUG_ON(!dev_priv->mm.interruptible);

        seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;

        ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);

        ret = i915_gem_check_olr(ring, seqno);

        reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
        mutex_unlock(&dev->struct_mutex);
        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
        mutex_lock(&dev->struct_mutex);

        i915_gem_retire_requests_ring(ring);

        /* Manually manage the write flush as we may have not yet
         * retired the buffer.
        if (obj->last_write_seqno &&
            i915_seqno_passed(seqno, obj->last_write_seqno)) {
                obj->last_write_seqno = 0;
                obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        uint32_t read_domains = args->read_domains;
        uint32_t write_domain = args->write_domain;

        /* Only handle setting domains to types used by the CPU. */
        if (write_domain & I915_GEM_GPU_DOMAINS)

        if (read_domains & I915_GEM_GPU_DOMAINS)

        /* Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
        if (write_domain != 0 && read_domains != write_domain)

        ret = i915_mutex_lock_interruptible(dev);

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {

        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
        ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);

        if (read_domains & I915_GEM_DOMAIN_GTT) {
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

                /* Silently promote "you're not bound, there was nothing to do"
                 * to success, since the client was just asking us to
                 * make sure everything was done.
                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);

 * Called when user space has done writes to this buffer
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file)
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;

        ret = i915_mutex_lock_interruptible(dev);
        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {

        /* Pinned buffers may be scanout, so flush the cache */
        i915_gem_object_flush_cpu_write_domain(obj);

        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);

 * Maps the contents of an object, returning the address it is mapped
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
        struct drm_i915_gem_mmap *args = data;
        struct drm_gem_object *obj;
        struct proc *p = curproc;
        vm_map_t map = &p->p_vmspace->vm_map;

        obj = drm_gem_object_lookup(dev, file, args->handle);

        if (args->size == 0)

        size = round_page(args->size);
        if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {

        vm_object_hold(obj->vm_obj);
        vm_object_reference_locked(obj->vm_obj);
        vm_object_drop(obj->vm_obj);
        rv = vm_map_find(map, obj->vm_obj, NULL,
                         args->offset, &addr, args->size,
                         PAGE_SIZE, /* align */
                         VM_MAPTYPE_NORMAL, /* maptype */
                         VM_PROT_READ | VM_PROT_WRITE, /* prot */
                         VM_PROT_READ | VM_PROT_WRITE, /* max */
                         MAP_SHARED /* cow */);
        if (rv != KERN_SUCCESS) {
                vm_object_deallocate(obj->vm_obj);
                error = -vm_mmap_to_errno(rv);
        args->addr_ptr = (uint64_t)addr;

        drm_gem_object_unreference(obj);
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        drm_i915_private_t *dev_priv = dev->dev_private;
        pgoff_t page_offset;
        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

        /* We don't use vmf->pgoff since that has the fake offset */
        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>

        ret = i915_mutex_lock_interruptible(dev);

        trace_i915_gem_object_fault(obj, page_offset, true, write);

        /* Access to snoopable pages through the GTT is incoherent. */
        if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {

        /* Now bind it into the GTT if needed */
        ret = i915_gem_object_pin(obj, 0, true, false);

        ret = i915_gem_object_set_to_gtt_domain(obj, write);

        ret = i915_gem_object_get_fence(obj);

        obj->fault_mappable = true;
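        /* The mappable aperture is a linear window onto the GTT, so the page
         * backing this fault is the aperture base plus the object's GTT
         * offset plus the faulting page index. */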
        pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +

        /* Finally, remap it using the new GTT offset */
        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);

        i915_gem_object_unpin(obj);

        mutex_unlock(&dev->struct_mutex);

                /* If this -EIO is due to a gpu hang, give the reset code a
                 * chance to clean up the mess. Otherwise return the proper
                if (i915_terminally_wedged(&dev_priv->gpu_error))
                        return VM_FAULT_SIGBUS;

                /* Give the error handler a chance to run and move the
                 * objects off the GPU active list. Next time we service the
                 * fault, we should be able to transition the page into the
                 * GTT without touching the GPU (and so avoid further
                 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
                 * with coherency, just lost writes.
                /*
                 * EBUSY is ok: this just means that another thread
                 * already did the job.
                return VM_FAULT_NOPAGE;
                return VM_FAULT_OOM;
                return VM_FAULT_SIGBUS;
                WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
                return VM_FAULT_SIGBUS;
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
        if (!obj->fault_mappable)

        devobj = cdev_pager_lookup(obj);
        if (devobj != NULL) {
                page_count = OFF_TO_IDX(obj->base.size);

                VM_OBJECT_LOCK(devobj);
                for (i = 0; i < page_count; i++) {
                        m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
                        cdev_pager_free_page(devobj, m);
                VM_OBJECT_UNLOCK(devobj);
                vm_object_deallocate(devobj);

        obj->fault_mappable = false;
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
        if (INTEL_INFO(dev)->gen >= 4 ||
            tiling_mode == I915_TILING_NONE)

        /* Previous chips need a power-of-two fence region when tiling */
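        /* e.g. a 1200 KiB tiled object on gen3 starts from the 1 MiB minimum
         * below and is doubled once, ending up with a 2 MiB fence region. */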
        if (INTEL_INFO(dev)->gen == 3)
                gtt_size = 1024*1024;
                gtt_size = 512*1024;

        while (gtt_size < size)

 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
                           int tiling_mode, bool fenced)
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
        if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
            tiling_mode == I915_TILING_NONE)

         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
        return i915_gem_get_gtt_size(dev, size, tiling_mode);

i915_gem_mmap_gtt(struct drm_file *file,
                  struct drm_device *dev,
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj;

        ret = i915_mutex_lock_interruptible(dev);

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
        if (&obj->base == NULL) {

        if (obj->base.size > dev_priv->gtt.mappable_end) {

        if (obj->madv != I915_MADV_WILLNEED) {
                DRM_ERROR("Attempting to mmap a purgeable buffer\n");

        ret = drm_gem_create_mmap_offset(&obj->base);

        *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
            DRM_GEM_MAPPING_KEY;

        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);

 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
        struct drm_i915_gem_mmap_gtt *args = data;

        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);

/* Immediately discard the backing storage */
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
        vm_obj = obj->base.vm_obj;
        VM_OBJECT_LOCK(vm_obj);
        vm_object_page_remove(vm_obj, 0, 0, false);
        VM_OBJECT_UNLOCK(vm_obj);
        obj->madv = __I915_MADV_PURGED;

i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
        return obj->madv == I915_MADV_DONTNEED;

i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
        BUG_ON(obj->madv == __I915_MADV_PURGED);

        if (obj->tiling_mode != I915_TILING_NONE)
                i915_gem_object_save_bit_17_swizzle(obj);
        if (obj->madv == I915_MADV_DONTNEED)
        page_count = obj->base.size / PAGE_SIZE;
        VM_OBJECT_LOCK(obj->base.vm_obj);
#if GEM_PARANOID_CHECK_GTT
        i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
        for (i = 0; i < page_count; i++) {
                if (obj->madv == I915_MADV_WILLNEED)
                        vm_page_reference(m);
                vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
                vm_page_unwire(obj->pages[i], 1);
                vm_page_wakeup(obj->pages[i]);
        VM_OBJECT_UNLOCK(obj->base.vm_obj);

        drm_free(obj->pages, M_DRM);

i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
        const struct drm_i915_gem_object_ops *ops = obj->ops;

        if (obj->pages == NULL)

        BUG_ON(obj->gtt_space);

        if (obj->pages_pin_count)

        /* ->put_pages might need to allocate memory for the bit17 swizzle
         * array, hence protect them from being reaped by removing them from gtt
        list_del(&obj->gtt_list);

        ops->put_pages(obj);

        if (i915_gem_object_is_purgeable(obj))
                i915_gem_object_truncate(obj);
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        struct drm_device *dev;
        int page_count, i, j;
        struct vm_page *page;

        dev = obj->base.dev;
        KASSERT(obj->pages == NULL, ("Obj already has pages"));
        page_count = obj->base.size / PAGE_SIZE;
        obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,

        vm_obj = obj->base.vm_obj;
        VM_OBJECT_LOCK(vm_obj);

        for (i = 0; i < page_count; i++) {
                page = shmem_read_mapping_page(vm_obj, i);
                        i915_gem_purge(dev_priv, page_count);
                obj->pages[i] = page;

        VM_OBJECT_UNLOCK(vm_obj);
        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj);

        for (j = 0; j < i; j++) {
                page = obj->pages[j];
                vm_page_busy_wait(page, FALSE, "i915gem");
                vm_page_unwire(page, 0);
                vm_page_wakeup(page);
        VM_OBJECT_UNLOCK(vm_obj);
        drm_free(obj->pages, M_DRM);

/* Ensure that the associated pages are gathered from the backing storage
 * and pinned into our object. i915_gem_object_get_pages() may be called
 * multiple times before they are released by a single call to
 * i915_gem_object_put_pages() - once the pages are no longer referenced
 * either as a result of memory pressure (reaping pages under the shrinker)
 * or as the object is itself released.
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        const struct drm_i915_gem_object_ops *ops = obj->ops;

        if (obj->madv != I915_MADV_WILLNEED) {
                DRM_ERROR("Attempting to obtain a purgeable object\n");

        BUG_ON(obj->pages_pin_count);

        ret = ops->get_pages(obj);

        list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);

i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
                               struct intel_ring_buffer *ring)
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 seqno = intel_ring_get_seqno(ring);

        BUG_ON(ring == NULL);

        /* Add a reference if we're newly entering the active list. */
        drm_gem_object_reference(&obj->base);

        /* Move from whatever list we were on to the tail of execution. */
        list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
        list_move_tail(&obj->ring_list, &ring->active_list);

        obj->last_read_seqno = seqno;

        if (obj->fenced_gpu_access) {
                obj->last_fenced_seqno = seqno;

                /* Bump MRU to take account of the delayed flush */
                if (obj->fence_reg != I915_FENCE_REG_NONE) {
                        struct drm_i915_fence_reg *reg;

                        reg = &dev_priv->fence_regs[obj->fence_reg];
                        list_move_tail(&reg->lru_list,
                                       &dev_priv->mm.fence_list);

i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;

        BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
        BUG_ON(!obj->active);

        list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

        list_del_init(&obj->ring_list);

        obj->last_read_seqno = 0;
        obj->last_write_seqno = 0;
        obj->base.write_domain = 0;

        obj->last_fenced_seqno = 0;
        obj->fenced_gpu_access = false;

        drm_gem_object_unreference(&obj->base);

        WARN_ON(i915_verify_lists(dev));
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;

        /* Carefully retire all requests without writing to the rings */
        for_each_ring(ring, dev_priv, i) {
                ret = intel_ring_idle(ring);
        i915_gem_retire_requests(dev);

        /* Finally reset hw state */
        for_each_ring(ring, dev_priv, i) {
                intel_ring_init_seqno(ring, seqno);

                for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
                        ring->sync_seqno[j] = 0;

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* HWS page needs to be set less than what we
         * will inject to ring
        ret = i915_gem_init_seqno(dev, seqno - 1);

        /* Carefully set the last_seqno value so that wrap
         * detection still works
        dev_priv->next_seqno = seqno;
        dev_priv->last_seqno = seqno - 1;
        if (dev_priv->last_seqno == 0)
                dev_priv->last_seqno--;

i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* reserve 0 for non-seqno */
        if (dev_priv->next_seqno == 0) {
                int ret = i915_gem_init_seqno(dev, 0);

                dev_priv->next_seqno = 1;

        *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;

i915_add_request(struct intel_ring_buffer *ring,
                 struct drm_file *file,
        drm_i915_private_t *dev_priv = ring->dev->dev_private;
        struct drm_i915_gem_request *request;
        u32 request_ring_position;

         * Emit any outstanding flushes - execbuf can fail to emit the flush
         * after having emitted the batchbuffer command. Hence we need to fix
         * things up similar to emitting the lazy request. The difference here
         * is that the flush _must_ happen before the next request, no matter
        ret = intel_ring_flush_all_caches(ring);

        request = kmalloc(sizeof(*request), M_DRM, M_WAITOK | M_ZERO);
        if (request == NULL)

        /* Record the position of the start of the request so that
         * should we detect the updated seqno part-way through the
         * GPU processing the request, we never over-estimate the
         * position of the head.
        request_ring_position = intel_ring_get_tail(ring);

        ret = ring->add_request(ring);
                kfree(request, M_DRM);

        request->seqno = intel_ring_get_seqno(ring);
        request->ring = ring;
        request->tail = request_ring_position;
        request->emitted_jiffies = jiffies;
        was_empty = list_empty(&ring->request_list);
        list_add_tail(&request->list, &ring->request_list);
        request->file_priv = NULL;

                struct drm_i915_file_private *file_priv = file->driver_priv;

                spin_lock(&file_priv->mm.lock);
                request->file_priv = file_priv;
                list_add_tail(&request->client_list,
                              &file_priv->mm.request_list);
                spin_unlock(&file_priv->mm.lock);

        ring->outstanding_lazy_request = 0;

        if (!dev_priv->mm.suspended) {
                if (i915_enable_hangcheck) {
                        mod_timer(&dev_priv->gpu_error.hangcheck_timer,
                                  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
                queue_delayed_work(dev_priv->wq,
                                   &dev_priv->mm.retire_work,
                                   round_jiffies_up_relative(hz));
                intel_mark_busy(dev_priv->dev);

        *out_seqno = request->seqno;
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
        struct drm_i915_file_private *file_priv = request->file_priv;

        spin_lock(&file_priv->mm.lock);
        if (request->file_priv) {
                list_del(&request->client_list);
                request->file_priv = NULL;
        spin_unlock(&file_priv->mm.lock);

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
                                      struct intel_ring_buffer *ring)
        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;

                request = list_first_entry(&ring->request_list,
                                           struct drm_i915_gem_request,

                list_del(&request->list);
                i915_gem_request_remove_from_client(request);
                drm_free(request, M_DRM);

        while (!list_empty(&ring->active_list)) {
                struct drm_i915_gem_object *obj;

                obj = list_first_entry(&ring->active_list,
                                       struct drm_i915_gem_object,

                i915_gem_object_move_to_inactive(obj);

static void i915_gem_reset_fences(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;

        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

                i915_gem_write_fence(dev, i, NULL);

                i915_gem_object_fence_lost(reg->obj);

                INIT_LIST_HEAD(&reg->lru_list);

        INIT_LIST_HEAD(&dev_priv->mm.fence_list);

void i915_gem_reset(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj;
        struct intel_ring_buffer *ring;

        for_each_ring(ring, dev_priv, i)
                i915_gem_reset_ring_lists(dev_priv, ring);

        /* Move everything out of the GPU domains to ensure we do any
         * necessary invalidation upon reuse.
        list_for_each_entry(obj,
                            &dev_priv->mm.inactive_list,
                obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;

        /* The fence registers are invalidated so clear them out */
        i915_gem_reset_fences(dev);

 * This function clears the request list as sequence numbers are passed.
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
        if (list_empty(&ring->request_list))

        WARN_ON(i915_verify_lists(ring->dev));

        seqno = ring->get_seqno(ring, true);

        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;

                request = list_first_entry(&ring->request_list,
                                           struct drm_i915_gem_request,

                if (!i915_seqno_passed(seqno, request->seqno))

                /* We know the GPU must have read the request to have
                 * sent us the seqno + interrupt, so use the position
                 * of tail of the request to update the last known position
                ring->last_retired_head = request->tail;

                list_del(&request->list);
                i915_gem_request_remove_from_client(request);
                kfree(request, M_DRM);

        /* Move any buffers on the active list that are no longer referenced
         * by the ringbuffer to the flushing/inactive lists as appropriate.
        while (!list_empty(&ring->active_list)) {
                struct drm_i915_gem_object *obj;

                obj = list_first_entry(&ring->active_list,
                                       struct drm_i915_gem_object,

                if (!i915_seqno_passed(seqno, obj->last_read_seqno))

                i915_gem_object_move_to_inactive(obj);

        if (unlikely(ring->trace_irq_seqno &&
                     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
                ring->irq_put(ring);
                ring->trace_irq_seqno = 0;

i915_gem_retire_requests(struct drm_device *dev)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;

        for_each_ring(ring, dev_priv, i)
                i915_gem_retire_requests_ring(ring);

__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
                  bool purgeable_only)
        struct drm_i915_gem_object *obj, *next;
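        /* Two passes: first drop the backing pages of unbound objects, which
         * is cheap, then also unbind inactive bound objects, stopping as soon
         * as the requested number of pages has been reclaimed. */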
        list_for_each_entry_safe(obj, next,
                                 &dev_priv->mm.unbound_list,
                if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
                    i915_gem_object_put_pages(obj) == 0) {
                        count += obj->base.size >> PAGE_SHIFT;
                        if (count >= target)

        list_for_each_entry_safe(obj, next,
                                 &dev_priv->mm.inactive_list,
                if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
                    i915_gem_object_unbind(obj) == 0 &&
                    i915_gem_object_put_pages(obj) == 0) {
                        count += obj->base.size >> PAGE_SHIFT;
                        if (count >= target)

i915_gem_purge(struct drm_i915_private *dev_priv, long target)
        return __i915_gem_shrink(dev_priv, target, true);

i915_gem_retire_work_handler(struct work_struct *work)
        drm_i915_private_t *dev_priv;
        struct drm_device *dev;
        struct intel_ring_buffer *ring;

        dev_priv = container_of(work, drm_i915_private_t,
                                mm.retire_work.work);
        dev = dev_priv->dev;

        /* Come back later if the device is busy... */
        if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) {
                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                   round_jiffies_up_relative(hz));

        i915_gem_retire_requests(dev);

        /* Send a periodic flush down the ring so we don't hold onto GEM
         * objects indefinitely.
        for_each_ring(ring, dev_priv, i) {
                if (ring->gpu_caches_dirty)
                        i915_add_request(ring, NULL, NULL);

                idle &= list_empty(&ring->request_list);

        if (!dev_priv->mm.suspended && !idle)
                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                   round_jiffies_up_relative(hz));
        intel_mark_idle(dev);

        mutex_unlock(&dev->struct_mutex);
 * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring any
 * completed requests.
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
        ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);

        i915_gem_retire_requests_ring(obj->ring);
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_wait *args = data;
        struct drm_i915_gem_object *obj;
        struct intel_ring_buffer *ring = NULL;
        struct timespec timeout_stack, *timeout = NULL;
        unsigned reset_counter;

        if (args->timeout_ns >= 0) {
                timeout_stack = ns_to_timespec(args->timeout_ns);
                timeout = &timeout_stack;

        ret = i915_mutex_lock_interruptible(dev);

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
        if (&obj->base == NULL) {
                mutex_unlock(&dev->struct_mutex);

        /* Need to make sure the object gets inactive eventually. */
        ret = i915_gem_object_flush_active(obj);

                seqno = obj->last_read_seqno;

        /* Do this after OLR check to make sure we make forward progress polling
         * on this IOCTL with a 0 timeout (like busy ioctl)
        if (!args->timeout_ns) {

        drm_gem_object_unreference(&obj->base);
        reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
        mutex_unlock(&dev->struct_mutex);

        ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
                WARN_ON(!timespec_valid(timeout));
                args->timeout_ns = timespec_to_ns(timeout);

        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);

 * i915_gem_object_sync - sync an object to a ring.
 * @obj: object which may be in use on another ring.
 * @to: ring we wish to use the object on. May be NULL.
 * This code is meant to abstract object synchronization with the GPU.
 * Calling with NULL implies synchronizing the object with the CPU
 * rather than a particular GPU ring.
 * Returns 0 if successful, else propagates up the lower layer error.
i915_gem_object_sync(struct drm_i915_gem_object *obj,
                     struct intel_ring_buffer *to)
        struct intel_ring_buffer *from = obj->ring;

        if (from == NULL || to == from)

        if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
                return i915_gem_object_wait_rendering(obj, false);

        idx = intel_ring_sync_index(from, to);
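        /* sync_seqno[] caches the newest seqno of 'from' that 'to' has
         * already waited upon, so a second semaphore wait for an older
         * request can be skipped entirely. */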
        seqno = obj->last_read_seqno;
        if (seqno <= from->sync_seqno[idx])

        ret = i915_gem_check_olr(obj->ring, seqno);

        ret = to->sync_to(to, from, seqno);
                /* We use last_read_seqno because sync_to()
                 * might have just caused seqno wrap under
                from->sync_seqno[idx] = obj->last_read_seqno;

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
        u32 old_write_domain, old_read_domains;

        /* Force a pagefault for domain tracking on next user access */
        i915_gem_release_mmap(obj);

        if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)

        /* Wait for any direct GTT access to complete */

        old_read_domains = obj->base.read_domains;
        old_write_domain = obj->base.write_domain;

        obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
        obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

 * Unbinds an object from the GTT aperture.
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
        drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

        if (obj->gtt_space == NULL)

        BUG_ON(obj->pages == NULL);

        ret = i915_gem_object_finish_gpu(obj);
        /* Continue on if we fail due to EIO, the GPU is hung so we
         * should be safe and we need to cleanup or else we might
         * cause memory corruption through use-after-free.

        i915_gem_object_finish_gtt(obj);

        /* Move the object to the CPU domain to ensure that
         * any possible CPU writes while it's not in the GTT
         * are flushed when we go to remap it.
        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
        if (ret == -ERESTARTSYS)
                /* In the event of a disaster, abandon all caches and
                 * hope for the best.
                i915_gem_clflush_object(obj);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;

        /* release the fence reg _after_ flushing */
        ret = i915_gem_object_put_fence(obj);

        if (obj->has_global_gtt_mapping)
                i915_gem_gtt_unbind_object(obj);
        if (obj->has_aliasing_ppgtt_mapping) {
                i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
                obj->has_aliasing_ppgtt_mapping = 0;
        i915_gem_gtt_finish_object(obj);

        i915_gem_object_put_pages_gtt(obj);

        list_del_init(&obj->gtt_list);
        list_del_init(&obj->mm_list);
        /* Avoid an unnecessary call to unbind on rebind. */
        obj->map_and_fenceable = true;

        drm_mm_put_block(obj->gtt_space);
        obj->gtt_space = NULL;
        obj->gtt_offset = 0;

        if (i915_gem_object_is_purgeable(obj))
                i915_gem_object_truncate(obj);

int i915_gpu_idle(struct drm_device *dev)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;

        /* Flush everything onto the inactive list. */
        for_each_ring(ring, dev_priv, i) {
                ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);

                ret = intel_ring_idle(ring);

static void i965_write_fence_reg(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
        drm_i915_private_t *dev_priv = dev->dev_private;
        int fence_pitch_shift;

        if (INTEL_INFO(dev)->gen >= 6) {
                fence_reg = FENCE_REG_SANDYBRIDGE_0;
                fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
                fence_reg = FENCE_REG_965_0;
                fence_pitch_shift = I965_FENCE_PITCH_SHIFT;

                u32 size = obj->gtt_space->size;
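                /* The 965+ fence is a single 64-bit register: the last page
                 * of the region in the upper dword, the first page in the
                 * lower dword, the pitch in 128-byte units (minus one), plus
                 * the Y-tiling and valid bits. */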
                val = (uint64_t)((obj->gtt_offset + size - 4096) &
                val |= obj->gtt_offset & 0xfffff000;
                val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
                if (obj->tiling_mode == I915_TILING_Y)
                        val |= 1 << I965_FENCE_TILING_Y_SHIFT;
                val |= I965_FENCE_REG_VALID;

        fence_reg += reg * 8;
        I915_WRITE64(fence_reg, val);
        POSTING_READ(fence_reg);

static void i915_write_fence_reg(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
        drm_i915_private_t *dev_priv = dev->dev_private;

                u32 size = obj->gtt_space->size;

                WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
                     (size & -size) != size ||
                     (obj->gtt_offset & (size - 1)),
                     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
                     obj->gtt_offset, obj->map_and_fenceable, size);

                if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))

                /* Note: pitch better be a power of two tile widths */
                pitch_val = obj->stride / tile_width;
                pitch_val = ffs(pitch_val) - 1;
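                /* e.g. a 2048-byte stride with 512-byte X-major tiles gives
                 * pitch_val = 4, encoded as log2(4) = 2 in the register. */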
2263 val = obj->gtt_offset;
2264 if (obj->tiling_mode == I915_TILING_Y)
2265 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2266 val |= I915_FENCE_SIZE_BITS(size);
2267 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2268 val |= I830_FENCE_REG_VALID;
2273 reg = FENCE_REG_830_0 + reg * 4;
2275 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2277 I915_WRITE(reg, val);
2281 static void i830_write_fence_reg(struct drm_device *dev, int reg,
2282 struct drm_i915_gem_object *obj)
2284 drm_i915_private_t *dev_priv = dev->dev_private;
2288 u32 size = obj->gtt_space->size;
2291 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2292 (size & -size) != size ||
2293 (obj->gtt_offset & (size - 1)),
2294 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2295 obj->gtt_offset, size);
2297 pitch_val = obj->stride / 128;
2298 pitch_val = ffs(pitch_val) - 1;
2300 val = obj->gtt_offset;
2301 if (obj->tiling_mode == I915_TILING_Y)
2302 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2303 val |= I830_FENCE_SIZE_BITS(size);
2304 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2305 val |= I830_FENCE_REG_VALID;
2309 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2310 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2313 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
2315 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
2318 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2319 struct drm_i915_gem_object *obj)
2321 struct drm_i915_private *dev_priv = dev->dev_private;
2323 /* Ensure that all CPU reads are completed before installing a fence
2324 * and all writes before removing the fence.
2326 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
2329 switch (INTEL_INFO(dev)->gen) {
2333 case 4: i965_write_fence_reg(dev, reg, obj); break;
2334 case 3: i915_write_fence_reg(dev, reg, obj); break;
2335 case 2: i830_write_fence_reg(dev, reg, obj); break;
2339 /* And similarly be paranoid that no direct access to this region
2340 * is reordered to before the fence is installed.
2342 if (i915_gem_object_needs_mb(obj))
2346 static inline int fence_number(struct drm_i915_private *dev_priv,
2347 struct drm_i915_fence_reg *fence)
2349 return fence - dev_priv->fence_regs;
2352 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2353 struct drm_i915_fence_reg *fence,
2356 struct drm_device *dev = obj->base.dev;
2357 struct drm_i915_private *dev_priv = dev->dev_private;
2358 int fence_reg = fence_number(dev_priv, fence);
2360 /* In order to fully serialize access to the fenced region and
2361 * the update to the fence register we need to take extreme
2362 * measures on SNB+. In theory, the write to the fence register
2363 * flushes all memory transactions before, and coupled with the
2364 * mb() placed around the register write we serialise all memory
2365 * operations with respect to the changes in the tiler. Yet, on
2366 * SNB+ we need to take a step further and emit an explicit wbinvd()
2367 * on each processor in order to manually flush all memory
2368 * transactions before updating the fence register.
2370 if (HAS_LLC(obj->base.dev))
2371 cpu_wbinvd_on_all_cpus();
2372 i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
2375 obj->fence_reg = fence_reg;
2377 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2379 obj->fence_reg = I915_FENCE_REG_NONE;
2381 list_del_init(&fence->lru_list);
2386 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
2388 if (obj->last_fenced_seqno) {
2389 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
2393 obj->last_fenced_seqno = 0;
2396 obj->fenced_gpu_access = false;
2401 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2403 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2406 ret = i915_gem_object_wait_fence(obj);
2410 if (obj->fence_reg == I915_FENCE_REG_NONE)
2413 i915_gem_object_update_fence(obj,
2414 &dev_priv->fence_regs[obj->fence_reg],
2416 i915_gem_object_fence_lost(obj);
2421 static struct drm_i915_fence_reg *
2422 i915_find_fence_reg(struct drm_device *dev)
2424 struct drm_i915_private *dev_priv = dev->dev_private;
2425 struct drm_i915_fence_reg *reg, *avail;
2428 /* First try to find a free reg */
2430 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2431 reg = &dev_priv->fence_regs[i];
2435 if (!reg->pin_count)
2442 /* None available, try to steal one or wait for a user to finish */
2443 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2454 * i915_gem_object_get_fence - set up fencing for an object
2455 * @obj: object to map through a fence reg
2457 * When mapping objects through the GTT, userspace wants to be able to write
2458 * to them without having to worry about swizzling if the object is tiled.
2459 * This function walks the fence regs looking for a free one for @obj,
2460 * stealing one if it can't find any.
2462 * It then sets up the reg based on the object's properties: address, pitch
2463 * and tiling format.
2465 * For an untiled surface, this removes any existing fence.
2468 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2470 struct drm_device *dev = obj->base.dev;
2471 struct drm_i915_private *dev_priv = dev->dev_private;
2472 bool enable = obj->tiling_mode != I915_TILING_NONE;
2473 struct drm_i915_fence_reg *reg;
2476 /* Have we updated the tiling parameters upon the object and so
2477 * will need to serialise the write to the associated fence register?
2479 if (obj->fence_dirty) {
2480 ret = i915_gem_object_wait_fence(obj);
2485 /* Just update our place in the LRU if our fence is getting reused. */
2486 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2487 reg = &dev_priv->fence_regs[obj->fence_reg];
2488 if (!obj->fence_dirty) {
2489 list_move_tail(&reg->lru_list,
2490 &dev_priv->mm.fence_list);
2493 } else if (enable) {
2494 reg = i915_find_fence_reg(dev);
2499 struct drm_i915_gem_object *old = reg->obj;
2501 ret = i915_gem_object_wait_fence(old);
2505 i915_gem_object_fence_lost(old);
2510 i915_gem_object_update_fence(obj, reg, enable);
2511 obj->fence_dirty = false;
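/*
 * A proposed GTT placement is only valid when each neighbouring node
 * either shares our cache level (the drm_mm "color") or is separated
 * from us by a hole; see the prefetcher note below.
 */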
2516 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2517 struct drm_mm_node *gtt_space,
2518 unsigned long cache_level)
2520 struct drm_mm_node *other;
2522 /* On non-LLC machines we have to be careful when putting differing
2523 * types of snoopable memory together to avoid the prefetcher
2524 * crossing memory domains and dying.
2529 if (gtt_space == NULL)
2532 if (list_empty(&gtt_space->node_list))
2535 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2536 if (other->allocated && !other->hole_follows && other->color != cache_level)
2539 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2540 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2546 static void i915_gem_verify_gtt(struct drm_device *dev)
2549 struct drm_i915_private *dev_priv = dev->dev_private;
2550 struct drm_i915_gem_object *obj;
2553 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
2554 if (obj->gtt_space == NULL) {
2555 printk(KERN_ERR "object found on GTT list with no space reserved\n");
2560 if (obj->cache_level != obj->gtt_space->color) {
2561 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
2562 obj->gtt_space->start,
2563 obj->gtt_space->start + obj->gtt_space->size,
2565 obj->gtt_space->color);
2570 if (!i915_gem_valid_gtt_space(dev,
2572 obj->cache_level)) {
2573 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
2574 obj->gtt_space->start,
2575 obj->gtt_space->start + obj->gtt_space->size,
2587 * Finds free space in the GTT aperture and binds the object there.
2590 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2592 bool map_and_fenceable,
2595 struct drm_device *dev = obj->base.dev;
2596 drm_i915_private_t *dev_priv = dev->dev_private;
2597 struct drm_mm_node *free_space;
2598 uint32_t size, fence_size, fence_alignment, unfenced_alignment;
2599 bool mappable, fenceable;
2602 fence_size = i915_gem_get_gtt_size(dev,
2605 fence_alignment = i915_gem_get_gtt_alignment(dev,
2607 obj->tiling_mode, true);
2608 unfenced_alignment =
2609 i915_gem_get_gtt_alignment(dev,
2611 obj->tiling_mode, false);
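/*
 * A mappable+fenceable binding must reserve the full fence footprint
 * and honour the fence alignment; otherwise the object's natural size
 * and the weaker unfenced alignment are sufficient.
 */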
2614 alignment = map_and_fenceable ? fence_alignment :
2616 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2617 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2621 size = map_and_fenceable ? fence_size : obj->base.size;
2623 /* If the object is bigger than the entire aperture, reject it early
2624 * before evicting everything in a vain attempt to find space.
2626 if (obj->base.size >
2627 (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) {
2628 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2633 if (map_and_fenceable)
2635 drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
2636 size, alignment, obj->cache_level,
2637 0, dev_priv->gtt.mappable_end,
2640 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
2641 size, alignment, obj->cache_level,
2643 if (free_space != NULL) {
2644 if (map_and_fenceable)
2646 drm_mm_get_block_range_generic(free_space,
2647 size, alignment, obj->cache_level,
2648 0, dev_priv->gtt.mappable_end,
2652 drm_mm_get_block_generic(free_space,
2653 size, alignment, obj->cache_level,
2656 if (obj->gtt_space == NULL) {
2657 ret = i915_gem_evict_something(dev, size, alignment,
2668 * NOTE: i915_gem_object_get_pages_gtt() cannot
2669 * return ENOMEM, since we used VM_ALLOC_RETRY.
2671 ret = i915_gem_object_get_pages_gtt(obj);
2673 drm_mm_put_block(obj->gtt_space);
2674 obj->gtt_space = NULL;
2678 i915_gem_gtt_bind_object(obj, obj->cache_level);
2680 i915_gem_object_put_pages_gtt(obj);
2681 drm_mm_put_block(obj->gtt_space);
2682 obj->gtt_space = NULL;
2683 if (i915_gem_evict_everything(dev))
2688 list_add_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
2689 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2691 obj->gtt_offset = obj->gtt_space->start;
2694 obj->gtt_space->size == fence_size &&
2695 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2698 obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;
2700 obj->map_and_fenceable = mappable && fenceable;
2702 i915_gem_verify_gtt(dev);
2707 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2710 /* If we don't have a page list set up, then we're not pinned
2711 * to GPU, and we can ignore the cache flush because it'll happen
2712 * again at bind time.
2714 if (obj->pages == NULL)
2718 * Stolen memory is always coherent with the GPU as it is explicitly
2719 * marked as wc by the system, or the system is cache-coherent.
2724 /* If the GPU is snooping the contents of the CPU cache,
2725 * we do not need to manually clear the CPU cache lines. However,
2726 * the caches are only snooped when the render cache is
2727 * flushed/invalidated. As we always have to emit invalidations
2728 * and flushes when moving into and out of the RENDER domain, correct
2729 * snooping behaviour occurs naturally as the result of our domain
2732 if (obj->cache_level != I915_CACHE_NONE)
2735 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2738 /** Flushes the GTT write domain for the object if it's dirty. */
2740 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2742 uint32_t old_write_domain;
2744 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2747 /* No actual flushing is required for the GTT write domain. Writes
2748 * to it immediately go to main memory as far as we know, so there's
2749 * no chipset flush. It also doesn't land in render cache.
2751 * However, we do have to enforce the order so that all writes through
2752 * the GTT land before any writes to the device, such as updates to
2757 old_write_domain = obj->base.write_domain;
2758 obj->base.write_domain = 0;
2761 /** Flushes the CPU write domain for the object if it's dirty. */
2763 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2765 uint32_t old_write_domain;
2767 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2770 i915_gem_clflush_object(obj);
2771 i915_gem_chipset_flush(obj->base.dev);
2772 old_write_domain = obj->base.write_domain;
2773 obj->base.write_domain = 0;
2777 * Moves a single object to the GTT read, and possibly write domain.
2779 * This function returns when the move is complete, including waiting on
2783 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2785 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2786 uint32_t old_write_domain, old_read_domains;
2789 /* Not valid to be called on unbound objects. */
2790 if (obj->gtt_space == NULL)
2793 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2796 ret = i915_gem_object_wait_rendering(obj, !write);
2800 i915_gem_object_flush_cpu_write_domain(obj);
2802 /* Serialise direct access to this object with the barriers for
2803 * coherent writes from the GPU, by effectively invalidating the
2804 * GTT domain upon first access.
2806 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2809 old_write_domain = obj->base.write_domain;
2810 old_read_domains = obj->base.read_domains;
2812 /* It should now be out of any other write domains, and we can update
2813 * the domain values for our changes.
2815 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2816 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2818 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2819 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2823 /* And bump the LRU for this access */
2824 if (i915_gem_object_is_inactive(obj))
2825 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
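/*
 * Changing the cache level may force a rebind: if the new level cannot
 * legally sit next to its current GTT neighbours the object is unbound,
 * on pre-SNB any fence is dropped (no tiling with snooped memory), and
 * the GTT/PPGTT entries are rewritten with the new level.  Moving to
 * uncached also collapses the domain tracking back to the CPU domain.
 */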
2830 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2831 enum i915_cache_level cache_level)
2833 struct drm_device *dev = obj->base.dev;
2834 drm_i915_private_t *dev_priv = dev->dev_private;
2837 if (obj->cache_level == cache_level)
2840 if (obj->pin_count) {
2841 DRM_DEBUG("can not change the cache level of pinned objects\n");
2845 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
2846 ret = i915_gem_object_unbind(obj);
2851 if (obj->gtt_space) {
2852 ret = i915_gem_object_finish_gpu(obj);
2856 i915_gem_object_finish_gtt(obj);
2858 /* Before SandyBridge, you could not use tiling or fence
2859 * registers with snooped memory, so relinquish any fences
2860 * currently pointing to our region in the aperture.
2862 if (INTEL_INFO(dev)->gen < 6) {
2863 ret = i915_gem_object_put_fence(obj);
2868 if (obj->has_global_gtt_mapping)
2869 i915_gem_gtt_bind_object(obj, cache_level);
2870 if (obj->has_aliasing_ppgtt_mapping)
2871 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2874 obj->gtt_space->color = cache_level;
2877 if (cache_level == I915_CACHE_NONE) {
2878 u32 old_read_domains, old_write_domain;
2880 /* If we're coming from LLC cached, then we haven't
2881 * actually been tracking whether the data is in the
2882 * CPU cache or not, since we only allow one bit set
2883 * in obj->write_domain and have been skipping the clflushes.
2884 * Just set it to the CPU cache for now.
2886 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
2887 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
2889 old_read_domains = obj->base.read_domains;
2890 old_write_domain = obj->base.write_domain;
2892 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2893 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2897 obj->cache_level = cache_level;
2898 i915_gem_verify_gtt(dev);
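/*
 * The GET/SET_CACHING ioctls below expose obj->cache_level to userspace
 * as the coarser I915_CACHING_NONE/I915_CACHING_CACHED pair.  A hedged
 * userspace sketch (field names as in the drm_i915_gem_caching uapi,
 * bo_handle being a caller-supplied GEM handle):
 *
 *	struct drm_i915_gem_caching arg = { .handle = bo_handle,
 *					    .caching = I915_CACHING_CACHED };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */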
2902 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
2903 struct drm_file *file)
2905 struct drm_i915_gem_caching *args = data;
2906 struct drm_i915_gem_object *obj;
2909 ret = i915_mutex_lock_interruptible(dev);
2913 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2914 if (&obj->base == NULL) {
2919 args->caching = obj->cache_level != I915_CACHE_NONE;
2921 drm_gem_object_unreference(&obj->base);
2923 mutex_unlock(&dev->struct_mutex);
2927 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
2928 struct drm_file *file)
2930 struct drm_i915_gem_caching *args = data;
2931 struct drm_i915_gem_object *obj;
2932 enum i915_cache_level level;
2935 switch (args->caching) {
2936 case I915_CACHING_NONE:
2937 level = I915_CACHE_NONE;
2939 case I915_CACHING_CACHED:
2940 level = I915_CACHE_LLC;
2946 ret = i915_mutex_lock_interruptible(dev);
2950 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2951 if (&obj->base == NULL) {
2956 ret = i915_gem_object_set_cache_level(obj, level);
2958 drm_gem_object_unreference(&obj->base);
2960 mutex_unlock(&dev->struct_mutex);
2965 * Prepare buffer for display plane (scanout, cursors, etc).
2966 * Can be called from an uninterruptible phase (modesetting) and allows
2967 * any flushes to be pipelined (for pageflips).
2970 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2972 struct intel_ring_buffer *pipelined)
2974 u32 old_read_domains, old_write_domain;
2977 if (pipelined != obj->ring) {
2978 ret = i915_gem_object_sync(obj, pipelined);
2983 /* The display engine is not coherent with the LLC cache on gen6. As
2984 * a result, we make sure that the pinning that is about to occur is
2985 * done with uncached PTEs. This is lowest common denominator for all
2988 * However for gen6+, we could do better by using the GFDT bit instead
2989 * of uncaching, which would allow us to flush all the LLC-cached data
2990 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2992 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2996 /* As the user may map the buffer once pinned in the display plane
2997 * (e.g. libkms for the bootup splash), we have to ensure that we
2998 * always use map_and_fenceable for all scanout buffers.
3000 ret = i915_gem_object_pin(obj, alignment, true, false);
3004 i915_gem_object_flush_cpu_write_domain(obj);
3006 old_write_domain = obj->base.write_domain;
3007 old_read_domains = obj->base.read_domains;
3009 /* It should now be out of any other write domains, and we can update
3010 * the domain values for our changes.
3012 obj->base.write_domain = 0;
3013 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3019 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3023 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
3026 ret = i915_gem_object_wait_rendering(obj, false);
3030 /* Ensure that we invalidate the GPU's caches and TLBs. */
3031 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3036 * Moves a single object to the CPU read, and possibly write domain.
3038 * This function returns when the move is complete, including waiting on
3042 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3044 uint32_t old_write_domain, old_read_domains;
3047 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3050 ret = i915_gem_object_wait_rendering(obj, !write);
3054 i915_gem_object_flush_gtt_write_domain(obj);
3056 old_write_domain = obj->base.write_domain;
3057 old_read_domains = obj->base.read_domains;
3059 /* Flush the CPU cache if it's still invalid. */
3060 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3061 i915_gem_clflush_object(obj);
3063 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3066 /* It should now be out of any other write domains, and we can update
3067 * the domain values for our changes.
3069 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3071 /* If we're writing through the CPU, then the GPU read domains will
3072 * need to be invalidated at next use.
3075 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3076 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3082 /* Throttle our rendering by waiting until the ring has completed our requests
3083 * emitted over 20 msec ago.
3085 * Note that if we were to use the current jiffies each time around the loop,
3086 * we wouldn't escape the function with any frames outstanding if the time to
3087 * render a frame was over 20ms.
3089 * This should get us reasonable parallelism between CPU and GPU but also
3090 * relatively low latency when blocking on a particular request to finish.
3093 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3095 struct drm_i915_private *dev_priv = dev->dev_private;
3096 struct drm_i915_file_private *file_priv = file->driver_priv;
3097 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3098 struct drm_i915_gem_request *request;
3099 struct intel_ring_buffer *ring = NULL;
3100 unsigned reset_counter;
3104 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3108 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3112 spin_lock(&file_priv->mm.lock);
3113 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3114 if (time_after_eq(request->emitted_jiffies, recent_enough))
3117 ring = request->ring;
3118 seqno = request->seqno;
3120 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3121 spin_unlock(&file_priv->mm.lock);
3126 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
3128 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
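/*
 * i915_gem_object_pin() binds the object into the GTT if necessary and
 * keeps it there; if it is already bound at the wrong alignment, or is
 * not CPU-mappable when map_and_fenceable is requested, it is unbound
 * and rebound first.
 */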
3134 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3136 bool map_and_fenceable,
3141 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3144 if (obj->gtt_space != NULL) {
3145 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3146 (map_and_fenceable && !obj->map_and_fenceable)) {
3147 WARN(obj->pin_count,
3148 "bo is already pinned with incorrect alignment:"
3149 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3150 " obj->map_and_fenceable=%d\n",
3151 obj->gtt_offset, alignment,
3153 obj->map_and_fenceable);
3154 ret = i915_gem_object_unbind(obj);
3160 if (obj->gtt_space == NULL) {
3161 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3163 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3169 if (!dev_priv->mm.aliasing_ppgtt)
3170 i915_gem_gtt_bind_object(obj, obj->cache_level);
3173 if (!obj->has_global_gtt_mapping && map_and_fenceable)
3174 i915_gem_gtt_bind_object(obj, obj->cache_level);
3177 obj->pin_mappable |= map_and_fenceable;
3183 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3185 BUG_ON(obj->pin_count == 0);
3186 BUG_ON(obj->gtt_space == NULL);
3188 if (--obj->pin_count == 0)
3189 obj->pin_mappable = false;
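/*
 * Legacy pin interface: a buffer may only be pinned by one file,
 * purgeable buffers are refused, and the CPU write domain is flushed
 * because the old X server does not track domains itself.  The
 * resulting GTT offset is handed back to userspace.
 */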
3193 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3194 struct drm_file *file)
3196 struct drm_i915_gem_pin *args = data;
3197 struct drm_i915_gem_object *obj;
3200 ret = i915_mutex_lock_interruptible(dev);
3204 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3205 if (&obj->base == NULL) {
3210 if (obj->madv != I915_MADV_WILLNEED) {
3211 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3216 if (obj->pin_filp != NULL && obj->pin_filp != file) {
3217 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3223 if (obj->user_pin_count == 0) {
3224 ret = i915_gem_object_pin(obj, args->alignment, true, false);
3229 obj->user_pin_count++;
3230 obj->pin_filp = file;
3232 /* XXX - flush the CPU caches for pinned objects
3233 * as the X server doesn't manage domains yet
3235 i915_gem_object_flush_cpu_write_domain(obj);
3236 args->offset = obj->gtt_offset;
3238 drm_gem_object_unreference(&obj->base);
3240 mutex_unlock(&dev->struct_mutex);
3245 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3246 struct drm_file *file)
3248 struct drm_i915_gem_pin *args = data;
3249 struct drm_i915_gem_object *obj;
3252 ret = i915_mutex_lock_interruptible(dev);
3256 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3257 if (&obj->base == NULL) {
3262 if (obj->pin_filp != file) {
3263 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3268 obj->user_pin_count--;
3269 if (obj->user_pin_count == 0) {
3270 obj->pin_filp = NULL;
3271 i915_gem_object_unpin(obj);
3275 drm_gem_object_unreference(&obj->base);
3277 mutex_unlock(&dev->struct_mutex);
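/*
 * The busy ioctl reports whether the object still has work outstanding
 * on the GPU, flushing any pending activity first so that userspace
 * eventually observes the object going idle; the ring the object last
 * ran on is encoded in the upper half of args->busy.
 */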
3282 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3283 struct drm_file *file)
3285 struct drm_i915_gem_busy *args = data;
3286 struct drm_i915_gem_object *obj;
3289 ret = i915_mutex_lock_interruptible(dev);
3293 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3294 if (&obj->base == NULL) {
3299 /* Count all active objects as busy, even if they are currently not used
3300 * by the gpu. Users of this interface expect objects to eventually
3301 * become non-busy without any further actions, therefore emit any
3302 * necessary flushes here.
3304 ret = i915_gem_object_flush_active(obj);
3306 args->busy = obj->active;
3308 args->busy |= intel_ring_flag(obj->ring) << 16;
3311 drm_gem_object_unreference(&obj->base);
3313 mutex_unlock(&dev->struct_mutex);
3318 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3319 struct drm_file *file_priv)
3321 return i915_gem_ring_throttle(dev, file_priv);
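/*
 * madvise lets userspace mark a buffer's backing storage as disposable
 * (DONTNEED) or needed again (WILLNEED); a purgeable buffer that is no
 * longer bound has its backing store truncated immediately.
 */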
3325 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3326 struct drm_file *file_priv)
3328 struct drm_i915_gem_madvise *args = data;
3329 struct drm_i915_gem_object *obj;
3332 switch (args->madv) {
3333 case I915_MADV_DONTNEED:
3334 case I915_MADV_WILLNEED:
3340 ret = i915_mutex_lock_interruptible(dev);
3344 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3345 if (&obj->base == NULL) {
3350 if (obj->pin_count) {
3355 if (obj->madv != __I915_MADV_PURGED)
3356 obj->madv = args->madv;
3358 /* if the object is no longer attached, discard its backing storage */
3359 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
3360 i915_gem_object_truncate(obj);
3362 args->retained = obj->madv != __I915_MADV_PURGED;
3365 drm_gem_object_unreference(&obj->base);
3367 mutex_unlock(&dev->struct_mutex);
3371 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3372 const struct drm_i915_gem_object_ops *ops)
3374 INIT_LIST_HEAD(&obj->mm_list);
3375 INIT_LIST_HEAD(&obj->gtt_list);
3376 INIT_LIST_HEAD(&obj->ring_list);
3377 INIT_LIST_HEAD(&obj->exec_list);
3381 obj->fence_reg = I915_FENCE_REG_NONE;
3382 obj->madv = I915_MADV_WILLNEED;
3383 /* Avoid an unnecessary call to unbind on the first bind. */
3384 obj->map_and_fenceable = true;
3386 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
3389 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3390 .get_pages = i915_gem_object_get_pages_gtt,
3391 .put_pages = i915_gem_object_put_pages_gtt,
3394 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3397 struct drm_i915_gem_object *obj;
3399 struct address_space *mapping;
3403 obj = kmalloc(sizeof(*obj), M_DRM, M_WAITOK | M_ZERO);
3407 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3413 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3414 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3415 /* 965gm cannot relocate objects above 4GiB. */
3416 mask &= ~__GFP_HIGHMEM;
3417 mask |= __GFP_DMA32;
3420 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3421 mapping_set_gfp_mask(mapping, mask);
3424 i915_gem_object_init(obj, &i915_gem_object_ops);
3426 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3427 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3430 /* On some devices, we can have the GPU use the LLC (the CPU
3431 * cache) for about a 10% performance improvement
3432 * compared to uncached. Graphics requests other than
3433 * display scanout are coherent with the CPU in
3434 * accessing this cache. This means in this mode we
3435 * don't need to clflush on the CPU side, and on the
3436 * GPU side we only need to flush internal caches to
3437 * get data visible to the CPU.
3439 * However, we maintain the display planes as UC, and so
3440 * need to rebind when first used as such.
3442 obj->cache_level = I915_CACHE_LLC;
3444 obj->cache_level = I915_CACHE_NONE;
3449 int i915_gem_init_object(struct drm_gem_object *obj)
3456 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3458 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3459 struct drm_device *dev = obj->base.dev;
3460 drm_i915_private_t *dev_priv = dev->dev_private;
3463 i915_gem_detach_phys_object(dev, obj);
3466 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3467 bool was_interruptible;
3469 was_interruptible = dev_priv->mm.interruptible;
3470 dev_priv->mm.interruptible = false;
3472 WARN_ON(i915_gem_object_unbind(obj));
3474 dev_priv->mm.interruptible = was_interruptible;
3477 drm_gem_free_mmap_offset(&obj->base);
3479 drm_gem_object_release(&obj->base);
3480 i915_gem_info_remove_obj(dev_priv, obj->base.size);
3482 drm_free(obj->bit_17, M_DRM);
3483 drm_free(obj, M_DRM);
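/*
 * i915_gem_idle() quiesces GEM for suspend/lastclose: wait for the GPU,
 * retire requests, evict everything under UMS, reset the fences, mark
 * the device suspended, stop the hangcheck timer and tear down the
 * rings before cancelling the retire worker.
 */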
3487 i915_gem_idle(struct drm_device *dev)
3489 drm_i915_private_t *dev_priv = dev->dev_private;
3492 mutex_lock(&dev->struct_mutex);
3494 if (dev_priv->mm.suspended) {
3495 mutex_unlock(&dev->struct_mutex);
3499 ret = i915_gpu_idle(dev);
3501 mutex_unlock(&dev->struct_mutex);
3504 i915_gem_retire_requests(dev);
3506 /* Under UMS, be paranoid and evict. */
3507 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3508 i915_gem_evict_everything(dev);
3510 i915_gem_reset_fences(dev);
3512 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3513 * We need to replace this with a semaphore, or something.
3514 * And not confound mm.suspended!
3516 dev_priv->mm.suspended = 1;
3517 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
3519 i915_kernel_lost_context(dev);
3520 i915_gem_cleanup_ringbuffer(dev);
3522 mutex_unlock(&dev->struct_mutex);
3524 /* Cancel the retire work handler, which should be idle now. */
3525 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
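/*
 * Re-program the GEN7 L3 remapping registers from the saved parity
 * remap information, with DOP clock gating disabled around the writes.
 */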
3530 void i915_gem_l3_remap(struct drm_device *dev)
3532 drm_i915_private_t *dev_priv = dev->dev_private;
3536 if (!HAS_L3_GPU_CACHE(dev))
3539 if (!dev_priv->l3_parity.remap_info)
3542 misccpctl = I915_READ(GEN7_MISCCPCTL);
3543 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
3544 POSTING_READ(GEN7_MISCCPCTL);
3546 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
3547 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
3548 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
3549 DRM_DEBUG("0x%x was already programmed to %x\n",
3550 GEN7_L3LOG_BASE + i, remap);
3551 if (remap && !dev_priv->l3_parity.remap_info[i/4])
3552 DRM_DEBUG_DRIVER("Clearing remapped register\n");
3553 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
3556 /* Make sure all the writes land before disabling dop clock gating */
3557 POSTING_READ(GEN7_L3LOG_BASE);
3559 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
3562 void i915_gem_init_swizzling(struct drm_device *dev)
3564 drm_i915_private_t *dev_priv = dev->dev_private;
3566 if (INTEL_INFO(dev)->gen < 5 ||
3567 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3570 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3571 DISP_TILE_SURFACE_SWIZZLING);
3576 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3578 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3579 else if (IS_GEN7(dev))
3580 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3586 intel_enable_blt(struct drm_device *dev)
3593 /* The blitter was dysfunctional on early prototypes */
3594 revision = pci_read_config(dev->dev, PCIR_REVID, 1);
3595 if (IS_GEN6(dev) && revision < 8) {
3596 DRM_INFO("BLT not supported on this pre-production hardware;"
3597 " graphics performance will be degraded.\n");
3604 static int i915_gem_init_rings(struct drm_device *dev)
3606 struct drm_i915_private *dev_priv = dev->dev_private;
3609 ret = intel_init_render_ring_buffer(dev);
3614 ret = intel_init_bsd_ring_buffer(dev);
3616 goto cleanup_render_ring;
3619 if (intel_enable_blt(dev)) {
3620 ret = intel_init_blt_ring_buffer(dev);
3622 goto cleanup_bsd_ring;
3625 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
3627 goto cleanup_blt_ring;
3632 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
3634 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3635 cleanup_render_ring:
3636 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3642 i915_gem_init_hw(struct drm_device *dev)
3644 drm_i915_private_t *dev_priv = dev->dev_private;
3648 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3652 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
3653 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
3655 i915_gem_l3_remap(dev);
3657 i915_gem_init_swizzling(dev);
3659 ret = i915_gem_init_rings(dev);
3664 * XXX: There was some w/a described somewhere suggesting loading
3665 * contexts before PPGTT.
3667 i915_gem_context_init(dev);
3668 i915_gem_init_ppgtt(dev);
3673 int i915_gem_init(struct drm_device *dev)
3675 struct drm_i915_private *dev_priv = dev->dev_private;
3678 mutex_lock(&dev->struct_mutex);
3679 i915_gem_init_global_gtt(dev);
3680 ret = i915_gem_init_hw(dev);
3681 mutex_unlock(&dev->struct_mutex);
3683 i915_gem_cleanup_aliasing_ppgtt(dev);
3687 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3688 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3689 dev_priv->dri1.allow_batchbuffer = 1;
3694 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3696 drm_i915_private_t *dev_priv = dev->dev_private;
3697 struct intel_ring_buffer *ring;
3700 for_each_ring(ring, dev_priv, i)
3701 intel_cleanup_ring_buffer(ring);
3705 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3706 struct drm_file *file_priv)
3708 drm_i915_private_t *dev_priv = dev->dev_private;
3711 if (drm_core_check_feature(dev, DRIVER_MODESET))
3714 if (i915_reset_in_progress(&dev_priv->gpu_error)) {
3715 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3716 atomic_set(&dev_priv->gpu_error.reset_counter, 0);
3719 mutex_lock(&dev->struct_mutex);
3720 dev_priv->mm.suspended = 0;
3722 ret = i915_gem_init_hw(dev);
3724 mutex_unlock(&dev->struct_mutex);
3728 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list"));
3729 mutex_unlock(&dev->struct_mutex);
3731 ret = drm_irq_install(dev);
3733 goto cleanup_ringbuffer;
3738 mutex_lock(&dev->struct_mutex);
3739 i915_gem_cleanup_ringbuffer(dev);
3740 dev_priv->mm.suspended = 1;
3741 mutex_unlock(&dev->struct_mutex);
3747 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3748 struct drm_file *file_priv)
3750 if (drm_core_check_feature(dev, DRIVER_MODESET))
3753 drm_irq_uninstall(dev);
3754 return i915_gem_idle(dev);
3758 i915_gem_lastclose(struct drm_device *dev)
3762 if (drm_core_check_feature(dev, DRIVER_MODESET))
3765 ret = i915_gem_idle(dev);
3767 DRM_ERROR("failed to idle hardware: %d\n", ret);
3771 init_ring_lists(struct intel_ring_buffer *ring)
3773 INIT_LIST_HEAD(&ring->active_list);
3774 INIT_LIST_HEAD(&ring->request_list);
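/*
 * One-time GEM setup: initialise the object/request lists and the
 * retire worker, size the fence register file (16 on gen4+/945/G33,
 * 8 elsewhere), detect the bit-6 swizzle mode and hook up the
 * low-memory handler.
 */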
3778 i915_gem_load(struct drm_device *dev)
3781 drm_i915_private_t *dev_priv = dev->dev_private;
3783 INIT_LIST_HEAD(&dev_priv->mm.active_list);
3784 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3785 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
3786 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
3787 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3788 for (i = 0; i < I915_NUM_RINGS; i++)
3789 init_ring_lists(&dev_priv->ring[i]);
3790 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3791 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3792 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3793 i915_gem_retire_work_handler);
3794 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
3796 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3798 I915_WRITE(MI_ARB_STATE,
3799 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
3802 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3804 /* Old X drivers will take 0-2 for front, back, depth buffers */
3805 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3806 dev_priv->fence_reg_start = 3;
3808 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3809 dev_priv->num_fence_regs = 16;
3811 dev_priv->num_fence_regs = 8;
3813 /* Initialize fence registers to zero */
3814 i915_gem_reset_fences(dev);
3816 i915_gem_detect_bit_6_swizzle(dev);
3817 init_waitqueue_head(&dev_priv->pending_flip_queue);
3819 dev_priv->mm.interruptible = true;
3822 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3823 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3824 register_shrinker(&dev_priv->mm.inactive_shrinker);
3826 dev_priv->mm.inactive_shrinker = EVENTHANDLER_REGISTER(vm_lowmem,
3827 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY);
3832 * Create a physically contiguous memory object for this object
3833 * e.g. for cursor + overlay regs
3835 static int i915_gem_init_phys_object(struct drm_device *dev,
3836 int id, int size, int align)
3838 drm_i915_private_t *dev_priv = dev->dev_private;
3839 struct drm_i915_gem_phys_object *phys_obj;
3842 if (dev_priv->mm.phys_objs[id - 1] || !size)
3845 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), M_DRM,
3852 phys_obj->handle = drm_pci_alloc(dev, size, align);
3853 if (!phys_obj->handle) {
3857 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr,
3858 size / PAGE_SIZE, PAT_WRITE_COMBINING);
3860 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3865 drm_free(phys_obj, M_DRM);
3869 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3871 drm_i915_private_t *dev_priv = dev->dev_private;
3872 struct drm_i915_gem_phys_object *phys_obj;
3874 if (!dev_priv->mm.phys_objs[id - 1])
3877 phys_obj = dev_priv->mm.phys_objs[id - 1];
3878 if (phys_obj->cur_obj) {
3879 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3882 drm_pci_free(dev, phys_obj->handle);
3883 drm_free(phys_obj, M_DRM);
3884 dev_priv->mm.phys_objs[id - 1] = NULL;
3887 void i915_gem_free_all_phys_object(struct drm_device *dev)
3891 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3892 i915_gem_free_phys_object(dev, i);
3895 void i915_gem_detach_phys_object(struct drm_device *dev,
3896 struct drm_i915_gem_object *obj)
3898 struct vm_object *mapping = obj->base.vm_obj;
3905 vaddr = obj->phys_obj->handle->vaddr;
3907 page_count = obj->base.size / PAGE_SIZE;
3908 VM_OBJECT_LOCK(obj->base.vm_obj);
3909 for (i = 0; i < page_count; i++) {
3910 struct vm_page *page = shmem_read_mapping_page(mapping, i);
3911 if (!IS_ERR(page)) {
3912 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3913 char *dst = kmap_atomic(page);
3914 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
3917 drm_clflush_pages(&page, 1);
3920 set_page_dirty(page);
3921 mark_page_accessed(page);
3922 page_cache_release(page);
3924 VM_OBJECT_LOCK(obj->base.vm_obj);
3925 vm_page_reference(page);
3926 vm_page_dirty(page);
3927 vm_page_busy_wait(page, FALSE, "i915gem");
3928 vm_page_unwire(page, 0);
3929 vm_page_wakeup(page);
3932 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3933 intel_gtt_chipset_flush();
3935 obj->phys_obj->cur_obj = NULL;
3936 obj->phys_obj = NULL;
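/*
 * Attach @obj to a physically contiguous "phys object": any previous
 * attachment is detached first, the phys object is allocated on demand,
 * and the shmem backing pages are copied into the contiguous buffer.
 */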
3940 i915_gem_attach_phys_object(struct drm_device *dev,
3941 struct drm_i915_gem_object *obj,
3945 struct vm_object *mapping = obj->base.vm_obj;
3946 drm_i915_private_t *dev_priv = dev->dev_private;
3951 if (id > I915_MAX_PHYS_OBJECT)
3954 if (obj->phys_obj) {
3955 if (obj->phys_obj->id == id)
3957 i915_gem_detach_phys_object(dev, obj);
3960 /* create a new object */
3961 if (!dev_priv->mm.phys_objs[id - 1]) {
3962 ret = i915_gem_init_phys_object(dev, id,
3963 obj->base.size, align);
3965 DRM_ERROR("failed to init phys object %d size: %zu\n",
3966 id, obj->base.size);
3971 /* bind to the object */
3972 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3973 obj->phys_obj->cur_obj = obj;
3975 page_count = obj->base.size / PAGE_SIZE;
3977 VM_OBJECT_LOCK(obj->base.vm_obj);
3978 for (i = 0; i < page_count; i++) {
3979 struct vm_page *page;
3982 page = shmem_read_mapping_page(mapping, i);
3983 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3985 return PTR_ERR(page);
3987 src = kmap_atomic(page);
3988 dst = (char*)obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
3989 memcpy(dst, src, PAGE_SIZE);
3993 mark_page_accessed(page);
3994 page_cache_release(page);
3996 VM_OBJECT_LOCK(obj->base.vm_obj);
3997 vm_page_reference(page);
3998 vm_page_busy_wait(page, FALSE, "i915gem");
3999 vm_page_unwire(page, 0);
4000 vm_page_wakeup(page);
4002 VM_OBJECT_UNLOCK(obj->base.vm_obj);
4008 i915_gem_phys_pwrite(struct drm_device *dev,
4009 struct drm_i915_gem_object *obj,
4010 struct drm_i915_gem_pwrite *args,
4011 struct drm_file *file_priv)
4013 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset;
4014 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
4016 if (copyin_nofault(user_data, vaddr, args->size) != 0) {
4017 unsigned long unwritten;
4019 /* The physical object once assigned is fixed for the lifetime
4020 * of the obj, so we can safely drop the lock and continue
4023 mutex_unlock(&dev->struct_mutex);
4024 unwritten = copy_from_user(vaddr, user_data, args->size);
4025 mutex_lock(&dev->struct_mutex);
4030 i915_gem_chipset_flush(dev);
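/*
 * Note for the release path below: the requests themselves stay queued
 * on their rings and complete normally; only the per-file bookkeeping
 * (client_list / file_priv) is severed here.
 */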
4034 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4036 struct drm_i915_file_private *file_priv = file->driver_priv;
4038 /* Clean up our request list when the client is going away, so that
4039 * later retire_requests won't dereference our soon-to-be-gone
4042 spin_lock(&file_priv->mm.lock);
4043 while (!list_empty(&file_priv->mm.request_list)) {
4044 struct drm_i915_gem_request *request;
4046 request = list_first_entry(&file_priv->mm.request_list,
4047 struct drm_i915_gem_request,
4049 list_del(&request->client_list);
4050 request->file_priv = NULL;
4052 spin_unlock(&file_priv->mm.lock);
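/*
 * The cdev pager callbacks below implement the GTT mmap path on this
 * platform: on fault the object is (re)bound into the mappable GTT,
 * moved to the GTT domain, given a fence when tiled, and the fictitious
 * page covering the aperture address is inserted into the VM object.
 */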
4056 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
4057 vm_ooffset_t foff, struct ucred *cred, u_short *color)
4060 *color = 0; /* XXXKIB */
4067 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
4070 struct drm_gem_object *gem_obj;
4071 struct drm_i915_gem_object *obj;
4072 struct drm_device *dev;
4073 drm_i915_private_t *dev_priv;
4078 gem_obj = vm_obj->handle;
4079 obj = to_intel_bo(gem_obj);
4080 dev = obj->base.dev;
4081 dev_priv = dev->dev_private;
4083 write = (prot & VM_PROT_WRITE) != 0;
4087 vm_object_pip_add(vm_obj, 1);
4090 * Remove the placeholder page inserted by vm_fault() from the
4091 * object before dropping the object lock. If
4092 * i915_gem_release_mmap() is active in parallel on this gem
4093 * object, then it owns the drm device sx and might find the
4094 * placeholder already. Then, since the page is busy,
4095 * i915_gem_release_mmap() sleeps waiting for the busy state
4096 of the page cleared. We will not be able to acquire the drm
4097 * device lock until i915_gem_release_mmap() is able to make a
4100 if (*mres != NULL) {
4102 vm_page_remove(oldm);
4107 VM_OBJECT_UNLOCK(vm_obj);
4113 ret = i915_mutex_lock_interruptible(dev);
4119 mutex_lock(&dev->struct_mutex);
4122 * Since the object lock was dropped, other thread might have
4123 * faulted on the same GTT address and instantiated the
4124 * mapping for the page. Recheck.
4126 VM_OBJECT_LOCK(vm_obj);
4127 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
4129 if ((m->flags & PG_BUSY) != 0) {
4130 mutex_unlock(&dev->struct_mutex);
4132 vm_page_sleep(m, "915pee");
4138 VM_OBJECT_UNLOCK(vm_obj);
4140 /* Access to snoopable pages through the GTT is incoherent. */
4141 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
4146 /* Now bind it into the GTT if needed */
4147 if (!obj->map_and_fenceable) {
4148 ret = i915_gem_object_unbind(obj);
4154 if (!obj->gtt_space) {
4155 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
4161 ret = i915_gem_object_set_to_gtt_domain(obj, write);
4168 if (obj->tiling_mode == I915_TILING_NONE)
4169 ret = i915_gem_object_put_fence(obj);
4171 ret = i915_gem_object_get_fence(obj);
4177 if (i915_gem_object_is_inactive(obj))
4178 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
4180 obj->fault_mappable = true;
4181 VM_OBJECT_LOCK(vm_obj);
4182 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
4189 KASSERT((m->flags & PG_FICTITIOUS) != 0,
4190 ("not fictitious %p", m));
4191 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
4193 if ((m->flags & PG_BUSY) != 0) {
4194 mutex_unlock(&dev->struct_mutex);
4196 vm_page_sleep(m, "915pbs");
4200 m->valid = VM_PAGE_BITS_ALL;
4201 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
4204 vm_page_busy_try(m, false);
4206 mutex_unlock(&dev->struct_mutex);
4210 vm_object_pip_wakeup(vm_obj);
4211 return (VM_PAGER_OK);
4214 mutex_unlock(&dev->struct_mutex);
4216 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
4217 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
4218 goto unlocked_vmobj;
4220 VM_OBJECT_LOCK(vm_obj);
4221 vm_object_pip_wakeup(vm_obj);
4222 return (VM_PAGER_ERROR);
4226 i915_gem_pager_dtor(void *handle)
4228 struct drm_gem_object *obj;
4229 struct drm_device *dev;
4234 mutex_lock(&dev->struct_mutex);
4235 drm_gem_free_mmap_offset(obj);
4236 i915_gem_release_mmap(to_intel_bo(obj));
4237 drm_gem_object_unreference(obj);
4238 mutex_unlock(&dev->struct_mutex);
4241 struct cdev_pager_ops i915_gem_pager_ops = {
4242 .cdev_pg_fault = i915_gem_pager_fault,
4243 .cdev_pg_ctor = i915_gem_pager_ctor,
4244 .cdev_pg_dtor = i915_gem_pager_dtor
4247 #define GEM_PARANOID_CHECK_GTT 0
4248 #if GEM_PARANOID_CHECK_GTT
4250 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma,
4253 struct drm_i915_private *dev_priv;
4255 unsigned long start, end;
4259 dev_priv = dev->dev_private;
4260 start = OFF_TO_IDX(dev_priv->mm.gtt_start);
4261 end = OFF_TO_IDX(dev_priv->mm.gtt_end);
4262 for (i = start; i < end; i++) {
4263 pa = intel_gtt_read_pte_paddr(i);
4264 for (j = 0; j < page_count; j++) {
4265 if (pa == VM_PAGE_TO_PHYS(ma[j])) {
4266 panic("Page %p in GTT pte index %d pte %x",
4267 ma[j], i, intel_gtt_read_pte(i));
4275 i915_gpu_is_active(struct drm_device *dev)
4277 drm_i915_private_t *dev_priv = dev->dev_private;
4279 return !list_empty(&dev_priv->mm.active_list);
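/*
 * vm_lowmem eventhandler: try to unbind purgeable buffers first, then
 * anything else on the inactive list; if most attempts fail while the
 * GPU is still active, wait for the GPU to idle and reclaim again.
 */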
4283 i915_gem_lowmem(void *arg)
4285 struct drm_device *dev;
4286 struct drm_i915_private *dev_priv;
4287 struct drm_i915_gem_object *obj, *next;
4288 int cnt, cnt_fail, cnt_total;
4291 dev_priv = dev->dev_private;
4293 if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT))
4297 /* first scan for clean buffers */
4298 i915_gem_retire_requests(dev);
4300 cnt_total = cnt_fail = cnt = 0;
4302 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
4304 if (i915_gem_object_is_purgeable(obj)) {
4305 if (i915_gem_object_unbind(obj) != 0)
4311 /* second pass, evict/count anything still on the inactive list */
4312 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
4314 if (i915_gem_object_unbind(obj) == 0)
4320 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) {
4322 * We are desperate for pages, so as a last resort, wait
4323 * for the GPU to finish and discard whatever we can.
4324 * This has a dramatic impact to reduce the number of
4325 * OOM-killer events whilst running the GPU aggressively.
4327 if (i915_gpu_idle(dev) == 0)
4330 mutex_unlock(&dev->struct_mutex);