2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
26 * Copyright (c) 2011 The FreeBSD Foundation
27 * All rights reserved.
29 * This software was developed by Konstantin Belousov under sponsorship from
30 * the FreeBSD Foundation.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 #include <sys/resourcevar.h>
56 #include <sys/sfbuf.h>
59 #include <drm/i915_drm.h>
61 #include "intel_drv.h"
62 #include "intel_ringbuffer.h"
63 #include <linux/completion.h>
64 #include <linux/jiffies.h>
65 #include <linux/time.h>
67 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
68 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
69 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
70 static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
71 unsigned alignment, bool map_and_fenceable);
72 static int i915_gem_phys_pwrite(struct drm_device *dev,
73 struct drm_i915_gem_object *obj, uint64_t data_ptr, uint64_t offset,
74 uint64_t size, struct drm_file *file_priv);
76 static void i915_gem_write_fence(struct drm_device *dev, int reg,
77 struct drm_i915_gem_object *obj);
78 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
79 struct drm_i915_fence_reg *fence,
82 static uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size,
84 static uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
85 uint32_t size, int tiling_mode);
86 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
88 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj);
89 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
91 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
94 i915_gem_release_mmap(obj);
96 /* As we do not have an associated fence register, we will force
97 * a tiling change if we ever need to acquire one.
99 obj->fence_dirty = false;
100 obj->fence_reg = I915_FENCE_REG_NONE;
103 static int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj);
104 static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
105 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj);
106 static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex);
107 static void i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
108 uint32_t flush_domains);
109 static void i915_gem_reset_fences(struct drm_device *dev);
110 static void i915_gem_lowmem(void *arg);
112 static int i915_gem_obj_io(struct drm_device *dev, uint32_t handle, uint64_t data_ptr,
113 uint64_t size, uint64_t offset, enum uio_rw rw, struct drm_file *file);
115 MALLOC_DEFINE(DRM_I915_GEM, "i915gem", "Allocations from i915 gem");
116 long i915_gem_wired_pages_cnt;
118 /* some bookkeeping */
119 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
122 dev_priv->mm.object_count++;
123 dev_priv->mm.object_memory += size;
126 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
129 dev_priv->mm.object_count--;
130 dev_priv->mm.object_memory -= size;
134 i915_gem_wait_for_error(struct drm_device *dev)
136 struct drm_i915_private *dev_priv = dev->dev_private;
137 struct completion *x = &dev_priv->error_completion;
140 if (!atomic_read(&dev_priv->mm.wedged))
144 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
145 * userspace. If it takes that long something really bad is going on and
146 * we should simply try to bail out and fail as gracefully as possible.
148 ret = wait_for_completion_interruptible_timeout(x, 10*hz);
150 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
152 } else if (ret < 0) {
156 if (atomic_read(&dev_priv->mm.wedged)) {
157 /* GPU is hung, bump the completion count to account for
158 * the token we just consumed so that we never hit zero and
159 * end up waiting upon a subsequent completion event that
162 spin_lock(&x->wait.lock);
164 spin_unlock(&x->wait.lock);
169 int i915_mutex_lock_interruptible(struct drm_device *dev)
173 ret = i915_gem_wait_for_error(dev);
177 ret = lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_SLEEPFAIL);
181 WARN_ON(i915_verify_lists(dev));
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
195 struct drm_i915_gem_init *args = data;
197 if (drm_core_check_feature(dev, DRIVER_MODESET))
200 if (args->gtt_start >= args->gtt_end ||
201 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
204 /* GEM with user mode setting was never supported on ilk and later. */
205 if (INTEL_INFO(dev)->gen >= 5)
208 lockmgr(&dev->dev_lock, LK_EXCLUSIVE|LK_RETRY|LK_CANRECURSE);
209 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
210 lockmgr(&dev->dev_lock, LK_RELEASE);
216 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
217 struct drm_file *file)
219 struct drm_i915_private *dev_priv = dev->dev_private;
220 struct drm_i915_gem_get_aperture *args = data;
221 struct drm_i915_gem_object *obj;
226 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
228 pinned += obj->gtt_space->size;
231 args->aper_size = dev_priv->mm.gtt_total;
232 args->aper_available_size = args->aper_size - pinned;
238 i915_gem_create(struct drm_file *file,
239 struct drm_device *dev,
243 struct drm_i915_gem_object *obj;
247 size = roundup(size, PAGE_SIZE);
251 /* Allocate the new object */
252 obj = i915_gem_alloc_object(dev, size);
257 ret = drm_gem_handle_create(file, &obj->base, &handle);
259 drm_gem_object_release(&obj->base);
260 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
261 drm_free(obj, DRM_I915_GEM);
265 /* drop reference from allocate - handle holds it now */
266 drm_gem_object_unreference(&obj->base);
272 i915_gem_dumb_create(struct drm_file *file,
273 struct drm_device *dev,
274 struct drm_mode_create_dumb *args)
277 /* have to work out size/pitch and return them */
278 args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
279 args->size = args->pitch * args->height;
280 return i915_gem_create(file, dev,
281 args->size, &args->handle);
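/*
 * Illustrative arithmetic for the pitch/size computation above (an
 * example, not taken from this file): a 1920x1080 dumb buffer at 32 bpp
 * uses (32 + 7) / 8 = 4 bytes per pixel, so
 *   pitch = roundup2(1920 * 4, 64) = 7680 bytes (already 64-byte aligned)
 *   size  = 7680 * 1080 = 8294400 bytes
 */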
284 int i915_gem_dumb_destroy(struct drm_file *file,
285 struct drm_device *dev,
289 return drm_gem_handle_delete(file, handle);
293 * Creates a new mm object and returns a handle to it.
296 i915_gem_create_ioctl(struct drm_device *dev, void *data,
297 struct drm_file *file)
299 struct drm_i915_gem_create *args = data;
301 return i915_gem_create(file, dev,
302 args->size, &args->handle);
305 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
307 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
309 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
310 obj->tiling_mode != I915_TILING_NONE;
314 * Reads data from the object referenced by handle.
316 * On error, the contents of *data are undefined.
319 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
320 struct drm_file *file)
322 struct drm_i915_gem_pread *args = data;
324 return (i915_gem_obj_io(dev, args->handle, args->data_ptr, args->size,
325 args->offset, UIO_READ, file));
329 * Writes data to the object referenced by handle.
331 * On error, the contents of the buffer that were to be modified are undefined.
334 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
335 struct drm_file *file)
337 struct drm_i915_gem_pwrite *args = data;
339 return (i915_gem_obj_io(dev, args->handle, args->data_ptr, args->size,
340 args->offset, UIO_WRITE, file));
344 i915_gem_check_wedge(struct drm_i915_private *dev_priv,
347 if (atomic_read(&dev_priv->mm.wedged)) {
348 struct completion *x = &dev_priv->error_completion;
349 bool recovery_complete;
351 /* Give the error handler a chance to run. */
352 spin_lock(&x->wait.lock);
353 recovery_complete = x->done > 0;
354 spin_unlock(&x->wait.lock);
356 /* Non-interruptible callers can't handle -EAGAIN, hence return
357 * -EIO unconditionally for these. */
361 /* Recovery complete, but still wedged means reset failure. */
362 if (recovery_complete)
372 * Compare seqno against outstanding lazy request. Emit a request if they are
376 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
380 DRM_LOCK_ASSERT(ring->dev);
383 if (seqno == ring->outstanding_lazy_request)
384 ret = i915_add_request(ring, NULL, NULL);
390 * __wait_seqno - wait until execution of seqno has finished
391 * @ring: the ring expected to report seqno
393 * @interruptible: do an interruptible wait (normally yes)
394 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
396 * Returns 0 if the seqno was found within the allotted time. Else returns the
397 * errno with remaining time filled in timeout argument.
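/*
 * Worked example of the remaining-time bookkeeping below (illustrative
 * numbers only): if the caller passes timeout = 200ms and the seqno
 * signals after roughly 120ms, *timeout is reduced to about 80ms before
 * returning 0. If the wait expires, *timeout is normalized to zero and
 * -ETIMEDOUT is returned (the native Linux code returns -ETIME here).
 * With timeout == NULL the loop polls in one-second chunks
 * (wait_time = {1, 0}) until the seqno passes, the GPU is wedged, or a
 * signal arrives.
 */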
399 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
400 bool interruptible, struct timespec *timeout)
402 drm_i915_private_t *dev_priv = ring->dev->dev_private;
403 struct timespec before, now, wait_time={1,0};
404 unsigned long timeout_jiffies;
406 bool wait_forever = true;
409 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
412 if (timeout != NULL) {
413 wait_time = *timeout;
414 wait_forever = false;
417 timeout_jiffies = timespec_to_jiffies(&wait_time);
419 if (WARN_ON(!ring->irq_get(ring)))
422 /* Record current time in case interrupted by signal, or wedged */
423 getrawmonotonic(&before);
426 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
427 atomic_read(&dev_priv->mm.wedged))
430 end = wait_event_interruptible_timeout(ring->irq_queue,
434 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
437 ret = i915_gem_check_wedge(dev_priv, interruptible);
440 } while (end == 0 && wait_forever);
442 getrawmonotonic(&now);
448 struct timespec sleep_time = timespec_sub(now, before);
449 *timeout = timespec_sub(*timeout, sleep_time);
454 case -EAGAIN: /* Wedged */
455 case -ERESTARTSYS: /* Signal */
457 case 0: /* Timeout */
459 set_normalized_timespec(timeout, 0, 0);
460 return -ETIMEDOUT; /* -ETIME on Linux */
461 default: /* Completed */
462 WARN_ON(end < 0); /* We're not aware of other errors */
468 * Waits for a sequence number to be signaled, and cleans up the
469 * request and object lists appropriately for that event.
472 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
474 struct drm_device *dev = ring->dev;
475 struct drm_i915_private *dev_priv = dev->dev_private;
478 DRM_LOCK_ASSERT(dev);
481 ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
485 ret = i915_gem_check_olr(ring, seqno);
489 ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
495 * Ensures that all rendering to the object has completed and the object is
496 * safe to unbind from the GTT or access from the CPU.
498 static __must_check int
499 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
502 struct intel_ring_buffer *ring = obj->ring;
506 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
510 ret = i915_wait_seqno(ring, seqno);
514 i915_gem_retire_requests_ring(ring);
516 /* Manually manage the write flush as we may not yet have
517 * retired the buffer.
519 if (obj->last_write_seqno &&
520 i915_seqno_passed(seqno, obj->last_write_seqno)) {
521 obj->last_write_seqno = 0;
522 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
529 * Called when user space prepares to use an object with the CPU, either
530 * through the mmap ioctl's mapping or a GTT mapping.
533 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
534 struct drm_file *file)
536 struct drm_i915_gem_set_domain *args = data;
537 struct drm_i915_gem_object *obj;
538 uint32_t read_domains = args->read_domains;
539 uint32_t write_domain = args->write_domain;
542 /* Only handle setting domains to types used by the CPU. */
543 if (write_domain & I915_GEM_GPU_DOMAINS)
546 if (read_domains & I915_GEM_GPU_DOMAINS)
549 /* Having something in the write domain implies it's in the read
550 * domain, and only that read domain. Enforce that in the request.
552 if (write_domain != 0 && read_domains != write_domain)
555 ret = i915_mutex_lock_interruptible(dev);
559 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
560 if (&obj->base == NULL) {
565 if (read_domains & I915_GEM_DOMAIN_GTT) {
566 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
568 /* Silently promote "you're not bound, there was nothing to do"
569 * to success, since the client was just asking us to
570 * make sure everything was done.
575 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
578 drm_gem_object_unreference(&obj->base);
585 * Called when user space has done writes to this buffer
588 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
589 struct drm_file *file)
591 struct drm_i915_gem_sw_finish *args = data;
592 struct drm_i915_gem_object *obj;
595 ret = i915_mutex_lock_interruptible(dev);
598 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
599 if (&obj->base == NULL) {
604 /* Pinned buffers may be scanout, so flush the cache */
606 i915_gem_object_flush_cpu_write_domain(obj);
608 drm_gem_object_unreference(&obj->base);
615 * Maps the contents of an object, returning the address it is mapped
618 * While the mapping holds a reference on the contents of the object, it doesn't
619 * imply a ref on the object itself.
622 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
623 struct drm_file *file)
625 struct drm_i915_gem_mmap *args = data;
626 struct drm_gem_object *obj;
627 struct proc *p = curproc;
628 vm_map_t map = &p->p_vmspace->vm_map;
633 obj = drm_gem_object_lookup(dev, file, args->handle);
640 size = round_page(args->size);
642 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
650 vm_object_hold(obj->vm_obj);
651 vm_object_reference_locked(obj->vm_obj);
652 vm_object_drop(obj->vm_obj);
653 rv = vm_map_find(map, obj->vm_obj, args->offset, &addr, args->size,
654 PAGE_SIZE, /* align */
656 VM_MAPTYPE_NORMAL, /* maptype */
657 VM_PROT_READ | VM_PROT_WRITE, /* prot */
658 VM_PROT_READ | VM_PROT_WRITE, /* max */
659 MAP_SHARED /* cow */);
660 if (rv != KERN_SUCCESS) {
661 vm_object_deallocate(obj->vm_obj);
662 error = -vm_mmap_to_errno(rv);
664 args->addr_ptr = (uint64_t)addr;
667 drm_gem_object_unreference(obj);
672 * i915_gem_release_mmap - remove physical page mappings
673 * @obj: obj in question
675 * Preserve the reservation of the mmapping with the DRM core code, but
676 * relinquish ownership of the pages back to the system.
678 * It is vital that we remove the page mapping if we have mapped a tiled
679 * object through the GTT and then lose the fence register due to
680 * resource pressure. Similarly if the object has been moved out of the
681 * aperture, then pages mapped into userspace must be revoked. Removing the
682 * mapping will then trigger a page fault on the next user access, allowing
683 * fixup by i915_gem_fault().
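/*
 * On this port the revocation below walks the cdev pager object backing
 * the GTT mapping: each resident page is busied, freed back through
 * cdev_pager_free_page(), and the pager reference is dropped before
 * fault_mappable is cleared.
 */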
686 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
692 if (!obj->fault_mappable)
695 devobj = cdev_pager_lookup(obj);
696 if (devobj != NULL) {
697 page_count = OFF_TO_IDX(obj->base.size);
699 VM_OBJECT_LOCK(devobj);
700 for (i = 0; i < page_count; i++) {
701 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
704 cdev_pager_free_page(devobj, m);
706 VM_OBJECT_UNLOCK(devobj);
707 vm_object_deallocate(devobj);
710 obj->fault_mappable = false;
714 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
718 if (INTEL_INFO(dev)->gen >= 4 ||
719 tiling_mode == I915_TILING_NONE)
722 /* Previous chips need a power-of-two fence region when tiling */
723 if (INTEL_INFO(dev)->gen == 3)
724 gtt_size = 1024*1024;
728 while (gtt_size < size)
735 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
736 * @obj: object to check
738 * Return the required GTT alignment for an object, taking into account
739 * potential fence register mapping.
742 i915_gem_get_gtt_alignment(struct drm_device *dev,
748 * Minimum alignment is 4k (GTT page size), but might be greater
749 * if a fence register is needed for the object.
751 if (INTEL_INFO(dev)->gen >= 4 ||
752 tiling_mode == I915_TILING_NONE)
756 * Previous chips need to be aligned to the size of the smallest
757 * fence register that can contain the object.
759 return i915_gem_get_gtt_size(dev, size, tiling_mode);
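/*
 * Example of the fence-size rounding above (illustrative): on gen3 a
 * tiled object starts from the 1MB minimum and is doubled as needed, so
 * a 700KB object gets a 1MB region and a 3MB object gets 4MB; the fenced
 * alignment matches that rounded size. On gen4+ (or untiled objects) the
 * object size and 4KB page alignment are used unchanged.
 */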
763 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
766 * @size: size of the object
767 * @tiling_mode: tiling mode of the object
769 * Return the required GTT alignment for an object, only taking into account
770 * unfenced tiled surface requirements.
773 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
778 * Minimum alignment is 4k (GTT page size) for sane hw.
780 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
781 tiling_mode == I915_TILING_NONE)
784 /* Previous hardware however needs to be aligned to a power-of-two
785 * tile height. The simplest method for determining this is to reuse
786 * the power-of-two fenced object size.
788 return i915_gem_get_gtt_size(dev, size, tiling_mode);
792 i915_gem_mmap_gtt(struct drm_file *file,
793 struct drm_device *dev,
797 struct drm_i915_private *dev_priv = dev->dev_private;
798 struct drm_i915_gem_object *obj;
801 ret = i915_mutex_lock_interruptible(dev);
805 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
806 if (&obj->base == NULL) {
811 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
816 if (obj->madv != I915_MADV_WILLNEED) {
817 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
822 ret = drm_gem_create_mmap_offset(&obj->base);
826 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
829 drm_gem_object_unreference(&obj->base);
836 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
838 * @data: GTT mapping ioctl data
839 * @file: GEM object info
841 * Simply returns the fake offset to userspace so it can mmap it.
842 * The mmap call will end up in drm_gem_mmap(), which will set things
843 * up so we can get faults in the handler above.
845 * The fault handler will take care of binding the object into the GTT
846 * (since it may have been evicted to make room for something), allocating
847 * a fence register, and mapping the appropriate aperture address into
851 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
852 struct drm_file *file)
854 struct drm_i915_gem_mmap_gtt *args = data;
856 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
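/*
 * Rough userspace usage of the ioctl above (a sketch, assuming libdrm's
 * drmIoctl() wrapper; not part of this driver):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, arg.offset);
 *
 * The returned offset is only a token for mmap(); the fault handler does
 * the actual binding and fencing on first access.
 */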
859 /* Immediately discard the backing storage */
861 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
865 vm_obj = obj->base.vm_obj;
866 VM_OBJECT_LOCK(vm_obj);
867 vm_object_page_remove(vm_obj, 0, 0, false);
868 VM_OBJECT_UNLOCK(vm_obj);
869 obj->madv = __I915_MADV_PURGED;
873 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
875 return obj->madv == I915_MADV_DONTNEED;
878 static inline void vm_page_reference(vm_page_t m)
880 vm_page_flag_set(m, PG_REFERENCED);
884 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
889 BUG_ON(obj->madv == __I915_MADV_PURGED);
891 if (obj->tiling_mode != I915_TILING_NONE)
892 i915_gem_object_save_bit_17_swizzle(obj);
893 if (obj->madv == I915_MADV_DONTNEED)
895 page_count = obj->base.size / PAGE_SIZE;
896 VM_OBJECT_LOCK(obj->base.vm_obj);
897 #if GEM_PARANOID_CHECK_GTT
898 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
900 for (i = 0; i < page_count; i++) {
904 if (obj->madv == I915_MADV_WILLNEED)
905 vm_page_reference(m);
906 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
907 vm_page_unwire(obj->pages[i], 1);
908 vm_page_wakeup(obj->pages[i]);
909 atomic_add_long(&i915_gem_wired_pages_cnt, -1);
911 VM_OBJECT_UNLOCK(obj->base.vm_obj);
913 drm_free(obj->pages, DRM_I915_GEM);
918 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
921 struct drm_device *dev;
924 int page_count, i, j;
927 KASSERT(obj->pages == NULL, ("Obj already has pages"));
928 page_count = obj->base.size / PAGE_SIZE;
929 obj->pages = kmalloc(page_count * sizeof(vm_page_t), DRM_I915_GEM,
931 vm_obj = obj->base.vm_obj;
932 VM_OBJECT_LOCK(vm_obj);
933 for (i = 0; i < page_count; i++) {
934 if ((obj->pages[i] = i915_gem_wire_page(vm_obj, i)) == NULL)
937 VM_OBJECT_UNLOCK(vm_obj);
938 if (i915_gem_object_needs_bit17_swizzle(obj))
939 i915_gem_object_do_bit_17_swizzle(obj);
943 for (j = 0; j < i; j++) {
945 vm_page_busy_wait(m, FALSE, "i915gem");
946 vm_page_unwire(m, 0);
948 atomic_add_long(&i915_gem_wired_pages_cnt, -1);
950 VM_OBJECT_UNLOCK(vm_obj);
951 drm_free(obj->pages, DRM_I915_GEM);
957 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
958 struct intel_ring_buffer *ring)
960 struct drm_device *dev = obj->base.dev;
961 struct drm_i915_private *dev_priv = dev->dev_private;
962 u32 seqno = intel_ring_get_seqno(ring);
964 BUG_ON(ring == NULL);
967 /* Add a reference if we're newly entering the active list. */
969 drm_gem_object_reference(&obj->base);
973 /* Move from whatever list we were on to the tail of execution. */
974 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
975 list_move_tail(&obj->ring_list, &ring->active_list);
977 obj->last_read_seqno = seqno;
979 if (obj->fenced_gpu_access) {
980 obj->last_fenced_seqno = seqno;
982 /* Bump MRU to take account of the delayed flush */
983 if (obj->fence_reg != I915_FENCE_REG_NONE) {
984 struct drm_i915_fence_reg *reg;
986 reg = &dev_priv->fence_regs[obj->fence_reg];
987 list_move_tail(&reg->lru_list,
988 &dev_priv->mm.fence_list);
994 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
996 struct drm_device *dev = obj->base.dev;
997 struct drm_i915_private *dev_priv = dev->dev_private;
999 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
1000 BUG_ON(!obj->active);
1002 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1004 list_del_init(&obj->ring_list);
1007 obj->last_read_seqno = 0;
1008 obj->last_write_seqno = 0;
1009 obj->base.write_domain = 0;
1011 obj->last_fenced_seqno = 0;
1012 obj->fenced_gpu_access = false;
1015 drm_gem_object_unreference(&obj->base);
1017 WARN_ON(i915_verify_lists(dev));
1021 i915_gem_handle_seqno_wrap(struct drm_device *dev)
1023 struct drm_i915_private *dev_priv = dev->dev_private;
1024 struct intel_ring_buffer *ring;
1027 /* The hardware uses various monotonic 32-bit counters; if we
1028 * detect that they will wraparound we need to idle the GPU
1029 * and reset those counters.
1032 for_each_ring(ring, dev_priv, i) {
1033 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1034 ret |= ring->sync_seqno[j] != 0;
1039 ret = i915_gpu_idle(dev);
1043 i915_gem_retire_requests(dev);
1044 for_each_ring(ring, dev_priv, i) {
1045 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1046 ring->sync_seqno[j] = 0;
1053 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
1055 struct drm_i915_private *dev_priv = dev->dev_private;
1057 /* reserve 0 for non-seqno */
1058 if (dev_priv->next_seqno == 0) {
1059 int ret = i915_gem_handle_seqno_wrap(dev);
1063 dev_priv->next_seqno = 1;
1066 *seqno = dev_priv->next_seqno++;
1071 i915_add_request(struct intel_ring_buffer *ring,
1072 struct drm_file *file,
1075 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1076 struct drm_i915_gem_request *request;
1077 u32 request_ring_position;
1082 * Emit any outstanding flushes - execbuf can fail to emit the flush
1083 * after having emitted the batchbuffer command. Hence we need to fix
1084 * things up similar to emitting the lazy request. The difference here
1085 * is that the flush _must_ happen before the next request, no matter
1088 if (ring->gpu_caches_dirty) {
1089 ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
1093 ring->gpu_caches_dirty = false;
1096 request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1097 if (request == NULL)
1101 /* Record the position of the start of the request so that
1102 * should we detect the updated seqno part-way through the
1103 * GPU processing the request, we never over-estimate the
1104 * position of the head.
1106 request_ring_position = intel_ring_get_tail(ring);
1108 ret = ring->add_request(ring);
1110 kfree(request, DRM_I915_GEM);
1114 request->seqno = intel_ring_get_seqno(ring);
1115 request->ring = ring;
1116 request->tail = request_ring_position;
1117 request->emitted_jiffies = jiffies;
1118 was_empty = list_empty(&ring->request_list);
1119 list_add_tail(&request->list, &ring->request_list);
1120 request->file_priv = NULL;
1123 struct drm_i915_file_private *file_priv = file->driver_priv;
1125 spin_lock(&file_priv->mm.lock);
1126 request->file_priv = file_priv;
1127 list_add_tail(&request->client_list,
1128 &file_priv->mm.request_list);
1129 spin_unlock(&file_priv->mm.lock);
1132 ring->outstanding_lazy_request = 0;
1134 if (!dev_priv->mm.suspended) {
1135 if (i915_enable_hangcheck) {
1136 mod_timer(&dev_priv->hangcheck_timer,
1137 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
1140 queue_delayed_work(dev_priv->wq,
1141 &dev_priv->mm.retire_work,
1142 round_jiffies_up_relative(hz));
1143 intel_mark_busy(dev_priv->dev);
1148 *out_seqno = request->seqno;
1153 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1155 struct drm_i915_file_private *file_priv = request->file_priv;
1160 spin_lock(&file_priv->mm.lock);
1161 if (request->file_priv) {
1162 list_del(&request->client_list);
1163 request->file_priv = NULL;
1165 spin_unlock(&file_priv->mm.lock);
1168 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1169 struct intel_ring_buffer *ring)
1171 while (!list_empty(&ring->request_list)) {
1172 struct drm_i915_gem_request *request;
1174 request = list_first_entry(&ring->request_list,
1175 struct drm_i915_gem_request,
1178 list_del(&request->list);
1179 i915_gem_request_remove_from_client(request);
1180 drm_free(request, DRM_I915_GEM);
1183 while (!list_empty(&ring->active_list)) {
1184 struct drm_i915_gem_object *obj;
1186 obj = list_first_entry(&ring->active_list,
1187 struct drm_i915_gem_object,
1190 list_del_init(&obj->gpu_write_list);
1191 i915_gem_object_move_to_inactive(obj);
1195 static void i915_gem_reset_fences(struct drm_device *dev)
1197 struct drm_i915_private *dev_priv = dev->dev_private;
1200 for (i = 0; i < dev_priv->num_fence_regs; i++) {
1201 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1203 i915_gem_write_fence(dev, i, NULL);
1206 i915_gem_object_fence_lost(reg->obj);
1210 INIT_LIST_HEAD(&reg->lru_list);
1213 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
1216 void i915_gem_reset(struct drm_device *dev)
1218 struct drm_i915_private *dev_priv = dev->dev_private;
1219 struct drm_i915_gem_object *obj;
1220 struct intel_ring_buffer *ring;
1223 for_each_ring(ring, dev_priv, i)
1224 i915_gem_reset_ring_lists(dev_priv, ring);
1226 /* Move everything out of the GPU domains to ensure we do any
1227 * necessary invalidation upon reuse.
1229 list_for_each_entry(obj,
1230 &dev_priv->mm.inactive_list,
1233 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1236 /* The fence registers are invalidated so clear them out */
1237 i915_gem_reset_fences(dev);
1241 * This function clears the request list as sequence numbers are passed.
1244 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1248 if (list_empty(&ring->request_list))
1251 WARN_ON(i915_verify_lists(ring->dev));
1253 seqno = ring->get_seqno(ring, true);
1255 while (!list_empty(&ring->request_list)) {
1256 struct drm_i915_gem_request *request;
1258 request = list_first_entry(&ring->request_list,
1259 struct drm_i915_gem_request,
1262 if (!i915_seqno_passed(seqno, request->seqno))
1265 /* We know the GPU must have read the request to have
1266 * sent us the seqno + interrupt, so use the position
1267 * of tail of the request to update the last known position
1270 ring->last_retired_head = request->tail;
1272 list_del(&request->list);
1273 i915_gem_request_remove_from_client(request);
1274 kfree(request, DRM_I915_GEM);
1277 /* Move any buffers on the active list that are no longer referenced
1278 * by the ringbuffer to the flushing/inactive lists as appropriate.
1280 while (!list_empty(&ring->active_list)) {
1281 struct drm_i915_gem_object *obj;
1283 obj = list_first_entry(&ring->active_list,
1284 struct drm_i915_gem_object,
1287 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
1290 i915_gem_object_move_to_inactive(obj);
1293 if (unlikely(ring->trace_irq_seqno &&
1294 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1295 ring->irq_put(ring);
1296 ring->trace_irq_seqno = 0;
1302 i915_gem_retire_requests(struct drm_device *dev)
1304 drm_i915_private_t *dev_priv = dev->dev_private;
1305 struct intel_ring_buffer *ring;
1308 for_each_ring(ring, dev_priv, i)
1309 i915_gem_retire_requests_ring(ring);
1313 i915_gem_retire_work_handler(struct work_struct *work)
1315 drm_i915_private_t *dev_priv;
1316 struct drm_device *dev;
1317 struct intel_ring_buffer *ring;
1321 dev_priv = container_of(work, drm_i915_private_t,
1322 mm.retire_work.work);
1323 dev = dev_priv->dev;
1325 /* Come back later if the device is busy... */
1326 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) {
1327 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
1328 round_jiffies_up_relative(hz));
1332 i915_gem_retire_requests(dev);
1334 /* Send a periodic flush down the ring so we don't hold onto GEM
1335 * objects indefinitely.
1338 for_each_ring(ring, dev_priv, i) {
1339 if (ring->gpu_caches_dirty)
1340 i915_add_request(ring, NULL, NULL);
1342 idle &= list_empty(&ring->request_list);
1345 if (!dev_priv->mm.suspended && !idle)
1346 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
1347 round_jiffies_up_relative(hz));
1349 intel_mark_idle(dev);
1354 * Ensures that an object will eventually get non-busy by flushing any required
1355 * write domains, emitting any outstanding lazy request and retiring any
1356 * completed requests.
1359 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
1364 ret = i915_gem_object_flush_gpu_write_domain(obj);
1368 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
1372 i915_gem_retire_requests_ring(obj->ring);
1379 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
1380 * @DRM_IOCTL_ARGS: standard ioctl arguments
1382 * Returns 0 if successful, else an error is returned with the remaining time in
1383 * the timeout parameter.
1384 * -ETIME: object is still busy after timeout
1385 * -ERESTARTSYS: signal interrupted the wait
1386 * -ENOENT: object doesn't exist
1387 * Also possible, but rare:
1388 * -EAGAIN: GPU wedged
1390 * -ENODEV: Internal IRQ fail
1391 * -E?: The add request failed
1393 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
1394 * non-zero timeout parameter the wait ioctl will wait for the given number of
1395 * nanoseconds on an object becoming unbusy. Since the wait itself does so
1396 * without holding struct_mutex the object may become re-busied before this
1397 * function completes. A similar but shorter race condition exists in the busy
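/*
 * Rough userspace call sequence for this ioctl (a sketch, assuming
 * libdrm's drmIoctl(); not part of this driver). A timeout_ns of 0
 * behaves like the busy ioctl; on a "still busy" return, wait.timeout_ns
 * holds the time remaining:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 100 * 1000 * 1000,
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 */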
1401 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
1403 struct drm_i915_gem_wait *args = data;
1404 struct drm_i915_gem_object *obj;
1405 struct intel_ring_buffer *ring = NULL;
1406 struct timespec timeout_stack, *timeout = NULL;
1410 if (args->timeout_ns >= 0) {
1411 timeout_stack = ns_to_timespec(args->timeout_ns);
1412 timeout = &timeout_stack;
1415 ret = i915_mutex_lock_interruptible(dev);
1419 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
1420 if (&obj->base == NULL) {
1425 /* Need to make sure the object gets inactive eventually. */
1426 ret = i915_gem_object_flush_active(obj);
1431 seqno = obj->last_read_seqno;
1438 /* Do this after OLR check to make sure we make forward progress polling
1439 * on this IOCTL with a 0 timeout (like busy ioctl)
1441 if (!args->timeout_ns) {
1446 drm_gem_object_unreference(&obj->base);
1449 ret = __wait_seqno(ring, seqno, true, timeout);
1451 WARN_ON(!timespec_valid(timeout));
1452 args->timeout_ns = timespec_to_ns(timeout);
1457 drm_gem_object_unreference(&obj->base);
1463 * i915_gem_object_sync - sync an object to a ring.
1465 * @obj: object which may be in use on another ring.
1466 * @to: ring we wish to use the object on. May be NULL.
1468 * This code is meant to abstract object synchronization with the GPU.
1469 * Calling with NULL implies synchronizing the object with the CPU
1470 * rather than a particular GPU ring.
1472 * Returns 0 if successful, else propagates up the lower layer error.
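/*
 * In practice (per the code below): when hardware semaphores are enabled
 * and the object last ran on a different ring, sync_to() makes the
 * destination ring wait on the seqno in hardware instead of stalling the
 * CPU; with to == NULL or semaphores disabled this degrades to a CPU wait
 * via i915_gem_object_wait_rendering().
 */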
1475 i915_gem_object_sync(struct drm_i915_gem_object *obj,
1476 struct intel_ring_buffer *to)
1478 struct intel_ring_buffer *from = obj->ring;
1482 if (from == NULL || to == from)
1485 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
1486 return i915_gem_object_wait_rendering(obj, false);
1488 idx = intel_ring_sync_index(from, to);
1490 seqno = obj->last_read_seqno;
1491 if (seqno <= from->sync_seqno[idx])
1494 ret = i915_gem_check_olr(obj->ring, seqno);
1498 ret = to->sync_to(to, from, seqno);
1500 /* We use last_read_seqno because sync_to()
1501 * might have just caused seqno wrap under
1504 from->sync_seqno[idx] = obj->last_read_seqno;
1509 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
1511 u32 old_write_domain, old_read_domains;
1513 /* Act as a barrier for all accesses through the GTT */
1516 /* Force a pagefault for domain tracking on next user access */
1517 i915_gem_release_mmap(obj);
1519 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
1522 old_read_domains = obj->base.read_domains;
1523 old_write_domain = obj->base.write_domain;
1525 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
1526 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
1531 * Unbinds an object from the GTT aperture.
1534 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
1536 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
1539 if (obj->gtt_space == NULL)
1545 ret = i915_gem_object_finish_gpu(obj);
1548 /* Continue on if we fail due to EIO; the GPU is hung, so we
1549 * should be safe, and we need to clean up or else we might
1550 * cause memory corruption through use-after-free.
1553 i915_gem_object_finish_gtt(obj);
1555 /* Move the object to the CPU domain to ensure that
1556 * any possible CPU writes while it's not in the GTT
1557 * are flushed when we go to remap it.
1560 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1561 if (ret == -ERESTART || ret == -EINTR)
1564 /* In the event of a disaster, abandon all caches and
1565 * hope for the best.
1567 i915_gem_clflush_object(obj);
1568 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1571 /* release the fence reg _after_ flushing */
1572 ret = i915_gem_object_put_fence(obj);
1576 if (obj->has_global_gtt_mapping)
1577 i915_gem_gtt_unbind_object(obj);
1578 if (obj->has_aliasing_ppgtt_mapping) {
1579 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
1580 obj->has_aliasing_ppgtt_mapping = 0;
1582 i915_gem_gtt_finish_object(obj);
1584 i915_gem_object_put_pages_gtt(obj);
1586 list_del_init(&obj->gtt_list);
1587 list_del_init(&obj->mm_list);
1588 /* Avoid an unnecessary call to unbind on rebind. */
1589 obj->map_and_fenceable = true;
1591 drm_mm_put_block(obj->gtt_space);
1592 obj->gtt_space = NULL;
1593 obj->gtt_offset = 0;
1595 if (i915_gem_object_is_purgeable(obj))
1596 i915_gem_object_truncate(obj);
1601 int i915_gpu_idle(struct drm_device *dev)
1603 drm_i915_private_t *dev_priv = dev->dev_private;
1604 struct intel_ring_buffer *ring;
1607 /* Flush everything onto the inactive list. */
1608 for_each_ring(ring, dev_priv, i) {
1609 ret = intel_ring_idle(ring);
1617 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
1618 struct drm_i915_gem_object *obj)
1620 drm_i915_private_t *dev_priv = dev->dev_private;
1624 u32 size = obj->gtt_space->size;
1626 val = (uint64_t)((obj->gtt_offset + size - 4096) &
1628 val |= obj->gtt_offset & 0xfffff000;
1629 val |= (uint64_t)((obj->stride / 128) - 1) <<
1630 SANDYBRIDGE_FENCE_PITCH_SHIFT;
1632 if (obj->tiling_mode == I915_TILING_Y)
1633 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
1634 val |= I965_FENCE_REG_VALID;
1638 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
1639 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
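/*
 * The 64-bit fence value written above packs the end address of the
 * region (gtt_offset + size - 4096), the start address
 * (gtt_offset & 0xfffff000), the pitch in 128-byte units minus one, a
 * Y-tiling flag and the valid bit. The gen4/5 variant below differs only
 * in the pitch field's shift and the register base.
 */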
1642 static void i965_write_fence_reg(struct drm_device *dev, int reg,
1643 struct drm_i915_gem_object *obj)
1645 drm_i915_private_t *dev_priv = dev->dev_private;
1649 u32 size = obj->gtt_space->size;
1651 val = (uint64_t)((obj->gtt_offset + size - 4096) &
1653 val |= obj->gtt_offset & 0xfffff000;
1654 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
1655 if (obj->tiling_mode == I915_TILING_Y)
1656 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
1657 val |= I965_FENCE_REG_VALID;
1661 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
1662 POSTING_READ(FENCE_REG_965_0 + reg * 8);
1665 static void i915_write_fence_reg(struct drm_device *dev, int reg,
1666 struct drm_i915_gem_object *obj)
1668 drm_i915_private_t *dev_priv = dev->dev_private;
1672 u32 size = obj->gtt_space->size;
1676 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
1677 (size & -size) != size ||
1678 (obj->gtt_offset & (size - 1)),
1679 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
1680 obj->gtt_offset, obj->map_and_fenceable, size);
1682 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
1687 /* Note: pitch better be a power of two tile widths */
1688 pitch_val = obj->stride / tile_width;
1689 pitch_val = ffs(pitch_val) - 1;
1691 val = obj->gtt_offset;
1692 if (obj->tiling_mode == I915_TILING_Y)
1693 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
1694 val |= I915_FENCE_SIZE_BITS(size);
1695 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
1696 val |= I830_FENCE_REG_VALID;
1701 reg = FENCE_REG_830_0 + reg * 4;
1703 reg = FENCE_REG_945_8 + (reg - 8) * 4;
1705 I915_WRITE(reg, val);
1709 static void i830_write_fence_reg(struct drm_device *dev, int reg,
1710 struct drm_i915_gem_object *obj)
1712 drm_i915_private_t *dev_priv = dev->dev_private;
1716 u32 size = obj->gtt_space->size;
1719 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
1720 (size & -size) != size ||
1721 (obj->gtt_offset & (size - 1)),
1722 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
1723 obj->gtt_offset, size);
1725 pitch_val = obj->stride / 128;
1726 pitch_val = ffs(pitch_val) - 1;
1728 val = obj->gtt_offset;
1729 if (obj->tiling_mode == I915_TILING_Y)
1730 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
1731 val |= I830_FENCE_SIZE_BITS(size);
1732 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
1733 val |= I830_FENCE_REG_VALID;
1737 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
1738 POSTING_READ(FENCE_REG_830_0 + reg * 4);
1741 static void i915_gem_write_fence(struct drm_device *dev, int reg,
1742 struct drm_i915_gem_object *obj)
1744 switch (INTEL_INFO(dev)->gen) {
1746 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
1748 case 4: i965_write_fence_reg(dev, reg, obj); break;
1749 case 3: i915_write_fence_reg(dev, reg, obj); break;
1750 case 2: i830_write_fence_reg(dev, reg, obj); break;
1755 static inline int fence_number(struct drm_i915_private *dev_priv,
1756 struct drm_i915_fence_reg *fence)
1758 return fence - dev_priv->fence_regs;
1761 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
1762 struct drm_i915_fence_reg *fence,
1765 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1766 int reg = fence_number(dev_priv, fence);
1768 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
1771 obj->fence_reg = reg;
1773 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
1775 obj->fence_reg = I915_FENCE_REG_NONE;
1777 list_del_init(&fence->lru_list);
1782 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
1786 if (obj->fenced_gpu_access) {
1787 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
1788 ret = i915_gem_flush_ring(obj->ring,
1789 0, obj->base.write_domain);
1794 obj->fenced_gpu_access = false;
1797 if (obj->last_fenced_seqno) {
1798 ret = i915_wait_seqno(obj->ring,
1799 obj->last_fenced_seqno);
1803 obj->last_fenced_seqno = 0;
1806 /* Ensure that all CPU reads are completed before installing a fence
1807 * and all writes before removing the fence.
1809 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
1816 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
1818 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1821 ret = i915_gem_object_flush_fence(obj);
1825 if (obj->fence_reg == I915_FENCE_REG_NONE)
1828 i915_gem_object_update_fence(obj,
1829 &dev_priv->fence_regs[obj->fence_reg],
1831 i915_gem_object_fence_lost(obj);
1836 static struct drm_i915_fence_reg *
1837 i915_find_fence_reg(struct drm_device *dev)
1839 struct drm_i915_private *dev_priv = dev->dev_private;
1840 struct drm_i915_fence_reg *reg, *avail;
1843 /* First try to find a free reg */
1845 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
1846 reg = &dev_priv->fence_regs[i];
1850 if (!reg->pin_count)
1857 /* None available, try to steal one or wait for a user to finish */
1858 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
1869 * i915_gem_object_get_fence - set up fencing for an object
1870 * @obj: object to map through a fence reg
1872 * When mapping objects through the GTT, userspace wants to be able to write
1873 * to them without having to worry about swizzling if the object is tiled.
1874 * This function walks the fence regs looking for a free one for @obj,
1875 * stealing one if it can't find any.
1877 * It then sets up the reg based on the object's properties: address, pitch
1878 * and tiling format.
1880 * For an untiled surface, this removes any existing fence.
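/*
 * Summary of the paths below: a dirty fence is flushed first; if the
 * object already owns a register it is simply bumped in the LRU; if it
 * needs one, a free register is found (or stolen from another object via
 * i915_find_fence_reg()) and then programmed with
 * i915_gem_object_update_fence().
 */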
1883 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
1885 struct drm_device *dev = obj->base.dev;
1886 struct drm_i915_private *dev_priv = dev->dev_private;
1887 bool enable = obj->tiling_mode != I915_TILING_NONE;
1888 struct drm_i915_fence_reg *reg;
1891 /* Have we updated the tiling parameters upon the object and so
1892 * will need to serialise the write to the associated fence register?
1894 if (obj->fence_dirty) {
1895 ret = i915_gem_object_flush_fence(obj);
1900 /* Just update our place in the LRU if our fence is getting reused. */
1901 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1902 reg = &dev_priv->fence_regs[obj->fence_reg];
1903 if (!obj->fence_dirty) {
1904 list_move_tail(&reg->lru_list,
1905 &dev_priv->mm.fence_list);
1908 } else if (enable) {
1909 reg = i915_find_fence_reg(dev);
1914 struct drm_i915_gem_object *old = reg->obj;
1916 ret = i915_gem_object_flush_fence(old);
1920 i915_gem_object_fence_lost(old);
1925 i915_gem_object_update_fence(obj, reg, enable);
1926 obj->fence_dirty = false;
1932 * Finds free space in the GTT aperture and binds the object there.
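/*
 * Outline of the steps below: compute the fenced and unfenced size and
 * alignment for the object, search the drm_mm allocator (restricted to
 * the mappable range if map_and_fenceable), evict on failure, wire the
 * backing pages, insert the PTEs, and finally record whether the
 * placement ended up both fenceable and mappable.
 */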
1935 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
1937 bool map_and_fenceable)
1939 struct drm_device *dev = obj->base.dev;
1940 drm_i915_private_t *dev_priv = dev->dev_private;
1941 struct drm_mm_node *free_space;
1942 uint32_t size, fence_size, fence_alignment, unfenced_alignment;
1943 bool mappable, fenceable;
1945 bool nonblocking = false;
1947 if (obj->madv != I915_MADV_WILLNEED) {
1948 DRM_ERROR("Attempting to bind a purgeable object\n");
1952 fence_size = i915_gem_get_gtt_size(dev, obj->base.size,
1954 fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size,
1956 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev,
1957 obj->base.size, obj->tiling_mode);
1959 alignment = map_and_fenceable ? fence_alignment :
1961 if (map_and_fenceable && (alignment & (fence_alignment - 1)) != 0) {
1962 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
1966 size = map_and_fenceable ? fence_size : obj->base.size;
1968 /* If the object is bigger than the entire aperture, reject it early
1969 * before evicting everything in a vain attempt to find space.
1971 if (obj->base.size > (map_and_fenceable ?
1972 dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
1974 "Attempting to bind an object larger than the aperture\n");
1979 if (map_and_fenceable)
1980 free_space = drm_mm_search_free_in_range(
1981 &dev_priv->mm.gtt_space, size, alignment, 0,
1982 dev_priv->mm.gtt_mappable_end, 0);
1984 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
1985 size, alignment, 0);
1986 if (free_space != NULL) {
1988 if (map_and_fenceable)
1989 obj->gtt_space = drm_mm_get_block_range_generic(
1990 free_space, size, alignment, color, 0,
1991 dev_priv->mm.gtt_mappable_end, 1);
1993 obj->gtt_space = drm_mm_get_block_generic(free_space,
1994 size, alignment, color, 1);
1996 if (obj->gtt_space == NULL) {
1997 ret = i915_gem_evict_something(dev, size, alignment,
2007 * NOTE: i915_gem_object_get_pages_gtt() cannot
2008 * return ENOMEM, since we used VM_ALLOC_RETRY.
2010 ret = i915_gem_object_get_pages_gtt(obj, 0);
2012 drm_mm_put_block(obj->gtt_space);
2013 obj->gtt_space = NULL;
2017 i915_gem_gtt_bind_object(obj, obj->cache_level);
2019 i915_gem_object_put_pages_gtt(obj);
2020 drm_mm_put_block(obj->gtt_space);
2021 obj->gtt_space = NULL;
2022 if (i915_gem_evict_everything(dev))
2027 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2028 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2030 obj->gtt_offset = obj->gtt_space->start;
2033 obj->gtt_space->size == fence_size &&
2034 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2037 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2038 obj->map_and_fenceable = mappable && fenceable;
2044 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2047 /* If we don't have a page list set up, then we're not pinned
2048 * to GPU, and we can ignore the cache flush because it'll happen
2049 * again at bind time.
2051 if (obj->pages == NULL)
2054 /* If the GPU is snooping the contents of the CPU cache,
2055 * we do not need to manually clear the CPU cache lines. However,
2056 * the caches are only snooped when the render cache is
2057 * flushed/invalidated. As we always have to emit invalidations
2058 * and flushes when moving into and out of the RENDER domain, correct
2059 * snooping behaviour occurs naturally as the result of our domain
2062 if (obj->cache_level != I915_CACHE_NONE)
2065 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2068 /** Flushes the GTT write domain for the object if it's dirty. */
2070 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2072 uint32_t old_write_domain;
2074 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2077 /* No actual flushing is required for the GTT write domain. Writes
2078 * to it immediately go to main memory as far as we know, so there's
2079 * no chipset flush. It also doesn't land in render cache.
2081 * However, we do have to enforce the order so that all writes through
2082 * the GTT land before any writes to the device, such as updates to
2087 old_write_domain = obj->base.write_domain;
2088 obj->base.write_domain = 0;
2091 /** Flushes the CPU write domain for the object if it's dirty. */
2093 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2095 uint32_t old_write_domain;
2097 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2100 i915_gem_clflush_object(obj);
2101 intel_gtt_chipset_flush();
2102 old_write_domain = obj->base.write_domain;
2103 obj->base.write_domain = 0;
2107 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2110 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2112 return (i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain));
2116 * Moves a single object to the GTT read, and possibly write domain.
2118 * This function returns when the move is complete, including waiting on
2122 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2124 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2125 uint32_t old_write_domain, old_read_domains;
2128 /* Not valid to be called on unbound objects. */
2129 if (obj->gtt_space == NULL)
2132 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2135 ret = i915_gem_object_flush_gpu_write_domain(obj);
2139 ret = i915_gem_object_wait_rendering(obj, !write);
2143 i915_gem_object_flush_cpu_write_domain(obj);
2145 old_write_domain = obj->base.write_domain;
2146 old_read_domains = obj->base.read_domains;
2148 /* It should now be out of any other write domains, and we can update
2149 * the domain values for our changes.
2151 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2152 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2154 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2155 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2159 /* And bump the LRU for this access */
2160 if (i915_gem_object_is_inactive(obj))
2161 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2166 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2167 enum i915_cache_level cache_level)
2169 struct drm_device *dev = obj->base.dev;
2170 drm_i915_private_t *dev_priv = dev->dev_private;
2173 if (obj->cache_level == cache_level)
2176 if (obj->pin_count) {
2177 DRM_DEBUG("can not change the cache level of pinned objects\n");
2181 if (obj->gtt_space) {
2182 ret = i915_gem_object_finish_gpu(obj);
2186 i915_gem_object_finish_gtt(obj);
2188 /* Before SandyBridge, you could not use tiling or fence
2189 * registers with snooped memory, so relinquish any fences
2190 * currently pointing to our region in the aperture.
2192 if (INTEL_INFO(obj->base.dev)->gen < 6) {
2193 ret = i915_gem_object_put_fence(obj);
2198 if (obj->has_global_gtt_mapping)
2199 i915_gem_gtt_bind_object(obj, cache_level);
2200 if (obj->has_aliasing_ppgtt_mapping)
2201 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2205 if (cache_level == I915_CACHE_NONE) {
2206 u32 old_read_domains, old_write_domain;
2208 /* If we're coming from LLC cached, then we haven't
2209 * actually been tracking whether the data is in the
2210 * CPU cache or not, since we only allow one bit set
2211 * in obj->write_domain and have been skipping the clflushes.
2212 * Just set it to the CPU cache for now.
2214 KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0,
2215 ("obj %p in CPU write domain", obj));
2216 KASSERT((obj->base.read_domains & ~I915_GEM_DOMAIN_CPU) == 0,
2217 ("obj %p in CPU read domain", obj));
2219 old_read_domains = obj->base.read_domains;
2220 old_write_domain = obj->base.write_domain;
2222 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2223 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2227 obj->cache_level = cache_level;
2231 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
2232 struct drm_file *file)
2234 struct drm_i915_gem_caching *args = data;
2235 struct drm_i915_gem_object *obj;
2238 ret = i915_mutex_lock_interruptible(dev);
2242 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2243 if (&obj->base == NULL) {
2248 args->caching = obj->cache_level != I915_CACHE_NONE;
2250 drm_gem_object_unreference(&obj->base);
2256 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
2257 struct drm_file *file)
2259 struct drm_i915_gem_caching *args = data;
2260 struct drm_i915_gem_object *obj;
2261 enum i915_cache_level level;
2264 switch (args->caching) {
2265 case I915_CACHING_NONE:
2266 level = I915_CACHE_NONE;
2268 case I915_CACHING_CACHED:
2269 level = I915_CACHE_LLC;
2275 ret = i915_mutex_lock_interruptible(dev);
2279 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2280 if (&obj->base == NULL) {
2285 ret = i915_gem_object_set_cache_level(obj, level);
2287 drm_gem_object_unreference(&obj->base);
2294 * Prepare buffer for display plane (scanout, cursors, etc).
2295 * Can be called from an uninterruptible phase (modesetting) and allows
2296 * any flushes to be pipelined (for pageflips).
2299 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2301 struct intel_ring_buffer *pipelined)
2303 u32 old_read_domains, old_write_domain;
2306 ret = i915_gem_object_flush_gpu_write_domain(obj);
2310 if (pipelined != obj->ring) {
2311 ret = i915_gem_object_sync(obj, pipelined);
2316 /* The display engine is not coherent with the LLC cache on gen6. As
2317 * a result, we make sure that the pinning that is about to occur is
2318 * done with uncached PTEs. This is lowest common denominator for all
2321 * However for gen6+, we could do better by using the GFDT bit instead
2322 * of uncaching, which would allow us to flush all the LLC-cached data
2323 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2325 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2329 /* As the user may map the buffer once pinned in the display plane
2330 * (e.g. libkms for the bootup splash), we have to ensure that we
2331 * always use map_and_fenceable for all scanout buffers.
2333 ret = i915_gem_object_pin(obj, alignment, true);
2337 i915_gem_object_flush_cpu_write_domain(obj);
2339 old_write_domain = obj->base.write_domain;
2340 old_read_domains = obj->base.read_domains;
2342 /* It should now be out of any other write domains, and we can update
2343 * the domain values for our changes.
2345 obj->base.write_domain = 0;
2346 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2352 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
2356 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
2359 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2360 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2365 ret = i915_gem_object_wait_rendering(obj, false);
2369 /* Ensure that we invalidate the GPU's caches and TLBs. */
2370 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2375 * Moves a single object to the CPU read, and possibly write domain.
2377 * This function returns when the move is complete, including waiting on
2381 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2383 uint32_t old_write_domain, old_read_domains;
2386 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
2389 ret = i915_gem_object_flush_gpu_write_domain(obj);
2393 ret = i915_gem_object_wait_rendering(obj, !write);
2397 i915_gem_object_flush_gtt_write_domain(obj);
2399 old_write_domain = obj->base.write_domain;
2400 old_read_domains = obj->base.read_domains;
2402 /* Flush the CPU cache if it's still invalid. */
2403 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2404 i915_gem_clflush_object(obj);
2406 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2409 /* It should now be out of any other write domains, and we can update
2410 * the domain values for our changes.
2412 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2414 /* If we're writing through the CPU, then the GPU read domains will
2415 * need to be invalidated at next use.
2418 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2419 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2425 /* Throttle our rendering by waiting until the ring has completed our requests
2426 * emitted over 20 msec ago.
2428 * Note that if we were to use the current jiffies each time around the loop,
2429 * we wouldn't escape the function with any frames outstanding if the time to
2430 * render a frame was over 20ms.
2432 * This should get us reasonable parallelism between CPU and GPU but also
2433 * relatively low latency when blocking on a particular request to finish.
2436 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2438 struct drm_i915_private *dev_priv = dev->dev_private;
2439 struct drm_i915_file_private *file_priv = file->driver_priv;
2440 unsigned long recent_enough = ticks - (20 * hz / 1000);
2441 struct drm_i915_gem_request *request;
2442 struct intel_ring_buffer *ring = NULL;
2446 if (atomic_read(&dev_priv->mm.wedged))
2449 spin_lock(&file_priv->mm.lock);
2450 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
2451 if (time_after_eq(request->emitted_jiffies, recent_enough))
2454 ring = request->ring;
2455 seqno = request->seqno;
2457 spin_unlock(&file_priv->mm.lock);
2462 ret = __wait_seqno(ring, seqno, true, NULL);
2465 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
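/*
 * Pin an object into the GTT.  If the object is already bound but at an
 * offset that does not satisfy the requested alignment (or is not
 * map-and-fenceable when that was asked for), it is unbound and rebound.
 */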
2471 i915_gem_object_pin(struct drm_i915_gem_object *obj,
2473 bool map_and_fenceable)
2477 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
2480 if (obj->gtt_space != NULL) {
2481 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
2482 (map_and_fenceable && !obj->map_and_fenceable)) {
2483 WARN(obj->pin_count,
2484 "bo is already pinned with incorrect alignment:"
2485 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
2486 " obj->map_and_fenceable=%d\n",
2487 obj->gtt_offset, alignment,
2489 obj->map_and_fenceable);
2490 ret = i915_gem_object_unbind(obj);
2496 if (obj->gtt_space == NULL) {
2497 ret = i915_gem_object_bind_to_gtt(obj, alignment,
2503 if (!obj->has_global_gtt_mapping && map_and_fenceable)
2504 i915_gem_gtt_bind_object(obj, obj->cache_level);
2507 obj->pin_mappable |= map_and_fenceable;
2513 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
2515 BUG_ON(obj->pin_count == 0);
2516 BUG_ON(obj->gtt_space == NULL);
2518 if (--obj->pin_count == 0)
2519 obj->pin_mappable = false;
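/*
 * Ioctl: pin an object into the GTT on behalf of userspace (legacy, used by
 * old DDX drivers) and report its GTT offset.  Pins are tracked per file via
 * pin_filp/user_pin_count so one client cannot unpin another client's buffer.
 */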
2523 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
2524 struct drm_file *file)
2526 struct drm_i915_gem_pin *args = data;
2527 struct drm_i915_gem_object *obj;
2528 struct drm_gem_object *gobj;
2531 ret = i915_mutex_lock_interruptible(dev);
2535 gobj = drm_gem_object_lookup(dev, file, args->handle);
2540 obj = to_intel_bo(gobj);
2542 if (obj->madv != I915_MADV_WILLNEED) {
2543 DRM_ERROR("Attempting to pin a purgeable buffer\n");
2548 if (obj->pin_filp != NULL && obj->pin_filp != file) {
2549 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
2555 obj->user_pin_count++;
2556 obj->pin_filp = file;
2557 if (obj->user_pin_count == 1) {
2558 ret = i915_gem_object_pin(obj, args->alignment, true);
2563 /* XXX - flush the CPU caches for pinned objects
2564 * as the X server doesn't manage domains yet
2566 i915_gem_object_flush_cpu_write_domain(obj);
2567 args->offset = obj->gtt_offset;
2569 drm_gem_object_unreference(&obj->base);
2576 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
2577 struct drm_file *file)
2579 struct drm_i915_gem_pin *args = data;
2580 struct drm_i915_gem_object *obj;
2583 ret = i915_mutex_lock_interruptible(dev);
2587 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2588 if (&obj->base == NULL) {
2593 if (obj->pin_filp != file) {
2594 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
2599 obj->user_pin_count--;
2600 if (obj->user_pin_count == 0) {
2601 obj->pin_filp = NULL;
2602 i915_gem_object_unpin(obj);
2606 drm_gem_object_unreference(&obj->base);
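/*
 * Ioctl: report whether an object is still in use by the GPU.  Bit 0 of
 * args->busy mirrors obj->active and the upper half carries a mask of the
 * rings still referencing the object.  A minimal userspace sketch (assuming
 * libdrm's drmIoctl(); illustrative only, not part of this file):
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) {
 *		int active = busy.busy & 1;
 *		unsigned ring_mask = busy.busy >> 16;
 *	}
 */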
2613 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
2614 struct drm_file *file)
2616 struct drm_i915_gem_busy *args = data;
2617 struct drm_i915_gem_object *obj;
2620 ret = i915_mutex_lock_interruptible(dev);
2624 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2625 if (&obj->base == NULL) {
2630 /* Count all active objects as busy, even if they are currently not used
2631 * by the gpu. Users of this interface expect objects to eventually
2632 * become non-busy without any further actions, therefore emit any
2633 * necessary flushes here.
2635 ret = i915_gem_object_flush_active(obj);
2637 args->busy = obj->active;
2639 args->busy |= intel_ring_flag(obj->ring) << 16;
2642 drm_gem_object_unreference(&obj->base);
2649 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
2650 struct drm_file *file_priv)
2652 return i915_gem_ring_throttle(dev, file_priv);
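/*
 * Ioctl: tell the kernel whether an object's backing storage will be needed
 * again (I915_MADV_WILLNEED) or may be discarded under memory pressure
 * (I915_MADV_DONTNEED).  Pinned objects are rejected; args->retained reports
 * whether the backing store still exists.
 */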
2656 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
2657 struct drm_file *file_priv)
2659 struct drm_i915_gem_madvise *args = data;
2660 struct drm_i915_gem_object *obj;
2663 switch (args->madv) {
2664 case I915_MADV_DONTNEED:
2665 case I915_MADV_WILLNEED:
2671 ret = i915_mutex_lock_interruptible(dev);
2675 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
2676 if (&obj->base == NULL) {
2681 if (obj->pin_count) {
2686 if (obj->madv != __I915_MADV_PURGED)
2687 obj->madv = args->madv;
2689 /* if the object is no longer attached, discard its backing storage */
2690 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
2691 i915_gem_object_truncate(obj);
2693 args->retained = obj->madv != __I915_MADV_PURGED;
2696 drm_gem_object_unreference(&obj->base);
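/*
 * Allocate and initialise a new GEM object of the given size.  The object
 * starts out in the CPU domain with no GTT binding; backing pages are
 * populated later, on first bind or fault.
 */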
2702 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
2705 struct drm_i915_private *dev_priv;
2706 struct drm_i915_gem_object *obj;
2708 dev_priv = dev->dev_private;
2710 obj = kmalloc(sizeof(*obj), DRM_I915_GEM, M_WAITOK | M_ZERO);
2712 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
2713 drm_free(obj, DRM_I915_GEM);
2717 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2718 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2721 /* On some devices, we can have the GPU use the LLC (the CPU
2722 * cache) for about a 10% performance improvement
2723 * compared to uncached. Graphics requests other than
2724 * display scanout are coherent with the CPU in
2725 * accessing this cache. This means in this mode we
2726 * don't need to clflush on the CPU side, and on the
2727 * GPU side we only need to flush internal caches to
2728 * get data visible to the CPU.
2730 * However, we maintain the display planes as UC, and so
2731 * need to rebind when first used as such.
2733 obj->cache_level = I915_CACHE_LLC;
2735 obj->cache_level = I915_CACHE_NONE;
2736 obj->base.driver_private = NULL;
2737 obj->fence_reg = I915_FENCE_REG_NONE;
2738 INIT_LIST_HEAD(&obj->mm_list);
2739 INIT_LIST_HEAD(&obj->gtt_list);
2740 INIT_LIST_HEAD(&obj->ring_list);
2741 INIT_LIST_HEAD(&obj->exec_list);
2742 INIT_LIST_HEAD(&obj->gpu_write_list);
2743 obj->madv = I915_MADV_WILLNEED;
2744 /* Avoid an unnecessary call to unbind on the first bind. */
2745 obj->map_and_fenceable = true;
2747 i915_gem_info_add_obj(dev_priv, size);
2752 int i915_gem_init_object(struct drm_gem_object *obj)
2759 void i915_gem_free_object(struct drm_gem_object *gem_obj)
2761 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2762 struct drm_device *dev = obj->base.dev;
2763 drm_i915_private_t *dev_priv = dev->dev_private;
2766 i915_gem_detach_phys_object(dev, obj);
2769 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
2770 bool was_interruptible;
2772 was_interruptible = dev_priv->mm.interruptible;
2773 dev_priv->mm.interruptible = false;
2775 WARN_ON(i915_gem_object_unbind(obj));
2777 dev_priv->mm.interruptible = was_interruptible;
2780 drm_gem_free_mmap_offset(&obj->base);
2782 drm_gem_object_release(&obj->base);
2783 i915_gem_info_remove_obj(dev_priv, obj->base.size);
2785 drm_free(obj->bit_17, DRM_I915_GEM);
2786 drm_free(obj, DRM_I915_GEM);
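/*
 * Set up the global GTT bookkeeping: initialise the drm_mm allocator over
 * [start, end), record the mappable aperture split, clear the range and
 * register the aperture as fictitious physical memory so the fault handler
 * can insert its pages.
 */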
2790 i915_gem_do_init(struct drm_device *dev, unsigned long start,
2791 unsigned long mappable_end, unsigned long end)
2793 drm_i915_private_t *dev_priv;
2794 unsigned long mappable;
2797 dev_priv = dev->dev_private;
2798 mappable = min(end, mappable_end) - start;
2800 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
2802 dev_priv->mm.gtt_start = start;
2803 dev_priv->mm.gtt_mappable_end = mappable_end;
2804 dev_priv->mm.gtt_end = end;
2805 dev_priv->mm.gtt_total = end - start;
2806 dev_priv->mm.mappable_gtt_total = mappable;
2808 /* Take over this portion of the GTT */
2809 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
2810 device_printf(dev->dev,
2811 "taking over the fictitious range 0x%lx-0x%lx\n",
2812 dev->agp->base + start, dev->agp->base + start + mappable);
2813 error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
2814 dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
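/*
 * Quiesce the GPU for suspend or unload: wait for it to go idle, retire
 * outstanding requests, drop the fence registers and tear down the rings.
 */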
2819 i915_gem_idle(struct drm_device *dev)
2821 drm_i915_private_t *dev_priv = dev->dev_private;
2826 if (dev_priv->mm.suspended) {
2831 ret = i915_gpu_idle(dev);
2836 i915_gem_retire_requests(dev);
2838 /* Under UMS, be paranoid and evict. */
2839 if (!drm_core_check_feature(dev, DRIVER_MODESET))
2840 i915_gem_evict_everything(dev);
2842 i915_gem_reset_fences(dev);
2844 /* Hack! Don't let anybody do execbuf while we don't control the chip.
2845 * We need to replace this with a semaphore, or something.
2846 * And not confound mm.suspended!
2848 dev_priv->mm.suspended = 1;
2849 del_timer_sync(&dev_priv->hangcheck_timer);
2851 i915_kernel_lost_context(dev);
2852 i915_gem_cleanup_ringbuffer(dev);
2856 /* Cancel the retire work handler, which should be idle now. */
2857 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
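/*
 * Rewrite the GEN7 L3 remapping registers from the saved parity remap
 * information, with DOP clock gating temporarily disabled while the
 * registers are updated.
 */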
2862 void i915_gem_l3_remap(struct drm_device *dev)
2864 drm_i915_private_t *dev_priv = dev->dev_private;
2868 if (!HAS_L3_GPU_CACHE(dev))
2871 if (!dev_priv->l3_parity.remap_info)
2874 misccpctl = I915_READ(GEN7_MISCCPCTL);
2875 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
2876 POSTING_READ(GEN7_MISCCPCTL);
2878 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
2879 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
2880 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
2881 DRM_DEBUG("0x%x was already programmed to %x\n",
2882 GEN7_L3LOG_BASE + i, remap);
2883 if (remap && !dev_priv->l3_parity.remap_info[i/4])
2884 DRM_DEBUG_DRIVER("Clearing remapped register\n");
2885 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
2888 /* Make sure all the writes land before disabling dop clock gating */
2889 POSTING_READ(GEN7_L3LOG_BASE);
2891 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
2894 void i915_gem_init_swizzling(struct drm_device *dev)
2896 drm_i915_private_t *dev_priv = dev->dev_private;
2898 if (INTEL_INFO(dev)->gen < 5 ||
2899 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
2902 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
2903 DISP_TILE_SURFACE_SWIZZLING);
2908 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
2910 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
2912 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
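/*
 * Report whether the BLT ring may be used; the blitter is broken on early
 * pre-production GEN6 parts (PCI revision < 8).
 */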
2916 intel_enable_blt(struct drm_device *dev)
2923 /* The blitter was dysfunctional on early prototypes */
2924 revision = pci_read_config(dev->dev, PCIR_REVID, 1);
2925 if (IS_GEN6(dev) && revision < 8) {
2926 DRM_INFO("BLT not supported on this pre-production hardware;"
2927 " graphics performance will be degraded.\n");
2935 i915_gem_init_hw(struct drm_device *dev)
2937 drm_i915_private_t *dev_priv = dev->dev_private;
2940 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
2941 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
2943 i915_gem_l3_remap(dev);
2945 i915_gem_init_swizzling(dev);
2947 ret = intel_init_render_ring_buffer(dev);
2952 ret = intel_init_bsd_ring_buffer(dev);
2954 goto cleanup_render_ring;
2957 if (intel_enable_blt(dev)) {
2958 ret = intel_init_blt_ring_buffer(dev);
2960 goto cleanup_bsd_ring;
2963 dev_priv->next_seqno = 1;
2966 * XXX: There was some w/a described somewhere suggesting loading
2967 * contexts before PPGTT.
2969 #if 0 /* XXX: HW context support */
2970 i915_gem_context_init(dev);
2972 i915_gem_init_ppgtt(dev);
2977 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
2978 cleanup_render_ring:
2979 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
2984 intel_enable_ppgtt(struct drm_device *dev)
2986 if (i915_enable_ppgtt >= 0)
2987 return i915_enable_ppgtt;
2989 /* Disable ppgtt on SNB if VT-d is on. */
2990 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled)
2996 int i915_gem_init(struct drm_device *dev)
2998 struct drm_i915_private *dev_priv = dev->dev_private;
2999 unsigned long prealloc_size, gtt_size, mappable_size;
3002 prealloc_size = dev_priv->mm.gtt->stolen_size;
3003 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3004 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3006 /* Basic memrange allocator for stolen space */
3007 drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
3010 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3011 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3012 * aperture accordingly when using aliasing ppgtt. */
3013 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3014 /* For paranoia keep the guard page in between. */
3015 gtt_size -= PAGE_SIZE;
3017 i915_gem_do_init(dev, 0, mappable_size, gtt_size);
3019 ret = i915_gem_init_aliasing_ppgtt(dev);
3025 /* Let GEM Manage all of the aperture.
3027 * However, leave one page at the end still bound to the scratch
3028 * page. There are a number of places where the hardware
3029 * apparently prefetches past the end of the object, and we've
3030 * seen multiple hangs with the GPU head pointer stuck in a
3031 * batchbuffer bound at the last page of the aperture. One page
3032 * should be enough to keep any prefetching inside of the aperture.
3035 i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
3038 ret = i915_gem_init_hw(dev);
3041 i915_gem_cleanup_aliasing_ppgtt(dev);
3046 /* Try to set up FBC with a reasonable compressed buffer size */
3047 if (I915_HAS_FBC(dev) && i915_powersave) {
3050 /* Leave 1M for line length buffer & misc. */
3052 /* Try to get a 32M buffer... */
3053 if (prealloc_size > (36*1024*1024))
3054 cfb_size = 32*1024*1024;
3055 else /* fall back to 7/8 of the stolen space */
3056 cfb_size = prealloc_size * 7 / 8;
3057 i915_setup_compression(dev, cfb_size);
3061 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3062 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3063 dev_priv->dri1.allow_batchbuffer = 1;
3068 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3070 drm_i915_private_t *dev_priv = dev->dev_private;
3071 struct intel_ring_buffer *ring;
3074 for_each_ring(ring, dev_priv, i)
3075 intel_cleanup_ring_buffer(ring);
3079 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3080 struct drm_file *file_priv)
3082 drm_i915_private_t *dev_priv = dev->dev_private;
3085 if (drm_core_check_feature(dev, DRIVER_MODESET))
3088 if (atomic_read(&dev_priv->mm.wedged)) {
3089 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3090 atomic_set(&dev_priv->mm.wedged, 0);
3094 dev_priv->mm.suspended = 0;
3096 ret = i915_gem_init_hw(dev);
3102 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list"));
3105 ret = drm_irq_install(dev);
3107 goto cleanup_ringbuffer;
3113 i915_gem_cleanup_ringbuffer(dev);
3114 dev_priv->mm.suspended = 1;
3121 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3122 struct drm_file *file_priv)
3124 if (drm_core_check_feature(dev, DRIVER_MODESET))
3127 drm_irq_uninstall(dev);
3128 return i915_gem_idle(dev);
3132 i915_gem_lastclose(struct drm_device *dev)
3136 if (drm_core_check_feature(dev, DRIVER_MODESET))
3139 ret = i915_gem_idle(dev);
3141 DRM_ERROR("failed to idle hardware: %d\n", ret);
3145 init_ring_lists(struct intel_ring_buffer *ring)
3147 INIT_LIST_HEAD(&ring->active_list);
3148 INIT_LIST_HEAD(&ring->request_list);
3149 INIT_LIST_HEAD(&ring->gpu_write_list);
3153 i915_gem_load(struct drm_device *dev)
3156 drm_i915_private_t *dev_priv = dev->dev_private;
3158 INIT_LIST_HEAD(&dev_priv->mm.active_list);
3159 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3160 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3161 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3162 for (i = 0; i < I915_NUM_RINGS; i++)
3163 init_ring_lists(&dev_priv->ring[i]);
3164 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3165 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3166 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3167 i915_gem_retire_work_handler);
3168 init_completion(&dev_priv->error_completion);
3170 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3172 I915_WRITE(MI_ARB_STATE,
3173 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
3176 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3178 /* Old X drivers will take 0-2 for front, back, depth buffers */
3179 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3180 dev_priv->fence_reg_start = 3;
3182 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3183 dev_priv->num_fence_regs = 16;
3185 dev_priv->num_fence_regs = 8;
3187 /* Initialize fence registers to zero */
3188 i915_gem_reset_fences(dev);
3190 i915_gem_detect_bit_6_swizzle(dev);
3191 init_waitqueue_head(&dev_priv->pending_flip_queue);
3193 dev_priv->mm.interruptible = true;
3196 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3197 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3198 register_shrinker(&dev_priv->mm.inactive_shrinker);
3200 dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem,
3201 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY);
3206 * Create a physically contiguous memory object for this object
3207 * e.g. for cursor + overlay regs
3209 static int i915_gem_init_phys_object(struct drm_device *dev,
3210 int id, int size, int align)
3212 drm_i915_private_t *dev_priv = dev->dev_private;
3213 struct drm_i915_gem_phys_object *phys_obj;
3216 if (dev_priv->mm.phys_objs[id - 1] || !size)
3219 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), DRM_I915_GEM,
3226 phys_obj->handle = drm_pci_alloc(dev, size, align, ~0);
3227 if (!phys_obj->handle) {
3231 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr,
3232 size / PAGE_SIZE, PAT_WRITE_COMBINING);
3234 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3239 drm_free(phys_obj, DRM_I915_GEM);
3243 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3245 drm_i915_private_t *dev_priv = dev->dev_private;
3246 struct drm_i915_gem_phys_object *phys_obj;
3248 if (!dev_priv->mm.phys_objs[id - 1])
3251 phys_obj = dev_priv->mm.phys_objs[id - 1];
3252 if (phys_obj->cur_obj) {
3253 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3256 drm_pci_free(dev, phys_obj->handle);
3257 drm_free(phys_obj, DRM_I915_GEM);
3258 dev_priv->mm.phys_objs[id - 1] = NULL;
3261 void i915_gem_free_all_phys_object(struct drm_device *dev)
3265 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3266 i915_gem_free_phys_object(dev, i);
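/*
 * Detach an object from its physically contiguous backing store: copy the
 * contents back into the object's regular pages, flush, and break the
 * association between the phys object and the GEM object.
 */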
3269 void i915_gem_detach_phys_object(struct drm_device *dev,
3270 struct drm_i915_gem_object *obj)
3279 vaddr = obj->phys_obj->handle->vaddr;
3281 page_count = obj->base.size / PAGE_SIZE;
3282 VM_OBJECT_LOCK(obj->base.vm_obj);
3283 for (i = 0; i < page_count; i++) {
3284 m = i915_gem_wire_page(obj->base.vm_obj, i);
3288 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3289 sf = sf_buf_alloc(m);
3291 dst = (char *)sf_buf_kva(sf);
3292 memcpy(dst, vaddr + IDX_TO_OFF(i), PAGE_SIZE);
3295 drm_clflush_pages(&m, 1);
3297 VM_OBJECT_LOCK(obj->base.vm_obj);
3298 vm_page_reference(m);
3300 vm_page_busy_wait(m, FALSE, "i915gem");
3301 vm_page_unwire(m, 0);
3303 atomic_add_long(&i915_gem_wired_pages_cnt, -1);
3305 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3306 intel_gtt_chipset_flush();
3308 obj->phys_obj->cur_obj = NULL;
3309 obj->phys_obj = NULL;
3313 i915_gem_attach_phys_object(struct drm_device *dev,
3314 struct drm_i915_gem_object *obj,
3318 drm_i915_private_t *dev_priv = dev->dev_private;
3322 int i, page_count, ret;
3324 if (id > I915_MAX_PHYS_OBJECT)
3327 if (obj->phys_obj) {
3328 if (obj->phys_obj->id == id)
3330 i915_gem_detach_phys_object(dev, obj);
3333 /* create a new object */
3334 if (!dev_priv->mm.phys_objs[id - 1]) {
3335 ret = i915_gem_init_phys_object(dev, id,
3336 obj->base.size, align);
3338 DRM_ERROR("failed to init phys object %d size: %zu\n",
3339 id, obj->base.size);
3344 /* bind to the object */
3345 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3346 obj->phys_obj->cur_obj = obj;
3348 page_count = obj->base.size / PAGE_SIZE;
3350 VM_OBJECT_LOCK(obj->base.vm_obj);
3352 for (i = 0; i < page_count; i++) {
3353 m = i915_gem_wire_page(obj->base.vm_obj, i);
3358 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3359 sf = sf_buf_alloc(m);
3360 src = (char *)sf_buf_kva(sf);
3361 dst = (char *)obj->phys_obj->handle->vaddr + IDX_TO_OFF(i);
3362 memcpy(dst, src, PAGE_SIZE);
3365 VM_OBJECT_LOCK(obj->base.vm_obj);
3367 vm_page_reference(m);
3368 vm_page_busy_wait(m, FALSE, "i915gem");
3369 vm_page_unwire(m, 0);
3371 atomic_add_long(&i915_gem_wired_pages_cnt, -1);
3373 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3379 i915_gem_phys_pwrite(struct drm_device *dev,
3380 struct drm_i915_gem_object *obj,
3384 struct drm_file *file_priv)
3386 char *user_data, *vaddr;
3389 vaddr = (char *)obj->phys_obj->handle->vaddr + offset;
3390 user_data = (char *)(uintptr_t)data_ptr;
3392 if (copyin_nofault(user_data, vaddr, size) != 0) {
3393 /* The physical object once assigned is fixed for the lifetime
3394 * of the obj, so we can safely drop the lock and continue to access vaddr.
3398 ret = -copyin(user_data, vaddr, size);
3404 intel_gtt_chipset_flush();
3408 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
3410 struct drm_i915_file_private *file_priv = file->driver_priv;
3412 /* Clean up our request list when the client is going away, so that
3413 * later retire_requests won't dereference our soon-to-be-gone file_priv.
3416 spin_lock(&file_priv->mm.lock);
3417 while (!list_empty(&file_priv->mm.request_list)) {
3418 struct drm_i915_gem_request *request;
3420 request = list_first_entry(&file_priv->mm.request_list,
3421 struct drm_i915_gem_request,
3423 list_del(&request->client_list);
3424 request->file_priv = NULL;
3426 spin_unlock(&file_priv->mm.lock);
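/*
 * Slow path for pread/pwrite: walk the object one page at a time through
 * sf_bufs, honouring bit-17 swizzling where required, and copy to or from
 * the already-wired user buffer with copyin/copyout_nofault.
 */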
3430 i915_gem_swap_io(struct drm_device *dev, struct drm_i915_gem_object *obj,
3431 uint64_t data_ptr, uint64_t size, uint64_t offset, enum uio_rw rw,
3432 struct drm_file *file)
3439 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;
3441 if (obj->gtt_offset != 0 && rw == UIO_READ)
3442 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
3444 do_bit17_swizzling = 0;
3447 vm_obj = obj->base.vm_obj;
3450 VM_OBJECT_LOCK(vm_obj);
3451 vm_object_pip_add(vm_obj, 1);
3453 obj_pi = OFF_TO_IDX(offset);
3454 obj_po = offset & PAGE_MASK;
3456 m = i915_gem_wire_page(vm_obj, obj_pi);
3457 VM_OBJECT_UNLOCK(vm_obj);
3459 sf = sf_buf_alloc(m);
3460 mkva = sf_buf_kva(sf);
3461 length = min(size, PAGE_SIZE - obj_po);
3462 while (length > 0) {
3463 if (do_bit17_swizzling &&
3464 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
3465 cnt = roundup2(obj_po + 1, 64);
3466 cnt = min(cnt - obj_po, length);
3467 swizzled_po = obj_po ^ 64;
3470 swizzled_po = obj_po;
3473 ret = -copyout_nofault(
3474 (char *)mkva + swizzled_po,
3475 (void *)(uintptr_t)data_ptr, cnt);
3477 ret = -copyin_nofault(
3478 (void *)(uintptr_t)data_ptr,
3479 (char *)mkva + swizzled_po, cnt);
3489 VM_OBJECT_LOCK(vm_obj);
3490 if (rw == UIO_WRITE)
3492 vm_page_reference(m);
3493 vm_page_busy_wait(m, FALSE, "i915gem");
3494 vm_page_unwire(m, 1);
3496 atomic_add_long(&i915_gem_wired_pages_cnt, -1);
3501 vm_object_pip_wakeup(vm_obj);
3502 VM_OBJECT_UNLOCK(vm_obj);
3508 i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj,
3509 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file)
3515 * Pass the unaligned physical address and size to pmap_mapdev_attr()
3516 * so it can properly calculate whether an extra page needs to be
3517 * mapped or not to cover the requested range. The function will
3518 * add the page offset into the returned mkva for us.
3520 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset +
3521 offset, size, PAT_WRITE_COMBINING);
3522 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size);
3523 pmap_unmapdev(mkva, size);
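/*
 * Common worker for the pread/pwrite ioctls on this port: wire the user
 * buffer, look up the object, validate the range and dispatch to the phys,
 * GTT write or CPU copy path as appropriate.
 */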
3528 i915_gem_obj_io(struct drm_device *dev, uint32_t handle, uint64_t data_ptr,
3529 uint64_t size, uint64_t offset, enum uio_rw rw, struct drm_file *file)
3531 struct drm_i915_gem_object *obj;
3533 vm_offset_t start, end;
3538 start = trunc_page(data_ptr);
3539 end = round_page(data_ptr + size);
3540 npages = howmany(end - start, PAGE_SIZE);
3541 ma = kmalloc(npages * sizeof(vm_page_t), DRM_I915_GEM, M_WAITOK |
3543 npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
3544 (vm_offset_t)data_ptr, size,
3545 (rw == UIO_READ ? VM_PROT_WRITE : 0) | VM_PROT_READ, ma, npages);
3551 ret = i915_mutex_lock_interruptible(dev);
3555 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
3556 if (&obj->base == NULL) {
3560 if (offset > obj->base.size || size > obj->base.size - offset) {
3565 if (rw == UIO_READ) {
3566 ret = i915_gem_swap_io(dev, obj, data_ptr, size, offset,
3569 if (obj->phys_obj) {
3570 ret = i915_gem_phys_pwrite(dev, obj, data_ptr, offset,
3572 } else if (obj->gtt_space &&
3573 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
3574 ret = i915_gem_object_pin(obj, 0, true);
3577 ret = i915_gem_object_set_to_gtt_domain(obj, true);
3580 ret = i915_gem_object_put_fence(obj);
3583 ret = i915_gem_gtt_write(dev, obj, data_ptr, size,
3586 i915_gem_object_unpin(obj);
3588 ret = i915_gem_object_set_to_cpu_domain(obj, true);
3591 ret = i915_gem_swap_io(dev, obj, data_ptr, size, offset,
3596 drm_gem_object_unreference(&obj->base);
3600 vm_page_unhold_pages(ma, npages);
3602 drm_free(ma, DRM_I915_GEM);
3607 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
3608 vm_ooffset_t foff, struct ucred *cred, u_short *color)
3611 *color = 0; /* XXXKIB */
3618 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
3621 struct drm_gem_object *gem_obj;
3622 struct drm_i915_gem_object *obj;
3623 struct drm_device *dev;
3624 drm_i915_private_t *dev_priv;
3629 gem_obj = vm_obj->handle;
3630 obj = to_intel_bo(gem_obj);
3631 dev = obj->base.dev;
3632 dev_priv = dev->dev_private;
3634 write = (prot & VM_PROT_WRITE) != 0;
3638 vm_object_pip_add(vm_obj, 1);
3641 * Remove the placeholder page inserted by vm_fault() from the
3642 * object before dropping the object lock. If
3643 * i915_gem_release_mmap() is active in parallel on this gem
3644 * object, then it owns the drm device sx and might find the
3645 * placeholder already. Then, since the page is busy,
3646 * i915_gem_release_mmap() sleeps waiting for the busy state
3647 * of the page to be cleared. We will not be able to acquire the drm
3648 * device lock until i915_gem_release_mmap() is able to make progress.
3651 if (*mres != NULL) {
3653 vm_page_remove(oldm);
3658 VM_OBJECT_UNLOCK(vm_obj);
3664 ret = i915_mutex_lock_interruptible(dev);
3673 * Since the object lock was dropped, other thread might have
3674 * faulted on the same GTT address and instantiated the
3675 * mapping for the page. Recheck.
3677 VM_OBJECT_LOCK(vm_obj);
3678 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
3680 if ((m->flags & PG_BUSY) != 0) {
3683 vm_page_sleep(m, "915pee");
3689 VM_OBJECT_UNLOCK(vm_obj);
3691 /* Now bind it into the GTT if needed */
3692 if (!obj->map_and_fenceable) {
3693 ret = i915_gem_object_unbind(obj);
3699 if (!obj->gtt_space) {
3700 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
3706 ret = i915_gem_object_set_to_gtt_domain(obj, write);
3713 if (obj->tiling_mode == I915_TILING_NONE)
3714 ret = i915_gem_object_put_fence(obj);
3716 ret = i915_gem_object_get_fence(obj);
3722 if (i915_gem_object_is_inactive(obj))
3723 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3725 obj->fault_mappable = true;
3726 VM_OBJECT_LOCK(vm_obj);
3727 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
3734 KASSERT((m->flags & PG_FICTITIOUS) != 0,
3735 ("not fictitious %p", m));
3736 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
3738 if ((m->flags & PG_BUSY) != 0) {
3741 vm_page_sleep(m, "915pbs");
3745 m->valid = VM_PAGE_BITS_ALL;
3746 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
3749 vm_page_busy_try(m, false);
3755 vm_object_pip_wakeup(vm_obj);
3756 return (VM_PAGER_OK);
3761 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
3762 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
3763 goto unlocked_vmobj;
3765 VM_OBJECT_LOCK(vm_obj);
3766 vm_object_pip_wakeup(vm_obj);
3767 return (VM_PAGER_ERROR);
3771 i915_gem_pager_dtor(void *handle)
3773 struct drm_gem_object *obj;
3774 struct drm_device *dev;
3780 drm_gem_free_mmap_offset(obj);
3781 i915_gem_release_mmap(to_intel_bo(obj));
3782 drm_gem_object_unreference(obj);
3786 struct cdev_pager_ops i915_gem_pager_ops = {
3787 .cdev_pg_fault = i915_gem_pager_fault,
3788 .cdev_pg_ctor = i915_gem_pager_ctor,
3789 .cdev_pg_dtor = i915_gem_pager_dtor
3792 #define GEM_PARANOID_CHECK_GTT 0
3793 #if GEM_PARANOID_CHECK_GTT
3795 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma,
3798 struct drm_i915_private *dev_priv;
3800 unsigned long start, end;
3804 dev_priv = dev->dev_private;
3805 start = OFF_TO_IDX(dev_priv->mm.gtt_start);
3806 end = OFF_TO_IDX(dev_priv->mm.gtt_end);
3807 for (i = start; i < end; i++) {
3808 pa = intel_gtt_read_pte_paddr(i);
3809 for (j = 0; j < page_count; j++) {
3810 if (pa == VM_PAGE_TO_PHYS(ma[j])) {
3811 panic("Page %p in GTT pte index %d pte %x",
3812 ma[j], i, intel_gtt_read_pte(i));
3820 i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
3821 uint32_t flush_domains)
3823 struct drm_i915_gem_object *obj, *next;
3824 uint32_t old_write_domain;
3826 list_for_each_entry_safe(obj, next, &ring->gpu_write_list,
3828 if (obj->base.write_domain & flush_domains) {
3829 old_write_domain = obj->base.write_domain;
3830 obj->base.write_domain = 0;
3831 list_del_init(&obj->gpu_write_list);
3832 i915_gem_object_move_to_active(obj, ring);
3837 #define VM_OBJECT_LOCK_ASSERT_OWNED(object)
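/*
 * Grab and wire a single page of the VM object, paging it in (or
 * zero-filling it) if it is not yet valid.  The caller must hold the VM
 * object lock.
 */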
3840 i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex)
3845 VM_OBJECT_LOCK_ASSERT_OWNED(object);
3846 m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3847 if (m->valid != VM_PAGE_BITS_ALL) {
3848 if (vm_pager_has_page(object, pindex)) {
3849 rv = vm_pager_get_page(object, &m, 1);
3850 m = vm_page_lookup(object, pindex);
3853 if (rv != VM_PAGER_OK) {
3858 pmap_zero_page(VM_PAGE_TO_PHYS(m));
3859 m->valid = VM_PAGE_BITS_ALL;
3865 atomic_add_long(&i915_gem_wired_pages_cnt, 1);
3870 i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains,
3871 uint32_t flush_domains)
3875 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
3878 ret = ring->flush(ring, invalidate_domains, flush_domains);
3882 if (flush_domains & I915_GEM_GPU_DOMAINS)
3883 i915_gem_process_flushing_list(ring, flush_domains);
3888 i915_gpu_is_active(struct drm_device *dev)
3890 drm_i915_private_t *dev_priv = dev->dev_private;
3892 return !list_empty(&dev_priv->mm.active_list);
3896 i915_gem_lowmem(void *arg)
3898 struct drm_device *dev;
3899 struct drm_i915_private *dev_priv;
3900 struct drm_i915_gem_object *obj, *next;
3901 int cnt, cnt_fail, cnt_total;
3904 dev_priv = dev->dev_private;
3906 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT))
3910 /* first scan for clean buffers */
3911 i915_gem_retire_requests(dev);
3913 cnt_total = cnt_fail = cnt = 0;
3915 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
3917 if (i915_gem_object_is_purgeable(obj)) {
3918 if (i915_gem_object_unbind(obj) != 0)
3924 /* second pass, evict/count anything still on the inactive list */
3925 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
3927 if (i915_gem_object_unbind(obj) == 0)
3933 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) {
3935 * We are desperate for pages, so as a last resort, wait
3936 * for the GPU to finish and discard whatever we can.
3937 * This has a dramatic impact to reduce the number of
3938 * OOM-killer events whilst running the GPU aggressively.
3940 if (i915_gpu_idle(dev) == 0)
3947 i915_gem_unload(struct drm_device *dev)
3949 struct drm_i915_private *dev_priv;
3951 dev_priv = dev->dev_private;
3952 EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem);