2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
26 * Copyright (c) 2011 The FreeBSD Foundation
27 * All rights reserved.
29 * This software was developed by Konstantin Belousov under sponsorship from
30 * the FreeBSD Foundation.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 #include <sys/resourcevar.h>
56 #include <sys/sfbuf.h>
59 #include <drm/i915_drm.h>
61 #include "intel_drv.h"
62 #include "intel_ringbuffer.h"
63 #include <linux/completion.h>
64 #include <linux/jiffies.h>
65 #include <linux/time.h>
67 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
68 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
69 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
71 bool map_and_fenceable,
73 static int i915_gem_phys_pwrite(struct drm_device *dev,
74 struct drm_i915_gem_object *obj,
75 struct drm_i915_gem_pwrite *args,
76 struct drm_file *file);
78 static void i915_gem_write_fence(struct drm_device *dev, int reg,
79 struct drm_i915_gem_object *obj);
80 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
81 struct drm_i915_fence_reg *fence,
84 static uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size,
86 static uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
87 uint32_t size, int tiling_mode);
88 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
90 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj);
91 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
93 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
96 i915_gem_release_mmap(obj);
98 /* As we do not have an associated fence register, we will force
99 * a tiling change if we ever need to acquire one.
101 obj->fence_dirty = false;
102 obj->fence_reg = I915_FENCE_REG_NONE;
105 static int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj);
106 static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
107 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj);
108 static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex);
109 static void i915_gem_reset_fences(struct drm_device *dev);
110 static void i915_gem_lowmem(void *arg);
112 /* some bookkeeping */
113 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
116 dev_priv->mm.object_count++;
117 dev_priv->mm.object_memory += size;
120 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
123 dev_priv->mm.object_count--;
124 dev_priv->mm.object_memory -= size;
128 i915_gem_wait_for_error(struct drm_device *dev)
130 struct drm_i915_private *dev_priv = dev->dev_private;
131 struct completion *x = &dev_priv->error_completion;
134 if (!atomic_read(&dev_priv->mm.wedged))
138 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
139 * userspace. If it takes that long something really bad is going on and
140 * we should simply try to bail out and fail as gracefully as possible.
142 ret = wait_for_completion_interruptible_timeout(x, 10*hz);
144 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
146 } else if (ret < 0) {
150 if (atomic_read(&dev_priv->mm.wedged)) {
151 /* GPU is hung, bump the completion count to account for
152 * the token we just consumed so that we never hit zero and
153 * end up waiting upon a subsequent completion event that
156 spin_lock(&x->wait.lock);
158 spin_unlock(&x->wait.lock);
163 int i915_mutex_lock_interruptible(struct drm_device *dev)
167 ret = i915_gem_wait_for_error(dev);
171 ret = lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_SLEEPFAIL);
175 WARN_ON(i915_verify_lists(dev));
180 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
186 i915_gem_init_ioctl(struct drm_device *dev, void *data,
187 struct drm_file *file)
189 struct drm_i915_gem_init *args = data;
191 if (drm_core_check_feature(dev, DRIVER_MODESET))
194 if (args->gtt_start >= args->gtt_end ||
195 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
198 /* GEM with user mode setting was never supported on ilk and later. */
199 if (INTEL_INFO(dev)->gen >= 5)
202 lockmgr(&dev->dev_lock, LK_EXCLUSIVE|LK_RETRY|LK_CANRECURSE);
203 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
204 lockmgr(&dev->dev_lock, LK_RELEASE);
210 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
211 struct drm_file *file)
213 struct drm_i915_private *dev_priv = dev->dev_private;
214 struct drm_i915_gem_get_aperture *args = data;
215 struct drm_i915_gem_object *obj;
220 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
222 pinned += obj->gtt_space->size;
225 args->aper_size = dev_priv->mm.gtt_total;
226 args->aper_available_size = args->aper_size - pinned;
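/*
 * Illustrative userspace sketch (not part of the driver, compiled out):
 * querying the totals computed above via DRM_IOCTL_I915_GEM_GET_APERTURE.
 * The drm fd setup is assumed and the helper name is hypothetical.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void
example_print_aperture(int drm_fd)
{
	struct drm_i915_gem_get_aperture aper;

	memset(&aper, 0, sizeof(aper));
	/* aper_size is the total GTT; aper_available_size excludes pinned objects. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper) == 0)
		printf("aperture %ju bytes, %ju available\n",
		    (uintmax_t)aper.aper_size,
		    (uintmax_t)aper.aper_available_size);
}
#endif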
232 i915_gem_create(struct drm_file *file,
233 struct drm_device *dev,
237 struct drm_i915_gem_object *obj;
241 size = roundup(size, PAGE_SIZE);
245 /* Allocate the new object */
246 obj = i915_gem_alloc_object(dev, size);
251 ret = drm_gem_handle_create(file, &obj->base, &handle);
253 drm_gem_object_release(&obj->base);
254 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
255 drm_free(obj, M_DRM);
259 /* drop reference from allocate - handle holds it now */
260 drm_gem_object_unreference(&obj->base);
266 i915_gem_dumb_create(struct drm_file *file,
267 struct drm_device *dev,
268 struct drm_mode_create_dumb *args)
271 /* have to work out size/pitch and return them */
272 args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
273 args->size = args->pitch * args->height;
274 return i915_gem_create(file, dev,
275 args->size, &args->handle);
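/*
 * Worked example of the pitch/size math above (values illustrative): a
 * 1920x1080 dumb buffer at 32 bpp uses (32 + 7) / 8 = 4 bytes per pixel,
 * so pitch = roundup2(1920 * 4, 64) = 7680 (already 64-byte aligned) and
 * size = 7680 * 1080 = 8294400 bytes.
 */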
278 int i915_gem_dumb_destroy(struct drm_file *file,
279 struct drm_device *dev,
283 return drm_gem_handle_delete(file, handle);
287 * Creates a new mm object and returns a handle to it.
290 i915_gem_create_ioctl(struct drm_device *dev, void *data,
291 struct drm_file *file)
293 struct drm_i915_gem_create *args = data;
295 return i915_gem_create(file, dev,
296 args->size, &args->handle);
299 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
301 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
303 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
304 obj->tiling_mode != I915_TILING_NONE;
307 static inline void vm_page_reference(vm_page_t m)
309 vm_page_flag_set(m, PG_REFERENCED);
313 i915_gem_shmem_pread(struct drm_device *dev,
314 struct drm_i915_gem_object *obj,
315 struct drm_i915_gem_pread *args,
316 struct drm_file *file)
323 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;
325 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
328 vm_obj = obj->base.vm_obj;
331 VM_OBJECT_LOCK(vm_obj);
332 vm_object_pip_add(vm_obj, 1);
333 while (args->size > 0) {
334 obj_pi = OFF_TO_IDX(args->offset);
335 obj_po = args->offset & PAGE_MASK;
337 m = i915_gem_wire_page(vm_obj, obj_pi);
338 VM_OBJECT_UNLOCK(vm_obj);
340 sf = sf_buf_alloc(m);
341 mkva = sf_buf_kva(sf);
342 length = min(args->size, PAGE_SIZE - obj_po);
344 if (do_bit17_swizzling &&
345 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
346 cnt = roundup2(obj_po + 1, 64);
347 cnt = min(cnt - obj_po, length);
348 swizzled_po = obj_po ^ 64;
351 swizzled_po = obj_po;
353 ret = -copyout_nofault(
354 (char *)mkva + swizzled_po,
355 (void *)(uintptr_t)args->data_ptr, cnt);
358 args->data_ptr += cnt;
365 VM_OBJECT_LOCK(vm_obj);
366 vm_page_reference(m);
367 vm_page_busy_wait(m, FALSE, "i915gem");
368 vm_page_unwire(m, 1);
374 vm_object_pip_wakeup(vm_obj);
375 VM_OBJECT_UNLOCK(vm_obj);
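/*
 * Illustrative sketch of the bit-17 swizzle fixup in the copy loop above
 * (compiled out, helper name hypothetical): when bit 17 of the page's
 * physical address is set, the two 64-byte halves of every 128-byte span
 * are swapped, so a linear offset maps to offset ^ 64 and each copy is
 * clamped to the next 64-byte boundary via cnt = roundup2(obj_po + 1, 64).
 */
#if 0
static int
example_swizzled_offset(int obj_po, bool phys_bit17_set)
{
	/* e.g. obj_po = 10 becomes 74 when bit 17 is set, else unchanged */
	return phys_bit17_set ? (obj_po ^ 64) : obj_po;
}
#endif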
381 * Reads data from the object referenced by handle.
383 * On error, the contents of *data are undefined.
386 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
387 struct drm_file *file)
389 struct drm_i915_gem_pread *args = data;
390 struct drm_i915_gem_object *obj;
396 ret = i915_mutex_lock_interruptible(dev);
400 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
401 if (&obj->base == NULL) {
406 /* Bounds check source. */
407 if (args->offset > obj->base.size ||
408 args->size > obj->base.size - args->offset) {
413 ret = i915_gem_shmem_pread(dev, obj, args, file);
415 drm_gem_object_unreference(&obj->base);
422 * This is the fast pwrite path, where we copy the data directly from the
423 * user into the GTT, uncached.
426 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
427 struct drm_i915_gem_object *obj,
428 struct drm_i915_gem_pwrite *args,
429 struct drm_file *file)
435 * Pass the unaligned physical address and size to pmap_mapdev_attr()
436 * so it can properly calculate whether an extra page needs to be
437 * mapped or not to cover the requested range. The function will
438 * add the page offset into the returned mkva for us.
440 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset +
441 args->offset, args->size, PAT_WRITE_COMBINING);
442 ret = -copyin_nofault((void *)(uintptr_t)args->data_ptr, (char *)mkva, args->size);
443 pmap_unmapdev(mkva, args->size);
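/*
 * Worked example for the unaligned-mapping note above (numbers
 * illustrative): a 0x40-byte write at GTT offset 0x0ff0 ends at 0x1030, so
 * although the size is far below PAGE_SIZE the range crosses a page
 * boundary; pmap_mapdev_attr() sees the unaligned start and maps
 * howmany((0x0ff0 & PAGE_MASK) + 0x40, PAGE_SIZE) = 2 pages.
 */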
449 i915_gem_shmem_pwrite(struct drm_device *dev,
450 struct drm_i915_gem_object *obj,
451 struct drm_i915_gem_pwrite *args,
452 struct drm_file *file)
459 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;
461 do_bit17_swizzling = 0;
464 vm_obj = obj->base.vm_obj;
467 VM_OBJECT_LOCK(vm_obj);
468 vm_object_pip_add(vm_obj, 1);
469 while (args->size > 0) {
470 obj_pi = OFF_TO_IDX(args->offset);
471 obj_po = args->offset & PAGE_MASK;
473 m = i915_gem_wire_page(vm_obj, obj_pi);
474 VM_OBJECT_UNLOCK(vm_obj);
476 sf = sf_buf_alloc(m);
477 mkva = sf_buf_kva(sf);
478 length = min(args->size, PAGE_SIZE - obj_po);
480 if (do_bit17_swizzling &&
481 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
482 cnt = roundup2(obj_po + 1, 64);
483 cnt = min(cnt - obj_po, length);
484 swizzled_po = obj_po ^ 64;
487 swizzled_po = obj_po;
489 ret = -copyin_nofault(
490 (void *)(uintptr_t)args->data_ptr,
491 (char *)mkva + swizzled_po, cnt);
494 args->data_ptr += cnt;
501 VM_OBJECT_LOCK(vm_obj);
503 vm_page_reference(m);
504 vm_page_busy_wait(m, FALSE, "i915gem");
505 vm_page_unwire(m, 1);
511 vm_object_pip_wakeup(vm_obj);
512 VM_OBJECT_UNLOCK(vm_obj);
518 * Writes data to the object referenced by handle.
520 * On error, the contents of the buffer that were to be modified are undefined.
523 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
524 struct drm_file *file)
526 struct drm_i915_gem_pwrite *args = data;
527 struct drm_i915_gem_object *obj;
533 ret = i915_mutex_lock_interruptible(dev);
537 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
538 if (&obj->base == NULL) {
543 /* Bounds check destination. */
544 if (args->offset > obj->base.size ||
545 args->size > obj->base.size - args->offset) {
551 /* We can only do the GTT pwrite on untiled buffers, as otherwise
552 * it would end up going through the fenced access, and we'll get
553 * different detiling behavior between reading and writing.
554 * pread/pwrite currently are reading and writing from the CPU
555 * perspective, requiring manual detiling by the client.
558 ret = i915_gem_phys_pwrite(dev, obj, args, file);
562 if (obj->cache_level == I915_CACHE_NONE &&
563 obj->tiling_mode == I915_TILING_NONE &&
564 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
565 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
566 /* Note that the gtt paths might fail with non-page-backed user
567 * pointers (e.g. gtt mappings when moving data between
568 * textures). Fallback to the shmem path in that case. */
571 if (ret == -EFAULT || ret == -ENOSPC)
572 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
575 drm_gem_object_unreference(&obj->base);
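/*
 * Illustrative userspace counterpart of the pwrite paths above (compiled
 * out; drm fd setup and error handling omitted, helper name hypothetical).
 * The caller issues one ioctl either way: the kernel picks the fast GTT
 * copy for linear, uncached buffers and quietly falls back to the shmem
 * path otherwise.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int
example_bo_pwrite(int drm_fd, uint32_t handle, uint64_t offset,
    const void *data, uint64_t size)
{
	struct drm_i915_gem_pwrite pw;

	memset(&pw, 0, sizeof(pw));
	pw.handle = handle;
	pw.offset = offset;	/* byte offset into the object */
	pw.size = size;
	pw.data_ptr = (uint64_t)(uintptr_t)data;
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_PWRITE, &pw);
}
#endif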
582 i915_gem_check_wedge(struct drm_i915_private *dev_priv,
585 if (atomic_read(&dev_priv->mm.wedged)) {
586 struct completion *x = &dev_priv->error_completion;
587 bool recovery_complete;
589 /* Give the error handler a chance to run. */
590 spin_lock(&x->wait.lock);
591 recovery_complete = x->done > 0;
592 spin_unlock(&x->wait.lock);
594 /* Non-interruptible callers can't handle -EAGAIN, hence return
595 * -EIO unconditionally for these. */
599 /* Recovery complete, but still wedged means reset failure. */
600 if (recovery_complete)
610 * Compare seqno against outstanding lazy request. Emit a request if they are
614 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
618 DRM_LOCK_ASSERT(ring->dev);
621 if (seqno == ring->outstanding_lazy_request)
622 ret = i915_add_request(ring, NULL, NULL);
628 * __wait_seqno - wait until execution of seqno has finished
629 * @ring: the ring expected to report seqno
631 * @interruptible: do an interruptible wait (normally yes)
632 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
634 * Returns 0 if the seqno was found within the allotted time. Otherwise it returns
635 * the errno, with the remaining time filled in the timeout argument.
637 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
638 bool interruptible, struct timespec *timeout)
640 drm_i915_private_t *dev_priv = ring->dev->dev_private;
641 struct timespec before, now, wait_time={1,0};
642 unsigned long timeout_jiffies;
644 bool wait_forever = true;
647 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
650 if (timeout != NULL) {
651 wait_time = *timeout;
652 wait_forever = false;
655 timeout_jiffies = timespec_to_jiffies(&wait_time);
657 if (WARN_ON(!ring->irq_get(ring)))
660 /* Record current time in case interrupted by signal, or wedged */
661 getrawmonotonic(&before);
664 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
665 atomic_read(&dev_priv->mm.wedged))
668 end = wait_event_interruptible_timeout(ring->irq_queue,
672 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
675 ret = i915_gem_check_wedge(dev_priv, interruptible);
678 } while (end == 0 && wait_forever);
680 getrawmonotonic(&now);
686 struct timespec sleep_time = timespec_sub(now, before);
687 *timeout = timespec_sub(*timeout, sleep_time);
692 case -EAGAIN: /* Wedged */
693 case -ERESTARTSYS: /* Signal */
695 case 0: /* Timeout */
697 set_normalized_timespec(timeout, 0, 0);
698 return -ETIMEDOUT; /* -ETIME on Linux */
699 default: /* Completed */
700 WARN_ON(end < 0); /* We're not aware of other errors */
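/*
 * Illustrative call pattern for __wait_seqno() above (compiled out, helper
 * name hypothetical): wait interruptibly for up to one second and note how
 * the in/out timespec reports the unused part of the budget.
 */
#if 0
static int
example_wait_one_second(struct intel_ring_buffer *ring, u32 seqno)
{
	struct timespec timeout = { 1, 0 };	/* in: budget; out: remaining */
	int ret;

	ret = __wait_seqno(ring, seqno, true, &timeout);
	if (ret == -ETIMEDOUT)
		DRM_DEBUG("seqno %u not signaled within 1s\n", seqno);
	return ret;
}
#endif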
706 * Waits for a sequence number to be signaled, and cleans up the
707 * request and object lists appropriately for that event.
710 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
712 struct drm_device *dev = ring->dev;
713 struct drm_i915_private *dev_priv = dev->dev_private;
716 DRM_LOCK_ASSERT(dev);
719 ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
723 ret = i915_gem_check_olr(ring, seqno);
727 ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
733 * Ensures that all rendering to the object has completed and the object is
734 * safe to unbind from the GTT or access from the CPU.
736 static __must_check int
737 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
740 struct intel_ring_buffer *ring = obj->ring;
744 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
748 ret = i915_wait_seqno(ring, seqno);
752 i915_gem_retire_requests_ring(ring);
754 /* Manually manage the write flush as we may have not yet
755 * retired the buffer.
757 if (obj->last_write_seqno &&
758 i915_seqno_passed(seqno, obj->last_write_seqno)) {
759 obj->last_write_seqno = 0;
760 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
766 /* A nonblocking variant of the above wait. This is a highly dangerous routine
767 * as the object state may change during this call.
769 static __must_check int
770 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
773 struct drm_device *dev = obj->base.dev;
774 struct drm_i915_private *dev_priv = dev->dev_private;
775 struct intel_ring_buffer *ring = obj->ring;
779 DRM_LOCK_ASSERT(dev);
780 BUG_ON(!dev_priv->mm.interruptible);
782 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
786 ret = i915_gem_check_wedge(dev_priv, true);
790 ret = i915_gem_check_olr(ring, seqno);
795 ret = __wait_seqno(ring, seqno, true, NULL);
798 i915_gem_retire_requests_ring(ring);
800 /* Manually manage the write flush as we may have not yet
801 * retired the buffer.
803 if (obj->last_write_seqno &&
804 i915_seqno_passed(seqno, obj->last_write_seqno)) {
805 obj->last_write_seqno = 0;
806 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
813 * Called when user space prepares to use an object with the CPU, either
814 * through the mmap ioctl's mapping or a GTT mapping.
817 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
818 struct drm_file *file)
820 struct drm_i915_gem_set_domain *args = data;
821 struct drm_i915_gem_object *obj;
822 uint32_t read_domains = args->read_domains;
823 uint32_t write_domain = args->write_domain;
826 /* Only handle setting domains to types used by the CPU. */
827 if (write_domain & I915_GEM_GPU_DOMAINS)
830 if (read_domains & I915_GEM_GPU_DOMAINS)
833 /* Having something in the write domain implies it's in the read
834 * domain, and only that read domain. Enforce that in the request.
836 if (write_domain != 0 && read_domains != write_domain)
839 ret = i915_mutex_lock_interruptible(dev);
843 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
844 if (&obj->base == NULL) {
849 /* Try to flush the object off the GPU without holding the lock.
850 * We will repeat the flush holding the lock in the normal manner
851 * to catch cases where we are gazumped.
853 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
857 if (read_domains & I915_GEM_DOMAIN_GTT) {
858 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
860 /* Silently promote "you're not bound, there was nothing to do"
861 * to success, since the client was just asking us to
862 * make sure everything was done.
867 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
871 drm_gem_object_unreference(&obj->base);
878 * Called when user space has done writes to this buffer
881 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
882 struct drm_file *file)
884 struct drm_i915_gem_sw_finish *args = data;
885 struct drm_i915_gem_object *obj;
888 ret = i915_mutex_lock_interruptible(dev);
891 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
892 if (&obj->base == NULL) {
897 /* Pinned buffers may be scanout, so flush the cache */
899 i915_gem_object_flush_cpu_write_domain(obj);
901 drm_gem_object_unreference(&obj->base);
908 * Maps the contents of an object, returning the address it is mapped
911 * While the mapping holds a reference on the contents of the object, it doesn't
912 * imply a ref on the object itself.
915 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
916 struct drm_file *file)
918 struct drm_i915_gem_mmap *args = data;
919 struct drm_gem_object *obj;
920 struct proc *p = curproc;
921 vm_map_t map = &p->p_vmspace->vm_map;
926 obj = drm_gem_object_lookup(dev, file, args->handle);
933 size = round_page(args->size);
935 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
943 vm_object_hold(obj->vm_obj);
944 vm_object_reference_locked(obj->vm_obj);
945 vm_object_drop(obj->vm_obj);
946 rv = vm_map_find(map, obj->vm_obj, NULL,
947 args->offset, &addr, args->size,
948 PAGE_SIZE, /* align */
950 VM_MAPTYPE_NORMAL, /* maptype */
951 VM_PROT_READ | VM_PROT_WRITE, /* prot */
952 VM_PROT_READ | VM_PROT_WRITE, /* max */
953 MAP_SHARED /* cow */);
954 if (rv != KERN_SUCCESS) {
955 vm_object_deallocate(obj->vm_obj);
956 error = -vm_mmap_to_errno(rv);
958 args->addr_ptr = (uint64_t)addr;
961 drm_gem_object_unreference(obj);
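/*
 * Illustrative userspace counterpart of the mmap ioctl above (compiled out;
 * drm fd setup omitted, helper name hypothetical). Unlike the GTT path
 * below, the kernel performs the vm_map_find() itself and returns the CPU
 * address in addr_ptr, so no separate mmap(2) call is needed.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *
example_bo_cpu_map(int drm_fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = 0;		/* map from the start of the object */
	arg.size = size;
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg) != 0)
		return NULL;
	return (void *)(uintptr_t)arg.addr_ptr;
}
#endif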
966 * i915_gem_release_mmap - remove physical page mappings
967 * @obj: obj in question
969 * Preserve the reservation of the mmapping with the DRM core code, but
970 * relinquish ownership of the pages back to the system.
972 * It is vital that we remove the page mapping if we have mapped a tiled
973 * object through the GTT and then lose the fence register due to
974 * resource pressure. Similarly if the object has been moved out of the
975 * aperture, then pages mapped into userspace must be revoked. Removing the
976 * mapping will then trigger a page fault on the next user access, allowing
977 * fixup by i915_gem_fault().
980 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
986 if (!obj->fault_mappable)
989 devobj = cdev_pager_lookup(obj);
990 if (devobj != NULL) {
991 page_count = OFF_TO_IDX(obj->base.size);
993 VM_OBJECT_LOCK(devobj);
994 for (i = 0; i < page_count; i++) {
995 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
998 cdev_pager_free_page(devobj, m);
1000 VM_OBJECT_UNLOCK(devobj);
1001 vm_object_deallocate(devobj);
1004 obj->fault_mappable = false;
1008 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1012 if (INTEL_INFO(dev)->gen >= 4 ||
1013 tiling_mode == I915_TILING_NONE)
1016 /* Previous chips need a power-of-two fence region when tiling */
1017 if (INTEL_INFO(dev)->gen == 3)
1018 gtt_size = 1024*1024;
1020 gtt_size = 512*1024;
1022 while (gtt_size < size)
1029 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1030 * @obj: object to check
1032 * Return the required GTT alignment for an object, taking into account
1033 * potential fence register mapping.
1036 i915_gem_get_gtt_alignment(struct drm_device *dev,
1042 * Minimum alignment is 4k (GTT page size), but might be greater
1043 * if a fence register is needed for the object.
1045 if (INTEL_INFO(dev)->gen >= 4 ||
1046 tiling_mode == I915_TILING_NONE)
1050 * Previous chips need to be aligned to the size of the smallest
1051 * fence register that can contain the object.
1053 return i915_gem_get_gtt_size(dev, size, tiling_mode);
1057 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1060 * @size: size of the object
1061 * @tiling_mode: tiling mode of the object
1063 * Return the required GTT alignment for an object, only taking into account
1064 * unfenced tiled surface requirements.
1067 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1072 * Minimum alignment is 4k (GTT page size) for sane hw.
1074 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1075 tiling_mode == I915_TILING_NONE)
1078 /* Previous hardware however needs to be aligned to a power-of-two
1079 * tile height. The simplest method for determining this is to reuse
1080 * the power-of-two tiled object size.
1082 return i915_gem_get_gtt_size(dev, size, tiling_mode);
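/*
 * Worked example for the three helpers above (values illustrative): on
 * gen3, a 1.5MiB X-tiled object starts from the 1MiB minimum and doubles
 * until the region covers it, giving a 2MiB fence size; bound
 * map_and_fenceable it therefore needs 2MiB alignment and consumes 2MiB of
 * aperture, whereas the same object untiled (or on gen4+) only needs the
 * 4KiB page alignment.
 */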
1086 i915_gem_mmap_gtt(struct drm_file *file,
1087 struct drm_device *dev,
1091 struct drm_i915_private *dev_priv = dev->dev_private;
1092 struct drm_i915_gem_object *obj;
1095 ret = i915_mutex_lock_interruptible(dev);
1099 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1100 if (&obj->base == NULL) {
1105 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1110 if (obj->madv != I915_MADV_WILLNEED) {
1111 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1116 ret = drm_gem_create_mmap_offset(&obj->base);
1120 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
1121 DRM_GEM_MAPPING_KEY;
1123 drm_gem_object_unreference(&obj->base);
1130 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1132 * @data: GTT mapping ioctl data
1133 * @file: GEM object info
1135 * Simply returns the fake offset to userspace so it can mmap it.
1136 * The mmap call will end up in drm_gem_mmap(), which will set things
1137 * up so we can get faults in the handler above.
1139 * The fault handler will take care of binding the object into the GTT
1140 * (since it may have been evicted to make room for something), allocating
1141 * a fence register, and mapping the appropriate aperture address into
1145 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1146 struct drm_file *file)
1148 struct drm_i915_gem_mmap_gtt *args = data;
1150 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
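/*
 * Illustrative userspace flow for the GTT mmap ioctl above (compiled out;
 * drm fd setup omitted, helper name hypothetical): fetch the fake offset,
 * then mmap(2) it on the drm fd. The first access faults into the handler
 * described above, which binds the object and sets up the fence.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *
example_bo_gtt_map(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) != 0)
		return NULL;
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
	    drm_fd, arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}
#endif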
1153 /* Immediately discard the backing storage */
1155 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1159 vm_obj = obj->base.vm_obj;
1160 VM_OBJECT_LOCK(vm_obj);
1161 vm_object_page_remove(vm_obj, 0, 0, false);
1162 VM_OBJECT_UNLOCK(vm_obj);
1163 obj->madv = __I915_MADV_PURGED;
1167 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1169 return obj->madv == I915_MADV_DONTNEED;
1173 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1178 BUG_ON(obj->madv == __I915_MADV_PURGED);
1180 if (obj->tiling_mode != I915_TILING_NONE)
1181 i915_gem_object_save_bit_17_swizzle(obj);
1182 if (obj->madv == I915_MADV_DONTNEED)
1184 page_count = obj->base.size / PAGE_SIZE;
1185 VM_OBJECT_LOCK(obj->base.vm_obj);
1186 #if GEM_PARANOID_CHECK_GTT
1187 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
1189 for (i = 0; i < page_count; i++) {
1193 if (obj->madv == I915_MADV_WILLNEED)
1194 vm_page_reference(m);
1195 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
1196 vm_page_unwire(obj->pages[i], 1);
1197 vm_page_wakeup(obj->pages[i]);
1199 VM_OBJECT_UNLOCK(obj->base.vm_obj);
1201 drm_free(obj->pages, M_DRM);
1206 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1209 struct drm_device *dev;
1212 int page_count, i, j;
1214 dev = obj->base.dev;
1215 KASSERT(obj->pages == NULL, ("Obj already has pages"));
1216 page_count = obj->base.size / PAGE_SIZE;
1217 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
1219 vm_obj = obj->base.vm_obj;
1220 VM_OBJECT_LOCK(vm_obj);
1221 for (i = 0; i < page_count; i++) {
1222 if ((obj->pages[i] = i915_gem_wire_page(vm_obj, i)) == NULL)
1225 VM_OBJECT_UNLOCK(vm_obj);
1226 if (i915_gem_object_needs_bit17_swizzle(obj))
1227 i915_gem_object_do_bit_17_swizzle(obj);
1231 for (j = 0; j < i; j++) {
1233 vm_page_busy_wait(m, FALSE, "i915gem");
1234 vm_page_unwire(m, 0);
1237 VM_OBJECT_UNLOCK(vm_obj);
1238 drm_free(obj->pages, M_DRM);
1244 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1245 struct intel_ring_buffer *ring)
1247 struct drm_device *dev = obj->base.dev;
1248 struct drm_i915_private *dev_priv = dev->dev_private;
1249 u32 seqno = intel_ring_get_seqno(ring);
1251 BUG_ON(ring == NULL);
1254 /* Add a reference if we're newly entering the active list. */
1256 drm_gem_object_reference(&obj->base);
1260 /* Move from whatever list we were on to the tail of execution. */
1261 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1262 list_move_tail(&obj->ring_list, &ring->active_list);
1264 obj->last_read_seqno = seqno;
1266 if (obj->fenced_gpu_access) {
1267 obj->last_fenced_seqno = seqno;
1269 /* Bump MRU to take account of the delayed flush */
1270 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1271 struct drm_i915_fence_reg *reg;
1273 reg = &dev_priv->fence_regs[obj->fence_reg];
1274 list_move_tail(&reg->lru_list,
1275 &dev_priv->mm.fence_list);
1281 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1283 struct drm_device *dev = obj->base.dev;
1284 struct drm_i915_private *dev_priv = dev->dev_private;
1286 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
1287 BUG_ON(!obj->active);
1289 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1291 list_del_init(&obj->ring_list);
1294 obj->last_read_seqno = 0;
1295 obj->last_write_seqno = 0;
1296 obj->base.write_domain = 0;
1298 obj->last_fenced_seqno = 0;
1299 obj->fenced_gpu_access = false;
1302 drm_gem_object_unreference(&obj->base);
1304 WARN_ON(i915_verify_lists(dev));
1308 i915_gem_handle_seqno_wrap(struct drm_device *dev)
1310 struct drm_i915_private *dev_priv = dev->dev_private;
1311 struct intel_ring_buffer *ring;
1314 /* The hardware uses various monotonic 32-bit counters, if we
1315 * detect that they will wraparound we need to idle the GPU
1316 * and reset those counters.
1319 for_each_ring(ring, dev_priv, i) {
1320 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1321 ret |= ring->sync_seqno[j] != 0;
1326 ret = i915_gpu_idle(dev);
1330 i915_gem_retire_requests(dev);
1331 for_each_ring(ring, dev_priv, i) {
1332 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1333 ring->sync_seqno[j] = 0;
1340 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
1342 struct drm_i915_private *dev_priv = dev->dev_private;
1344 /* reserve 0 for non-seqno */
1345 if (dev_priv->next_seqno == 0) {
1346 int ret = i915_gem_handle_seqno_wrap(dev);
1350 dev_priv->next_seqno = 1;
1353 *seqno = dev_priv->next_seqno++;
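/*
 * Illustrative note on 32-bit seqno ordering (compiled out, helper name
 * hypothetical; assumes i915_seqno_passed() uses the usual signed-difference
 * comparison): seqno 0x00000002 is treated as having passed 0xfffffffe
 * because (int32_t)(0x2 - 0xfffffffe) == 4 >= 0, so ordering survives a
 * wrap. The ring->sync_seqno[] copies are raw values, which is why the
 * wrap handler above idles the GPU and clears them instead.
 */
#if 0
static bool
example_seqno_after(u32 a, u32 b)
{
	return (int32_t)(a - b) >= 0;
}
#endif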
1358 i915_add_request(struct intel_ring_buffer *ring,
1359 struct drm_file *file,
1362 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1363 struct drm_i915_gem_request *request;
1364 u32 request_ring_position;
1369 * Emit any outstanding flushes - execbuf can fail to emit the flush
1370 * after having emitted the batchbuffer command. Hence we need to fix
1371 * things up similar to emitting the lazy request. The difference here
1372 * is that the flush _must_ happen before the next request, no matter
1375 ret = intel_ring_flush_all_caches(ring);
1379 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK | M_ZERO);
1380 if (request == NULL)
1384 /* Record the position of the start of the request so that
1385 * should we detect the updated seqno part-way through the
1386 * GPU processing the request, we never over-estimate the
1387 * position of the head.
1389 request_ring_position = intel_ring_get_tail(ring);
1391 ret = ring->add_request(ring);
1393 kfree(request, M_DRM);
1397 request->seqno = intel_ring_get_seqno(ring);
1398 request->ring = ring;
1399 request->tail = request_ring_position;
1400 request->emitted_jiffies = jiffies;
1401 was_empty = list_empty(&ring->request_list);
1402 list_add_tail(&request->list, &ring->request_list);
1403 request->file_priv = NULL;
1406 struct drm_i915_file_private *file_priv = file->driver_priv;
1408 spin_lock(&file_priv->mm.lock);
1409 request->file_priv = file_priv;
1410 list_add_tail(&request->client_list,
1411 &file_priv->mm.request_list);
1412 spin_unlock(&file_priv->mm.lock);
1415 ring->outstanding_lazy_request = 0;
1417 if (!dev_priv->mm.suspended) {
1418 if (i915_enable_hangcheck) {
1419 mod_timer(&dev_priv->hangcheck_timer,
1420 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
1423 queue_delayed_work(dev_priv->wq,
1424 &dev_priv->mm.retire_work,
1425 round_jiffies_up_relative(hz));
1426 intel_mark_busy(dev_priv->dev);
1431 *out_seqno = request->seqno;
1436 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1438 struct drm_i915_file_private *file_priv = request->file_priv;
1443 spin_lock(&file_priv->mm.lock);
1444 if (request->file_priv) {
1445 list_del(&request->client_list);
1446 request->file_priv = NULL;
1448 spin_unlock(&file_priv->mm.lock);
1451 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1452 struct intel_ring_buffer *ring)
1454 while (!list_empty(&ring->request_list)) {
1455 struct drm_i915_gem_request *request;
1457 request = list_first_entry(&ring->request_list,
1458 struct drm_i915_gem_request,
1461 list_del(&request->list);
1462 i915_gem_request_remove_from_client(request);
1463 drm_free(request, M_DRM);
1466 while (!list_empty(&ring->active_list)) {
1467 struct drm_i915_gem_object *obj;
1469 obj = list_first_entry(&ring->active_list,
1470 struct drm_i915_gem_object,
1473 i915_gem_object_move_to_inactive(obj);
1477 static void i915_gem_reset_fences(struct drm_device *dev)
1479 struct drm_i915_private *dev_priv = dev->dev_private;
1482 for (i = 0; i < dev_priv->num_fence_regs; i++) {
1483 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1485 i915_gem_write_fence(dev, i, NULL);
1488 i915_gem_object_fence_lost(reg->obj);
1492 INIT_LIST_HEAD(&reg->lru_list);
1495 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
1498 void i915_gem_reset(struct drm_device *dev)
1500 struct drm_i915_private *dev_priv = dev->dev_private;
1501 struct drm_i915_gem_object *obj;
1502 struct intel_ring_buffer *ring;
1505 for_each_ring(ring, dev_priv, i)
1506 i915_gem_reset_ring_lists(dev_priv, ring);
1508 /* Move everything out of the GPU domains to ensure we do any
1509 * necessary invalidation upon reuse.
1511 list_for_each_entry(obj,
1512 &dev_priv->mm.inactive_list,
1515 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1518 /* The fence registers are invalidated so clear them out */
1519 i915_gem_reset_fences(dev);
1523 * This function clears the request list as sequence numbers are passed.
1526 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1530 if (list_empty(&ring->request_list))
1533 WARN_ON(i915_verify_lists(ring->dev));
1535 seqno = ring->get_seqno(ring, true);
1537 while (!list_empty(&ring->request_list)) {
1538 struct drm_i915_gem_request *request;
1540 request = list_first_entry(&ring->request_list,
1541 struct drm_i915_gem_request,
1544 if (!i915_seqno_passed(seqno, request->seqno))
1547 /* We know the GPU must have read the request to have
1548 * sent us the seqno + interrupt, so use the position
1549 * of tail of the request to update the last known position
1552 ring->last_retired_head = request->tail;
1554 list_del(&request->list);
1555 i915_gem_request_remove_from_client(request);
1556 kfree(request, M_DRM);
1559 /* Move any buffers on the active list that are no longer referenced
1560 * by the ringbuffer to the flushing/inactive lists as appropriate.
1562 while (!list_empty(&ring->active_list)) {
1563 struct drm_i915_gem_object *obj;
1565 obj = list_first_entry(&ring->active_list,
1566 struct drm_i915_gem_object,
1569 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
1572 i915_gem_object_move_to_inactive(obj);
1575 if (unlikely(ring->trace_irq_seqno &&
1576 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1577 ring->irq_put(ring);
1578 ring->trace_irq_seqno = 0;
1584 i915_gem_retire_requests(struct drm_device *dev)
1586 drm_i915_private_t *dev_priv = dev->dev_private;
1587 struct intel_ring_buffer *ring;
1590 for_each_ring(ring, dev_priv, i)
1591 i915_gem_retire_requests_ring(ring);
1595 i915_gem_retire_work_handler(struct work_struct *work)
1597 drm_i915_private_t *dev_priv;
1598 struct drm_device *dev;
1599 struct intel_ring_buffer *ring;
1603 dev_priv = container_of(work, drm_i915_private_t,
1604 mm.retire_work.work);
1605 dev = dev_priv->dev;
1607 /* Come back later if the device is busy... */
1608 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) {
1609 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
1610 round_jiffies_up_relative(hz));
1614 i915_gem_retire_requests(dev);
1616 /* Send a periodic flush down the ring so we don't hold onto GEM
1617 * objects indefinitely.
1620 for_each_ring(ring, dev_priv, i) {
1621 if (ring->gpu_caches_dirty)
1622 i915_add_request(ring, NULL, NULL);
1624 idle &= list_empty(&ring->request_list);
1627 if (!dev_priv->mm.suspended && !idle)
1628 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
1629 round_jiffies_up_relative(hz));
1631 intel_mark_idle(dev);
1636 * Ensures that an object will eventually get non-busy by flushing any required
1637 * write domains, emitting any outstanding lazy request and retiring any
1638 * completed requests.
1641 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
1646 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
1650 i915_gem_retire_requests_ring(obj->ring);
1657 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
1658 * @DRM_IOCTL_ARGS: standard ioctl arguments
1660 * Returns 0 if successful, else an error is returned with the remaining time in
1661 * the timeout parameter.
1662 * -ETIMEDOUT: object is still busy after timeout (-ETIME on Linux)
1663 * -ERESTARTSYS: signal interrupted the wait
1664 * -ENOENT: object doesn't exist
1665 * Also possible, but rare:
1666 * -EAGAIN: GPU wedged
1668 * -ENODEV: Internal IRQ fail
1669 * -E?: The add request failed
1671 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
1672 * non-zero timeout parameter the wait ioctl will wait for the given number of
1673 * nanoseconds on an object becoming unbusy. Since the wait itself does so
1674 * without holding struct_mutex the object may become re-busied before this
1675 * function completes. A similar but shorter race condition exists in the busy
1679 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
1681 struct drm_i915_gem_wait *args = data;
1682 struct drm_i915_gem_object *obj;
1683 struct intel_ring_buffer *ring = NULL;
1684 struct timespec timeout_stack, *timeout = NULL;
1688 if (args->timeout_ns >= 0) {
1689 timeout_stack = ns_to_timespec(args->timeout_ns);
1690 timeout = &timeout_stack;
1693 ret = i915_mutex_lock_interruptible(dev);
1697 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
1698 if (&obj->base == NULL) {
1703 /* Need to make sure the object gets inactive eventually. */
1704 ret = i915_gem_object_flush_active(obj);
1709 seqno = obj->last_read_seqno;
1716 /* Do this after OLR check to make sure we make forward progress polling
1717 * on this IOCTL with a 0 timeout (like busy ioctl)
1719 if (!args->timeout_ns) {
1724 drm_gem_object_unreference(&obj->base);
1727 ret = __wait_seqno(ring, seqno, true, timeout);
1729 WARN_ON(!timespec_valid(timeout));
1730 args->timeout_ns = timespec_to_ns(timeout);
1735 drm_gem_object_unreference(&obj->base);
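/*
 * Illustrative userspace use of the wait ioctl documented above (compiled
 * out; drm fd setup omitted, helper name hypothetical): a timeout of 0
 * behaves like the busy ioctl, while a positive timeout_ns is rewritten on
 * return with the time that was left.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int
example_bo_wait(int drm_fd, uint32_t handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.bo_handle = handle;
	wait.timeout_ns = timeout_ns;	/* 0 == just query busyness */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
}
#endif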
1741 * i915_gem_object_sync - sync an object to a ring.
1743 * @obj: object which may be in use on another ring.
1744 * @to: ring we wish to use the object on. May be NULL.
1746 * This code is meant to abstract object synchronization with the GPU.
1747 * Calling with NULL implies synchronizing the object with the CPU
1748 * rather than a particular GPU ring.
1750 * Returns 0 if successful, else propagates up the lower layer error.
1753 i915_gem_object_sync(struct drm_i915_gem_object *obj,
1754 struct intel_ring_buffer *to)
1756 struct intel_ring_buffer *from = obj->ring;
1760 if (from == NULL || to == from)
1763 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
1764 return i915_gem_object_wait_rendering(obj, false);
1766 idx = intel_ring_sync_index(from, to);
1768 seqno = obj->last_read_seqno;
1769 if (seqno <= from->sync_seqno[idx])
1772 ret = i915_gem_check_olr(obj->ring, seqno);
1776 ret = to->sync_to(to, from, seqno);
1778 /* We use last_read_seqno because sync_to()
1779 * might have just caused seqno wrap under
1782 from->sync_seqno[idx] = obj->last_read_seqno;
1787 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
1789 u32 old_write_domain, old_read_domains;
1791 /* Act as a barrier for all accesses through the GTT */
1794 /* Force a pagefault for domain tracking on next user access */
1795 i915_gem_release_mmap(obj);
1797 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
1800 old_read_domains = obj->base.read_domains;
1801 old_write_domain = obj->base.write_domain;
1803 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
1804 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
1809 * Unbinds an object from the GTT aperture.
1812 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
1814 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
1817 if (obj->gtt_space == NULL)
1823 ret = i915_gem_object_finish_gpu(obj);
1826 /* Continue on if we fail due to EIO, the GPU is hung so we
1827 * should be safe and we need to cleanup or else we might
1828 * cause memory corruption through use-after-free.
1831 i915_gem_object_finish_gtt(obj);
1833 /* Move the object to the CPU domain to ensure that
1834 * any possible CPU writes while it's not in the GTT
1835 * are flushed when we go to remap it.
1838 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1839 if (ret == -ERESTART || ret == -EINTR)
1842 /* In the event of a disaster, abandon all caches and
1843 * hope for the best.
1845 i915_gem_clflush_object(obj);
1846 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1849 /* release the fence reg _after_ flushing */
1850 ret = i915_gem_object_put_fence(obj);
1854 if (obj->has_global_gtt_mapping)
1855 i915_gem_gtt_unbind_object(obj);
1856 if (obj->has_aliasing_ppgtt_mapping) {
1857 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
1858 obj->has_aliasing_ppgtt_mapping = 0;
1860 i915_gem_gtt_finish_object(obj);
1862 i915_gem_object_put_pages_gtt(obj);
1864 list_del_init(&obj->gtt_list);
1865 list_del_init(&obj->mm_list);
1866 /* Avoid an unnecessary call to unbind on rebind. */
1867 obj->map_and_fenceable = true;
1869 drm_mm_put_block(obj->gtt_space);
1870 obj->gtt_space = NULL;
1871 obj->gtt_offset = 0;
1873 if (i915_gem_object_is_purgeable(obj))
1874 i915_gem_object_truncate(obj);
1879 int i915_gpu_idle(struct drm_device *dev)
1881 drm_i915_private_t *dev_priv = dev->dev_private;
1882 struct intel_ring_buffer *ring;
1885 /* Flush everything onto the inactive list. */
1886 for_each_ring(ring, dev_priv, i) {
1887 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
1891 ret = intel_ring_idle(ring);
1899 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
1900 struct drm_i915_gem_object *obj)
1902 drm_i915_private_t *dev_priv = dev->dev_private;
1906 u32 size = obj->gtt_space->size;
1908 val = (uint64_t)((obj->gtt_offset + size - 4096) &
1910 val |= obj->gtt_offset & 0xfffff000;
1911 val |= (uint64_t)((obj->stride / 128) - 1) <<
1912 SANDYBRIDGE_FENCE_PITCH_SHIFT;
1914 if (obj->tiling_mode == I915_TILING_Y)
1915 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
1916 val |= I965_FENCE_REG_VALID;
1920 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
1921 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
1924 static void i965_write_fence_reg(struct drm_device *dev, int reg,
1925 struct drm_i915_gem_object *obj)
1927 drm_i915_private_t *dev_priv = dev->dev_private;
1931 u32 size = obj->gtt_space->size;
1933 val = (uint64_t)((obj->gtt_offset + size - 4096) &
1935 val |= obj->gtt_offset & 0xfffff000;
1936 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
1937 if (obj->tiling_mode == I915_TILING_Y)
1938 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
1939 val |= I965_FENCE_REG_VALID;
1943 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
1944 POSTING_READ(FENCE_REG_965_0 + reg * 8);
1947 static void i915_write_fence_reg(struct drm_device *dev, int reg,
1948 struct drm_i915_gem_object *obj)
1950 drm_i915_private_t *dev_priv = dev->dev_private;
1954 u32 size = obj->gtt_space->size;
1958 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
1959 (size & -size) != size ||
1960 (obj->gtt_offset & (size - 1)),
1961 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
1962 obj->gtt_offset, obj->map_and_fenceable, size);
1964 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
1969 /* Note: pitch better be a power of two tile widths */
1970 pitch_val = obj->stride / tile_width;
1971 pitch_val = ffs(pitch_val) - 1;
1973 val = obj->gtt_offset;
1974 if (obj->tiling_mode == I915_TILING_Y)
1975 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
1976 val |= I915_FENCE_SIZE_BITS(size);
1977 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
1978 val |= I830_FENCE_REG_VALID;
1983 reg = FENCE_REG_830_0 + reg * 4;
1985 reg = FENCE_REG_945_8 + (reg - 8) * 4;
1987 I915_WRITE(reg, val);
1991 static void i830_write_fence_reg(struct drm_device *dev, int reg,
1992 struct drm_i915_gem_object *obj)
1994 drm_i915_private_t *dev_priv = dev->dev_private;
1998 u32 size = obj->gtt_space->size;
2001 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2002 (size & -size) != size ||
2003 (obj->gtt_offset & (size - 1)),
2004 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2005 obj->gtt_offset, size);
2007 pitch_val = obj->stride / 128;
2008 pitch_val = ffs(pitch_val) - 1;
2010 val = obj->gtt_offset;
2011 if (obj->tiling_mode == I915_TILING_Y)
2012 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2013 val |= I830_FENCE_SIZE_BITS(size);
2014 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2015 val |= I830_FENCE_REG_VALID;
2019 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2020 POSTING_READ(FENCE_REG_830_0 + reg * 4);
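/*
 * Worked example for the pre-965 fence encodings above (values
 * illustrative): an X-tiled gen3 object with a 2048-byte stride uses
 * tile_width 512, so pitch_val = ffs(2048 / 512) - 1 = ffs(4) - 1 = 2.
 * The WARNs rely on (size & -size) != size to reject non-power-of-two
 * sizes, e.g. 0x180000 & -0x180000 == 0x80000 != 0x180000; a fence region
 * rounded by i915_gem_get_gtt_size() never trips this.
 */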
2023 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2024 struct drm_i915_gem_object *obj)
2026 switch (INTEL_INFO(dev)->gen) {
2028 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2030 case 4: i965_write_fence_reg(dev, reg, obj); break;
2031 case 3: i915_write_fence_reg(dev, reg, obj); break;
2032 case 2: i830_write_fence_reg(dev, reg, obj); break;
2037 static inline int fence_number(struct drm_i915_private *dev_priv,
2038 struct drm_i915_fence_reg *fence)
2040 return fence - dev_priv->fence_regs;
2043 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2044 struct drm_i915_fence_reg *fence,
2047 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2048 int reg = fence_number(dev_priv, fence);
2050 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2053 obj->fence_reg = reg;
2055 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2057 obj->fence_reg = I915_FENCE_REG_NONE;
2059 list_del_init(&fence->lru_list);
2064 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2066 if (obj->last_fenced_seqno) {
2067 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
2071 obj->last_fenced_seqno = 0;
2074 /* Ensure that all CPU reads are completed before installing a fence
2075 * and all writes before removing the fence.
2077 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2080 obj->fenced_gpu_access = false;
2085 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2087 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2090 ret = i915_gem_object_flush_fence(obj);
2094 if (obj->fence_reg == I915_FENCE_REG_NONE)
2097 i915_gem_object_update_fence(obj,
2098 &dev_priv->fence_regs[obj->fence_reg],
2100 i915_gem_object_fence_lost(obj);
2105 static struct drm_i915_fence_reg *
2106 i915_find_fence_reg(struct drm_device *dev)
2108 struct drm_i915_private *dev_priv = dev->dev_private;
2109 struct drm_i915_fence_reg *reg, *avail;
2112 /* First try to find a free reg */
2114 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2115 reg = &dev_priv->fence_regs[i];
2119 if (!reg->pin_count)
2126 /* None available, try to steal one or wait for a user to finish */
2127 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2138 * i915_gem_object_get_fence - set up fencing for an object
2139 * @obj: object to map through a fence reg
2141 * When mapping objects through the GTT, userspace wants to be able to write
2142 * to them without having to worry about swizzling if the object is tiled.
2143 * This function walks the fence regs looking for a free one for @obj,
2144 * stealing one if it can't find any.
2146 * It then sets up the reg based on the object's properties: address, pitch
2147 * and tiling format.
2149 * For an untiled surface, this removes any existing fence.
2152 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2154 struct drm_device *dev = obj->base.dev;
2155 struct drm_i915_private *dev_priv = dev->dev_private;
2156 bool enable = obj->tiling_mode != I915_TILING_NONE;
2157 struct drm_i915_fence_reg *reg;
2160 /* Have we updated the tiling parameters upon the object and so
2161 * will need to serialise the write to the associated fence register?
2163 if (obj->fence_dirty) {
2164 ret = i915_gem_object_flush_fence(obj);
2169 /* Just update our place in the LRU if our fence is getting reused. */
2170 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2171 reg = &dev_priv->fence_regs[obj->fence_reg];
2172 if (!obj->fence_dirty) {
2173 list_move_tail(&reg->lru_list,
2174 &dev_priv->mm.fence_list);
2177 } else if (enable) {
2178 reg = i915_find_fence_reg(dev);
2183 struct drm_i915_gem_object *old = reg->obj;
2185 ret = i915_gem_object_flush_fence(old);
2189 i915_gem_object_fence_lost(old);
2194 i915_gem_object_update_fence(obj, reg, enable);
2195 obj->fence_dirty = false;
2200 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2201 struct drm_mm_node *gtt_space,
2202 unsigned long cache_level)
2204 struct drm_mm_node *other;
2206 /* On non-LLC machines we have to be careful when putting differing
2207 * types of snoopable memory together to avoid the prefetcher
2208 * crossing memory domains and dying.
2213 if (gtt_space == NULL)
2216 if (list_empty(&gtt_space->node_list))
2219 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2220 if (other->allocated && !other->hole_follows && other->color != cache_level)
2223 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2224 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2230 static void i915_gem_verify_gtt(struct drm_device *dev)
2233 struct drm_i915_private *dev_priv = dev->dev_private;
2234 struct drm_i915_gem_object *obj;
2237 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
2238 if (obj->gtt_space == NULL) {
2239 printk(KERN_ERR "object found on GTT list with no space reserved\n");
2244 if (obj->cache_level != obj->gtt_space->color) {
2245 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
2246 obj->gtt_space->start,
2247 obj->gtt_space->start + obj->gtt_space->size,
2249 obj->gtt_space->color);
2254 if (!i915_gem_valid_gtt_space(dev,
2256 obj->cache_level)) {
2257 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
2258 obj->gtt_space->start,
2259 obj->gtt_space->start + obj->gtt_space->size,
2271 * Finds free space in the GTT aperture and binds the object there.
2274 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2276 bool map_and_fenceable,
2279 struct drm_device *dev = obj->base.dev;
2280 drm_i915_private_t *dev_priv = dev->dev_private;
2281 struct drm_mm_node *free_space;
2282 uint32_t size, fence_size, fence_alignment, unfenced_alignment;
2283 bool mappable, fenceable;
2286 if (obj->madv != I915_MADV_WILLNEED) {
2287 DRM_ERROR("Attempting to bind a purgeable object\n");
2291 fence_size = i915_gem_get_gtt_size(dev, obj->base.size,
2293 fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size,
2295 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev,
2296 obj->base.size, obj->tiling_mode);
2298 alignment = map_and_fenceable ? fence_alignment :
2300 if (map_and_fenceable && (alignment & (fence_alignment - 1)) != 0) {
2301 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2305 size = map_and_fenceable ? fence_size : obj->base.size;
2307 /* If the object is bigger than the entire aperture, reject it early
2308 * before evicting everything in a vain attempt to find space.
2310 if (obj->base.size > (map_and_fenceable ?
2311 dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2313 "Attempting to bind an object larger than the aperture\n");
2318 if (map_and_fenceable)
2320 drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
2321 size, alignment, obj->cache_level,
2322 0, dev_priv->mm.gtt_mappable_end,
2325 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
2326 size, alignment, obj->cache_level,
2329 if (free_space != NULL) {
2330 if (map_and_fenceable)
2332 drm_mm_get_block_range_generic(free_space,
2333 size, alignment, obj->cache_level,
2334 0, dev_priv->mm.gtt_mappable_end,
2338 drm_mm_get_block_generic(free_space,
2339 size, alignment, obj->cache_level,
2342 if (obj->gtt_space == NULL) {
2343 ret = i915_gem_evict_something(dev, size, alignment,
2352 if (WARN_ON(!i915_gem_valid_gtt_space(dev,
2354 obj->cache_level))) {
2355 drm_mm_put_block(obj->gtt_space);
2356 obj->gtt_space = NULL;
2361 * NOTE: i915_gem_object_get_pages_gtt() cannot
2362 * return ENOMEM, since we used VM_ALLOC_RETRY.
2364 ret = i915_gem_object_get_pages_gtt(obj, 0);
2366 drm_mm_put_block(obj->gtt_space);
2367 obj->gtt_space = NULL;
2371 i915_gem_gtt_bind_object(obj, obj->cache_level);
2373 i915_gem_object_put_pages_gtt(obj);
2374 drm_mm_put_block(obj->gtt_space);
2375 obj->gtt_space = NULL;
2376 if (i915_gem_evict_everything(dev))
2381 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2382 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2384 obj->gtt_offset = obj->gtt_space->start;
2387 obj->gtt_space->size == fence_size &&
2388 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2391 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2392 obj->map_and_fenceable = mappable && fenceable;
2394 i915_gem_verify_gtt(dev);
2399 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2402 /* If we don't have a page list set up, then we're not pinned
2403 * to GPU, and we can ignore the cache flush because it'll happen
2404 * again at bind time.
2406 if (obj->pages == NULL)
2409 /* If the GPU is snooping the contents of the CPU cache,
2410 * we do not need to manually clear the CPU cache lines. However,
2411 * the caches are only snooped when the render cache is
2412 * flushed/invalidated. As we always have to emit invalidations
2413 * and flushes when moving into and out of the RENDER domain, correct
2414 * snooping behaviour occurs naturally as the result of our domain
2417 if (obj->cache_level != I915_CACHE_NONE)
2420 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2423 /** Flushes the GTT write domain for the object if it's dirty. */
2425 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2427 uint32_t old_write_domain;
2429 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2432 /* No actual flushing is required for the GTT write domain. Writes
2433 * to it immediately go to main memory as far as we know, so there's
2434 * no chipset flush. It also doesn't land in render cache.
2436 * However, we do have to enforce the order so that all writes through
2437 * the GTT land before any writes to the device, such as updates to
2442 old_write_domain = obj->base.write_domain;
2443 obj->base.write_domain = 0;
2446 /** Flushes the CPU write domain for the object if it's dirty. */
2448 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2450 uint32_t old_write_domain;
2452 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2455 i915_gem_clflush_object(obj);
2456 intel_gtt_chipset_flush();
2457 old_write_domain = obj->base.write_domain;
2458 obj->base.write_domain = 0;
2462 * Moves a single object to the GTT read, and possibly write domain.
2464 * This function returns when the move is complete, including waiting on
2468 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2470 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2471 uint32_t old_write_domain, old_read_domains;
2474 /* Not valid to be called on unbound objects. */
2475 if (obj->gtt_space == NULL)
2478 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2481 ret = i915_gem_object_wait_rendering(obj, !write);
2485 i915_gem_object_flush_cpu_write_domain(obj);
2487 old_write_domain = obj->base.write_domain;
2488 old_read_domains = obj->base.read_domains;
2490 /* It should now be out of any other write domains, and we can update
2491 * the domain values for our changes.
2493 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2494 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2496 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2497 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2501 /* And bump the LRU for this access */
2502 if (i915_gem_object_is_inactive(obj))
2503 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
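/*
 * Illustrative sketch (not part of the driver): the usual caller pattern
 * for coherent CPU access through the GTT aperture is to pin the object
 * into a mappable range, move it to the GTT domain, perform the access and
 * drop the pin again.  Error handling is elided and the 4096-byte alignment
 * is only an example value.
 *
 *	ret = i915_gem_object_pin(obj, 4096, true, false);
 *	if (ret == 0) {
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *		// ... access the object through its GTT mapping ...
 *		i915_gem_object_unpin(obj);
 *	}
 */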
2508 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2509 enum i915_cache_level cache_level)
2511 struct drm_device *dev = obj->base.dev;
2512 drm_i915_private_t *dev_priv = dev->dev_private;
2515 if (obj->cache_level == cache_level)
2518 if (obj->pin_count) {
2519 DRM_DEBUG("can not change the cache level of pinned objects\n");
2523 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
2524 ret = i915_gem_object_unbind(obj);
2529 if (obj->gtt_space) {
2530 ret = i915_gem_object_finish_gpu(obj);
2534 i915_gem_object_finish_gtt(obj);
2536 /* Before SandyBridge, you could not use tiling or fence
2537 * registers with snooped memory, so relinquish any fences
2538 * currently pointing to our region in the aperture.
2540 if (INTEL_INFO(dev)->gen < 6) {
2541 ret = i915_gem_object_put_fence(obj);
2546 if (obj->has_global_gtt_mapping)
2547 i915_gem_gtt_bind_object(obj, cache_level);
2548 if (obj->has_aliasing_ppgtt_mapping)
2549 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2552 obj->gtt_space->color = cache_level;
2555 if (cache_level == I915_CACHE_NONE) {
2556 u32 old_read_domains, old_write_domain;
2558 /* If we're coming from LLC cached, then we haven't
2559 * actually been tracking whether the data is in the
2560 * CPU cache or not, since we only allow one bit set
2561 * in obj->write_domain and have been skipping the clflushes.
2562 * Just set it to the CPU cache for now.
2564 KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0,
2565 ("obj %p has a non-CPU write domain", obj));
2566 KASSERT((obj->base.read_domains & ~I915_GEM_DOMAIN_CPU) == 0,
2567 ("obj %p has non-CPU read domains", obj));
2569 old_read_domains = obj->base.read_domains;
2570 old_write_domain = obj->base.write_domain;
2572 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2573 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2577 obj->cache_level = cache_level;
2578 i915_gem_verify_gtt(dev);
2582 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
2583 struct drm_file *file)
2585 struct drm_i915_gem_caching *args = data;
2586 struct drm_i915_gem_object *obj;
2589 ret = i915_mutex_lock_interruptible(dev);
2593 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2594 if (&obj->base == NULL) {
2599 args->caching = obj->cache_level != I915_CACHE_NONE;
2601 drm_gem_object_unreference(&obj->base);
2607 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
2608 struct drm_file *file)
2610 struct drm_i915_gem_caching *args = data;
2611 struct drm_i915_gem_object *obj;
2612 enum i915_cache_level level;
2615 switch (args->caching) {
2616 case I915_CACHING_NONE:
2617 level = I915_CACHE_NONE;
2619 case I915_CACHING_CACHED:
2620 level = I915_CACHE_LLC;
2626 ret = i915_mutex_lock_interruptible(dev);
2630 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2631 if (&obj->base == NULL) {
2636 ret = i915_gem_object_set_cache_level(obj, level);
2638 drm_gem_object_unreference(&obj->base);
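/*
 * Illustrative sketch (not part of the driver): user space reaches the two
 * caching ioctls above through libdrm.  The structure and ioctl request
 * names below follow the Linux i915 uapi and are assumptions for this port.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle  = bo_handle,
 *		.caching = I915_CACHING_CACHED,	// request LLC caching
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) != 0)
 *		; // handle the error (EINVAL for an unknown caching mode)
 */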
2645 * Prepare buffer for display plane (scanout, cursors, etc).
2646 * Can be called from an uninterruptible phase (modesetting) and allows
2647 * any flushes to be pipelined (for pageflips).
2650 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2652 struct intel_ring_buffer *pipelined)
2654 u32 old_read_domains, old_write_domain;
2657 if (pipelined != obj->ring) {
2658 ret = i915_gem_object_sync(obj, pipelined);
2663 /* The display engine is not coherent with the LLC cache on gen6. As
2664 * a result, we make sure that the pinning that is about to occur is
2665 * done with uncached PTEs. This is the lowest common denominator for all chipsets.
2668 * However for gen6+, we could do better by using the GFDT bit instead
2669 * of uncaching, which would allow us to flush all the LLC-cached data
2670 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2672 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2676 /* As the user may map the buffer once pinned in the display plane
2677 * (e.g. libkms for the bootup splash), we have to ensure that we
2678 * always use map_and_fenceable for all scanout buffers.
2680 ret = i915_gem_object_pin(obj, alignment, true, false);
2684 i915_gem_object_flush_cpu_write_domain(obj);
2686 old_write_domain = obj->base.write_domain;
2687 old_read_domains = obj->base.read_domains;
2689 /* It should now be out of any other write domains, and we can update
2690 * the domain values for our changes.
2692 obj->base.write_domain = 0;
2693 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2699 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
2703 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
2706 ret = i915_gem_object_wait_rendering(obj, false);
2710 /* Ensure that we invalidate the GPU's caches and TLBs. */
2711 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2716 * Moves a single object to the CPU read, and possibly write domain.
2718 * This function returns when the move is complete, including waiting on flushes to occur.
2722 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2724 uint32_t old_write_domain, old_read_domains;
2727 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
2730 ret = i915_gem_object_wait_rendering(obj, !write);
2734 i915_gem_object_flush_gtt_write_domain(obj);
2736 old_write_domain = obj->base.write_domain;
2737 old_read_domains = obj->base.read_domains;
2739 /* Flush the CPU cache if it's still invalid. */
2740 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2741 i915_gem_clflush_object(obj);
2743 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2746 /* It should now be out of any other write domains, and we can update
2747 * the domain values for our changes.
2749 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2751 /* If we're writing through the CPU, then the GPU read domains will
2752 * need to be invalidated at next use.
2755 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2756 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2762 /* Throttle our rendering by waiting until the ring has completed our requests
2763 * emitted over 20 msec ago.
2765 * Note that if we were to use the current jiffies each time around the loop,
2766 * we wouldn't escape the function with any frames outstanding if the time to
2767 * render a frame was over 20ms.
2769 * This should get us reasonable parallelism between CPU and GPU but also
2770 * relatively low latency when blocking on a particular request to finish.
2773 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2775 struct drm_i915_private *dev_priv = dev->dev_private;
2776 struct drm_i915_file_private *file_priv = file->driver_priv;
2777 unsigned long recent_enough = ticks - (20 * hz / 1000);
2778 struct drm_i915_gem_request *request;
2779 struct intel_ring_buffer *ring = NULL;
2783 if (atomic_read(&dev_priv->mm.wedged))
2786 spin_lock(&file_priv->mm.lock);
2787 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
2788 if (time_after_eq(request->emitted_jiffies, recent_enough))
2791 ring = request->ring;
2792 seqno = request->seqno;
2794 spin_unlock(&file_priv->mm.lock);
2799 ret = __wait_seqno(ring, seqno, true, NULL);
2802 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
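/*
 * Added note: the 20 msec throttle window above is expressed in scheduler
 * ticks, so "20 * hz / 1000" converts milliseconds to ticks (2 ticks with
 * hz = 100, 20 ticks with hz = 1000).  It mirrors the Linux computation
 * "jiffies - msecs_to_jiffies(20)".
 */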
2808 i915_gem_object_pin(struct drm_i915_gem_object *obj,
2810 bool map_and_fenceable,
2815 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
2818 if (obj->gtt_space != NULL) {
2819 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
2820 (map_and_fenceable && !obj->map_and_fenceable)) {
2821 WARN(obj->pin_count,
2822 "bo is already pinned with incorrect alignment:"
2823 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
2824 " obj->map_and_fenceable=%d\n",
2825 obj->gtt_offset, alignment,
2827 obj->map_and_fenceable);
2828 ret = i915_gem_object_unbind(obj);
2834 if (obj->gtt_space == NULL) {
2835 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2837 ret = i915_gem_object_bind_to_gtt(obj, alignment,
2843 if (!dev_priv->mm.aliasing_ppgtt)
2844 i915_gem_gtt_bind_object(obj, obj->cache_level);
2847 if (!obj->has_global_gtt_mapping && map_and_fenceable)
2848 i915_gem_gtt_bind_object(obj, obj->cache_level);
2851 obj->pin_mappable |= map_and_fenceable;
2857 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
2859 BUG_ON(obj->pin_count == 0);
2860 BUG_ON(obj->gtt_space == NULL);
2862 if (--obj->pin_count == 0)
2863 obj->pin_mappable = false;
2867 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
2868 struct drm_file *file)
2870 struct drm_i915_gem_pin *args = data;
2871 struct drm_i915_gem_object *obj;
2874 ret = i915_mutex_lock_interruptible(dev);
2878 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2879 if (&obj->base == NULL) {
2884 if (obj->madv != I915_MADV_WILLNEED) {
2885 DRM_ERROR("Attempting to pin a purgeable buffer\n");
2890 if (obj->pin_filp != NULL && obj->pin_filp != file) {
2891 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
2897 if (obj->user_pin_count == 0) {
2898 ret = i915_gem_object_pin(obj, args->alignment, true, false);
2903 obj->user_pin_count++;
2904 obj->pin_filp = file;
2906 /* XXX - flush the CPU caches for pinned objects
2907 * as the X server doesn't manage domains yet
2909 i915_gem_object_flush_cpu_write_domain(obj);
2910 args->offset = obj->gtt_offset;
2912 drm_gem_object_unreference(&obj->base);
2919 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
2920 struct drm_file *file)
2922 struct drm_i915_gem_pin *args = data;
2923 struct drm_i915_gem_object *obj;
2926 ret = i915_mutex_lock_interruptible(dev);
2930 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2931 if (&obj->base == NULL) {
2936 if (obj->pin_filp != file) {
2937 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
2942 obj->user_pin_count--;
2943 if (obj->user_pin_count == 0) {
2944 obj->pin_filp = NULL;
2945 i915_gem_object_unpin(obj);
2949 drm_gem_object_unreference(&obj->base);
2956 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
2957 struct drm_file *file)
2959 struct drm_i915_gem_busy *args = data;
2960 struct drm_i915_gem_object *obj;
2963 ret = i915_mutex_lock_interruptible(dev);
2967 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
2968 if (&obj->base == NULL) {
2973 /* Count all active objects as busy, even if they are currently not used
2974 * by the gpu. Users of this interface expect objects to eventually
2975 * become non-busy without any further actions, therefore emit any
2976 * necessary flushes here.
2978 ret = i915_gem_object_flush_active(obj);
2980 args->busy = obj->active;
2982 args->busy |= intel_ring_flag(obj->ring) << 16;
2985 drm_gem_object_unreference(&obj->base);
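/*
 * Added note: user space sees a composite value in args->busy: bit 0 is the
 * active flag and the ring that last used the object is encoded in the
 * upper 16 bits, so "busy & 1" tests for busyness and "busy >> 16" recovers
 * the ring flag.
 */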
2992 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
2993 struct drm_file *file_priv)
2995 return i915_gem_ring_throttle(dev, file_priv);
2999 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3000 struct drm_file *file_priv)
3002 struct drm_i915_gem_madvise *args = data;
3003 struct drm_i915_gem_object *obj;
3006 switch (args->madv) {
3007 case I915_MADV_DONTNEED:
3008 case I915_MADV_WILLNEED:
3014 ret = i915_mutex_lock_interruptible(dev);
3018 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3019 if (&obj->base == NULL) {
3024 if (obj->pin_count) {
3029 if (obj->madv != __I915_MADV_PURGED)
3030 obj->madv = args->madv;
3032 /* if the object is no longer attached, discard its backing storage */
3033 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
3034 i915_gem_object_truncate(obj);
3036 args->retained = obj->madv != __I915_MADV_PURGED;
3039 drm_gem_object_unreference(&obj->base);
3045 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3048 struct drm_i915_private *dev_priv;
3049 struct drm_i915_gem_object *obj;
3051 dev_priv = dev->dev_private;
3053 obj = kmalloc(sizeof(*obj), M_DRM, M_WAITOK | M_ZERO);
3055 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3056 drm_free(obj, M_DRM);
3060 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3061 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3064 /* On some devices, we can have the GPU use the LLC (the CPU
3065 * cache) for about a 10% performance improvement
3066 * compared to uncached. Graphics requests other than
3067 * display scanout are coherent with the CPU in
3068 * accessing this cache. This means in this mode we
3069 * don't need to clflush on the CPU side, and on the
3070 * GPU side we only need to flush internal caches to
3071 * get data visible to the CPU.
3073 * However, we maintain the display planes as UC, and so
3074 * need to rebind when first used as such.
3076 obj->cache_level = I915_CACHE_LLC;
3078 obj->cache_level = I915_CACHE_NONE;
3079 obj->base.driver_private = NULL;
3080 obj->fence_reg = I915_FENCE_REG_NONE;
3081 INIT_LIST_HEAD(&obj->mm_list);
3082 INIT_LIST_HEAD(&obj->gtt_list);
3083 INIT_LIST_HEAD(&obj->ring_list);
3084 INIT_LIST_HEAD(&obj->exec_list);
3085 obj->madv = I915_MADV_WILLNEED;
3086 /* Avoid an unnecessary call to unbind on the first bind. */
3087 obj->map_and_fenceable = true;
3089 i915_gem_info_add_obj(dev_priv, size);
3094 int i915_gem_init_object(struct drm_gem_object *obj)
3101 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3103 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3104 struct drm_device *dev = obj->base.dev;
3105 drm_i915_private_t *dev_priv = dev->dev_private;
3108 i915_gem_detach_phys_object(dev, obj);
3111 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3112 bool was_interruptible;
3114 was_interruptible = dev_priv->mm.interruptible;
3115 dev_priv->mm.interruptible = false;
3117 WARN_ON(i915_gem_object_unbind(obj));
3119 dev_priv->mm.interruptible = was_interruptible;
3122 drm_gem_free_mmap_offset(&obj->base);
3124 drm_gem_object_release(&obj->base);
3125 i915_gem_info_remove_obj(dev_priv, obj->base.size);
3127 drm_free(obj->bit_17, M_DRM);
3128 drm_free(obj, M_DRM);
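/*
 * Added summary: i915_gem_do_init() records the managed GTT range in
 * dev_priv->mm (start, mappable end, total size), hands the range to the
 * drm_mm allocator, clears the corresponding GTT entries and registers the
 * mappable aperture as fictitious pages with the VM so that GTT faults can
 * be served later.
 */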
3132 i915_gem_do_init(struct drm_device *dev, unsigned long start,
3133 unsigned long mappable_end, unsigned long end)
3135 drm_i915_private_t *dev_priv;
3136 unsigned long mappable;
3139 dev_priv = dev->dev_private;
3140 mappable = min(end, mappable_end) - start;
3142 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
3144 dev_priv->mm.gtt_start = start;
3145 dev_priv->mm.gtt_mappable_end = mappable_end;
3146 dev_priv->mm.gtt_end = end;
3147 dev_priv->mm.gtt_total = end - start;
3148 dev_priv->mm.mappable_gtt_total = mappable;
3150 /* Take over this portion of the GTT */
3151 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
3152 device_printf(dev->dev,
3153 "taking over the fictitious range 0x%lx-0x%lx\n",
3154 dev->agp->base + start, dev->agp->base + start + mappable);
3155 error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
3156 dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);
3161 i915_gem_idle(struct drm_device *dev)
3163 drm_i915_private_t *dev_priv = dev->dev_private;
3168 if (dev_priv->mm.suspended) {
3173 ret = i915_gpu_idle(dev);
3178 i915_gem_retire_requests(dev);
3180 /* Under UMS, be paranoid and evict. */
3181 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3182 i915_gem_evict_everything(dev);
3184 i915_gem_reset_fences(dev);
3186 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3187 * We need to replace this with a semaphore, or something.
3188 * And not confound mm.suspended!
3190 dev_priv->mm.suspended = 1;
3191 del_timer_sync(&dev_priv->hangcheck_timer);
3193 i915_kernel_lost_context(dev);
3194 i915_gem_cleanup_ringbuffer(dev);
3198 /* Cancel the retire work handler, which should be idle now. */
3199 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3204 void i915_gem_l3_remap(struct drm_device *dev)
3206 drm_i915_private_t *dev_priv = dev->dev_private;
3210 if (!HAS_L3_GPU_CACHE(dev))
3213 if (!dev_priv->l3_parity.remap_info)
3216 misccpctl = I915_READ(GEN7_MISCCPCTL);
3217 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
3218 POSTING_READ(GEN7_MISCCPCTL);
3220 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
3221 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
3222 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
3223 DRM_DEBUG("0x%x was already programmed to %x\n",
3224 GEN7_L3LOG_BASE + i, remap);
3225 if (remap && !dev_priv->l3_parity.remap_info[i/4])
3226 DRM_DEBUG_DRIVER("Clearing remapped register\n");
3227 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
3230 /* Make sure all the writes land before disabling dop clock gating */
3231 POSTING_READ(GEN7_L3LOG_BASE);
3233 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
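/*
 * Added summary: i915_gem_init_swizzling() is a no-op before gen5 or when
 * no bit-6 swizzling is required; otherwise it enables display surface tile
 * swizzling and sets the per-generation ARB_MODE swizzle control bit.
 */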
3236 void i915_gem_init_swizzling(struct drm_device *dev)
3238 drm_i915_private_t *dev_priv = dev->dev_private;
3240 if (INTEL_INFO(dev)->gen < 5 ||
3241 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3244 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3245 DISP_TILE_SURFACE_SWIZZLING);
3250 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3252 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3254 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3258 intel_enable_blt(struct drm_device *dev)
3265 /* The blitter was dysfunctional on early prototypes */
3266 revision = pci_read_config(dev->dev, PCIR_REVID, 1);
3267 if (IS_GEN6(dev) && revision < 8) {
3268 DRM_INFO("BLT not supported on this pre-production hardware;"
3269 " graphics performance will be degraded.\n");
3277 i915_gem_init_hw(struct drm_device *dev)
3279 drm_i915_private_t *dev_priv = dev->dev_private;
3282 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
3283 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
3285 i915_gem_l3_remap(dev);
3287 i915_gem_init_swizzling(dev);
3289 ret = intel_init_render_ring_buffer(dev);
3294 ret = intel_init_bsd_ring_buffer(dev);
3296 goto cleanup_render_ring;
3299 if (intel_enable_blt(dev)) {
3300 ret = intel_init_blt_ring_buffer(dev);
3302 goto cleanup_bsd_ring;
3305 dev_priv->next_seqno = 1;
3308 * XXX: There was some w/a described somewhere suggesting loading
3309 * contexts before PPGTT.
3311 i915_gem_context_init(dev);
3312 i915_gem_init_ppgtt(dev);
3317 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3318 cleanup_render_ring:
3319 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
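/*
 * Added summary: intel_enable_ppgtt() honours an explicit i915_enable_ppgtt
 * module parameter and otherwise disables the aliasing PPGTT on Sandy
 * Bridge when VT-d (IOMMU) is active, where it is known to misbehave.
 */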
3324 intel_enable_ppgtt(struct drm_device *dev)
3326 if (i915_enable_ppgtt >= 0)
3327 return i915_enable_ppgtt;
3329 /* Disable ppgtt on SNB if VT-d is on. */
3330 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled)
3336 int i915_gem_init(struct drm_device *dev)
3338 struct drm_i915_private *dev_priv = dev->dev_private;
3339 unsigned long prealloc_size, gtt_size, mappable_size;
3342 prealloc_size = dev_priv->mm.gtt->stolen_size;
3343 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3344 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3346 /* Basic memrange allocator for stolen space */
3347 drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
3350 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3351 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3352 * aperture accordingly when using aliasing ppgtt. */
3353 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3354 /* For paranoia keep the guard page in between. */
3355 gtt_size -= PAGE_SIZE;
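/*
 * Added note: with the usual 512 page-directory entries and 4 KiB pages
 * this reserves 512 * 4096 = 2 MiB of GTT address space for the PPGTT page
 * directory, plus one extra guard page.
 */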
3357 i915_gem_do_init(dev, 0, mappable_size, gtt_size);
3359 ret = i915_gem_init_aliasing_ppgtt(dev);
3365 /* Let GEM manage all of the aperture.
3367 * However, leave one page at the end still bound to the scratch
3368 * page. There are a number of places where the hardware
3369 * apparently prefetches past the end of the object, and we've
3370 * seen multiple hangs with the GPU head pointer stuck in a
3371 * batchbuffer bound at the last page of the aperture. One page
3372 * should be enough to keep any prefetching inside of the aperture.
3375 i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
3378 ret = i915_gem_init_hw(dev);
3381 i915_gem_cleanup_aliasing_ppgtt(dev);
3386 /* Try to set up FBC with a reasonable compressed buffer size */
3387 if (I915_HAS_FBC(dev) && i915_powersave) {
3390 /* Leave 1M for line length buffer & misc. */
3392 /* Try to get a 32M buffer... */
3393 if (prealloc_size > (36*1024*1024))
3394 cfb_size = 32*1024*1024;
3395 else /* fall back to 7/8 of the stolen space */
3396 cfb_size = prealloc_size * 7 / 8;
3397 i915_setup_compression(dev, cfb_size);
3401 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3402 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3403 dev_priv->dri1.allow_batchbuffer = 1;
3408 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3410 drm_i915_private_t *dev_priv = dev->dev_private;
3411 struct intel_ring_buffer *ring;
3414 for_each_ring(ring, dev_priv, i)
3415 intel_cleanup_ring_buffer(ring);
3419 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3420 struct drm_file *file_priv)
3422 drm_i915_private_t *dev_priv = dev->dev_private;
3425 if (drm_core_check_feature(dev, DRIVER_MODESET))
3428 if (atomic_read(&dev_priv->mm.wedged)) {
3429 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3430 atomic_set(&dev_priv->mm.wedged, 0);
3434 dev_priv->mm.suspended = 0;
3436 ret = i915_gem_init_hw(dev);
3442 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list"));
3445 ret = drm_irq_install(dev);
3447 goto cleanup_ringbuffer;
3453 i915_gem_cleanup_ringbuffer(dev);
3454 dev_priv->mm.suspended = 1;
3461 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3462 struct drm_file *file_priv)
3464 if (drm_core_check_feature(dev, DRIVER_MODESET))
3467 drm_irq_uninstall(dev);
3468 return i915_gem_idle(dev);
3472 i915_gem_lastclose(struct drm_device *dev)
3476 if (drm_core_check_feature(dev, DRIVER_MODESET))
3479 ret = i915_gem_idle(dev);
3481 DRM_ERROR("failed to idle hardware: %d\n", ret);
3485 init_ring_lists(struct intel_ring_buffer *ring)
3487 INIT_LIST_HEAD(&ring->active_list);
3488 INIT_LIST_HEAD(&ring->request_list);
3492 i915_gem_load(struct drm_device *dev)
3495 drm_i915_private_t *dev_priv = dev->dev_private;
3497 INIT_LIST_HEAD(&dev_priv->mm.active_list);
3498 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3499 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3500 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3501 for (i = 0; i < I915_NUM_RINGS; i++)
3502 init_ring_lists(&dev_priv->ring[i]);
3503 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3504 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3505 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3506 i915_gem_retire_work_handler);
3507 init_completion(&dev_priv->error_completion);
3509 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3511 I915_WRITE(MI_ARB_STATE,
3512 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
3515 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3517 /* Old X drivers will take 0-2 for front, back, depth buffers */
3518 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3519 dev_priv->fence_reg_start = 3;
3521 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3522 dev_priv->num_fence_regs = 16;
3524 dev_priv->num_fence_regs = 8;
3526 /* Initialize fence registers to zero */
3527 i915_gem_reset_fences(dev);
3529 i915_gem_detect_bit_6_swizzle(dev);
3530 init_waitqueue_head(&dev_priv->pending_flip_queue);
3532 dev_priv->mm.interruptible = true;
3535 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3536 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3537 register_shrinker(&dev_priv->mm.inactive_shrinker);
3539 dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem,
3540 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY);
3545 * Create a physically contiguous memory object for this object
3546 * e.g. for cursor + overlay regs
3548 static int i915_gem_init_phys_object(struct drm_device *dev,
3549 int id, int size, int align)
3551 drm_i915_private_t *dev_priv = dev->dev_private;
3552 struct drm_i915_gem_phys_object *phys_obj;
3555 if (dev_priv->mm.phys_objs[id - 1] || !size)
3558 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), M_DRM,
3565 phys_obj->handle = drm_pci_alloc(dev, size, align, ~0);
3566 if (!phys_obj->handle) {
3570 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr,
3571 size / PAGE_SIZE, PAT_WRITE_COMBINING);
3573 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3578 drm_free(phys_obj, M_DRM);
3582 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3584 drm_i915_private_t *dev_priv = dev->dev_private;
3585 struct drm_i915_gem_phys_object *phys_obj;
3587 if (!dev_priv->mm.phys_objs[id - 1])
3590 phys_obj = dev_priv->mm.phys_objs[id - 1];
3591 if (phys_obj->cur_obj) {
3592 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3595 drm_pci_free(dev, phys_obj->handle);
3596 drm_free(phys_obj, M_DRM);
3597 dev_priv->mm.phys_objs[id - 1] = NULL;
3600 void i915_gem_free_all_phys_object(struct drm_device *dev)
3604 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3605 i915_gem_free_phys_object(dev, i);
3608 void i915_gem_detach_phys_object(struct drm_device *dev,
3609 struct drm_i915_gem_object *obj)
3618 vaddr = obj->phys_obj->handle->vaddr;
3620 page_count = obj->base.size / PAGE_SIZE;
3621 VM_OBJECT_LOCK(obj->base.vm_obj);
3622 for (i = 0; i < page_count; i++) {
3623 m = i915_gem_wire_page(obj->base.vm_obj, i);
3627 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3628 sf = sf_buf_alloc(m);
3630 dst = (char *)sf_buf_kva(sf);
3631 memcpy(dst, vaddr + IDX_TO_OFF(i), PAGE_SIZE);
3634 drm_clflush_pages(&m, 1);
3636 VM_OBJECT_LOCK(obj->base.vm_obj);
3637 vm_page_reference(m);
3639 vm_page_busy_wait(m, FALSE, "i915gem");
3640 vm_page_unwire(m, 0);
3643 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3644 intel_gtt_chipset_flush();
3646 obj->phys_obj->cur_obj = NULL;
3647 obj->phys_obj = NULL;
3651 i915_gem_attach_phys_object(struct drm_device *dev,
3652 struct drm_i915_gem_object *obj,
3656 drm_i915_private_t *dev_priv = dev->dev_private;
3660 int i, page_count, ret;
3662 if (id > I915_MAX_PHYS_OBJECT)
3665 if (obj->phys_obj) {
3666 if (obj->phys_obj->id == id)
3668 i915_gem_detach_phys_object(dev, obj);
3671 /* create a new object */
3672 if (!dev_priv->mm.phys_objs[id - 1]) {
3673 ret = i915_gem_init_phys_object(dev, id,
3674 obj->base.size, align);
3676 DRM_ERROR("failed to init phys object %d size: %zu\n",
3677 id, obj->base.size);
3682 /* bind to the object */
3683 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3684 obj->phys_obj->cur_obj = obj;
3686 page_count = obj->base.size / PAGE_SIZE;
3688 VM_OBJECT_LOCK(obj->base.vm_obj);
3690 for (i = 0; i < page_count; i++) {
3691 m = i915_gem_wire_page(obj->base.vm_obj, i);
3696 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3697 sf = sf_buf_alloc(m);
3698 src = (char *)sf_buf_kva(sf);
3699 dst = (char *)obj->phys_obj->handle->vaddr + IDX_TO_OFF(i);
3700 memcpy(dst, src, PAGE_SIZE);
3703 VM_OBJECT_LOCK(obj->base.vm_obj);
3705 vm_page_reference(m);
3706 vm_page_busy_wait(m, FALSE, "i915gem");
3707 vm_page_unwire(m, 0);
3710 VM_OBJECT_UNLOCK(obj->base.vm_obj);
3716 i915_gem_phys_pwrite(struct drm_device *dev,
3717 struct drm_i915_gem_object *obj,
3718 struct drm_i915_gem_pwrite *args,
3719 struct drm_file *file_priv)
3721 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset;
3722 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
3724 if (copyin_nofault(user_data, vaddr, args->size) != 0) {
3725 unsigned long unwritten;
3727 /* The physical object once assigned is fixed for the lifetime
3728 * of the obj, so we can safely drop the lock and continue to access vaddr.
3732 unwritten = copy_from_user(vaddr, user_data, args->size);
3738 i915_gem_chipset_flush(dev);
3742 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
3744 struct drm_i915_file_private *file_priv = file->driver_priv;
3746 /* Clean up our request list when the client is going away, so that
3747 * later retire_requests won't dereference our soon-to-be-gone file_priv.
3750 spin_lock(&file_priv->mm.lock);
3751 while (!list_empty(&file_priv->mm.request_list)) {
3752 struct drm_i915_gem_request *request;
3754 request = list_first_entry(&file_priv->mm.request_list,
3755 struct drm_i915_gem_request,
3757 list_del(&request->client_list);
3758 request->file_priv = NULL;
3760 spin_unlock(&file_priv->mm.lock);
3764 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
3765 vm_ooffset_t foff, struct ucred *cred, u_short *color)
3768 *color = 0; /* XXXKIB */
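/*
 * Added summary: pager fault handler for GTT mmaps.  It drops the VM object
 * lock, takes the struct mutex, binds the object into a mappable GTT range
 * if necessary, moves it to the GTT domain, updates the fence state and
 * finally inserts the fictitious page backing the faulting GTT address into
 * the VM object.
 */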
3775 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
3778 struct drm_gem_object *gem_obj;
3779 struct drm_i915_gem_object *obj;
3780 struct drm_device *dev;
3781 drm_i915_private_t *dev_priv;
3786 gem_obj = vm_obj->handle;
3787 obj = to_intel_bo(gem_obj);
3788 dev = obj->base.dev;
3789 dev_priv = dev->dev_private;
3791 write = (prot & VM_PROT_WRITE) != 0;
3795 vm_object_pip_add(vm_obj, 1);
3798 * Remove the placeholder page inserted by vm_fault() from the
3799 * object before dropping the object lock. If
3800 * i915_gem_release_mmap() is active in parallel on this gem
3801 * object, then it owns the drm device sx and might find the
3802 * placeholder already. Then, since the page is busy,
3803 * i915_gem_release_mmap() sleeps waiting for the busy state
3804 * of the page cleared. We will not be able to acquire the drm
3805 * device lock until i915_gem_release_mmap() is able to make progress.
3808 if (*mres != NULL) {
3810 vm_page_remove(oldm);
3815 VM_OBJECT_UNLOCK(vm_obj);
3821 ret = i915_mutex_lock_interruptible(dev);
3830 * Since the object lock was dropped, other thread might have
3831 * faulted on the same GTT address and instantiated the
3832 * mapping for the page. Recheck.
3834 VM_OBJECT_LOCK(vm_obj);
3835 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
3837 if ((m->flags & PG_BUSY) != 0) {
3840 vm_page_sleep(m, "915pee");
3846 VM_OBJECT_UNLOCK(vm_obj);
3848 /* Now bind it into the GTT if needed */
3849 if (!obj->map_and_fenceable) {
3850 ret = i915_gem_object_unbind(obj);
3856 if (!obj->gtt_space) {
3857 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
3863 ret = i915_gem_object_set_to_gtt_domain(obj, write);
3870 if (obj->tiling_mode == I915_TILING_NONE)
3871 ret = i915_gem_object_put_fence(obj);
3873 ret = i915_gem_object_get_fence(obj);
3879 if (i915_gem_object_is_inactive(obj))
3880 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3882 obj->fault_mappable = true;
3883 VM_OBJECT_LOCK(vm_obj);
3884 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
3891 KASSERT((m->flags & PG_FICTITIOUS) != 0,
3892 ("not fictitious %p", m));
3893 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
3895 if ((m->flags & PG_BUSY) != 0) {
3898 vm_page_sleep(m, "915pbs");
3902 m->valid = VM_PAGE_BITS_ALL;
3903 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
3906 vm_page_busy_try(m, false);
3912 vm_object_pip_wakeup(vm_obj);
3913 return (VM_PAGER_OK);
3918 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
3919 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
3920 goto unlocked_vmobj;
3922 VM_OBJECT_LOCK(vm_obj);
3923 vm_object_pip_wakeup(vm_obj);
3924 return (VM_PAGER_ERROR);
3928 i915_gem_pager_dtor(void *handle)
3930 struct drm_gem_object *obj;
3931 struct drm_device *dev;
3937 drm_gem_free_mmap_offset(obj);
3938 i915_gem_release_mmap(to_intel_bo(obj));
3939 drm_gem_object_unreference(obj);
3943 struct cdev_pager_ops i915_gem_pager_ops = {
3944 .cdev_pg_fault = i915_gem_pager_fault,
3945 .cdev_pg_ctor = i915_gem_pager_ctor,
3946 .cdev_pg_dtor = i915_gem_pager_dtor
3949 #define GEM_PARANOID_CHECK_GTT 0
3950 #if GEM_PARANOID_CHECK_GTT
3952 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma,
3955 struct drm_i915_private *dev_priv;
3957 unsigned long start, end;
3961 dev_priv = dev->dev_private;
3962 start = OFF_TO_IDX(dev_priv->mm.gtt_start);
3963 end = OFF_TO_IDX(dev_priv->mm.gtt_end);
3964 for (i = start; i < end; i++) {
3965 pa = intel_gtt_read_pte_paddr(i);
3966 for (j = 0; j < page_count; j++) {
3967 if (pa == VM_PAGE_TO_PHYS(ma[j])) {
3968 panic("Page %p in GTT pte index %d pte %x",
3969 ma[j], i, intel_gtt_read_pte(i));
3976 #define VM_OBJECT_LOCK_ASSERT_OWNED(object)
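/*
 * Added summary: i915_gem_wire_page() grabs the page at the given index of
 * the backing VM object, pages it in through the pager when it is not yet
 * valid (zero-filling it when the pager has no contents) and returns it
 * wired for use by the GEM page-list code.
 */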
3979 i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex)
3984 VM_OBJECT_LOCK_ASSERT_OWNED(object);
3985 m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3986 if (m->valid != VM_PAGE_BITS_ALL) {
3987 if (vm_pager_has_page(object, pindex)) {
3988 rv = vm_pager_get_page(object, &m, 1);
3989 m = vm_page_lookup(object, pindex);
3992 if (rv != VM_PAGER_OK) {
3997 pmap_zero_page(VM_PAGE_TO_PHYS(m));
3998 m->valid = VM_PAGE_BITS_ALL;
4008 i915_gpu_is_active(struct drm_device *dev)
4010 drm_i915_private_t *dev_priv = dev->dev_private;
4012 return !list_empty(&dev_priv->mm.active_list);
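/*
 * Added summary: vm_lowmem event handler.  Under memory pressure it retires
 * finished requests, unbinds purgeable inactive objects first, then any
 * remaining inactive objects, and as a last resort idles the GPU so that
 * further buffers can be released.
 */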
4016 i915_gem_lowmem(void *arg)
4018 struct drm_device *dev;
4019 struct drm_i915_private *dev_priv;
4020 struct drm_i915_gem_object *obj, *next;
4021 int cnt, cnt_fail, cnt_total;
4024 dev_priv = dev->dev_private;
4026 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT))
4030 /* first scan for clean buffers */
4031 i915_gem_retire_requests(dev);
4033 cnt_total = cnt_fail = cnt = 0;
4035 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
4037 if (i915_gem_object_is_purgeable(obj)) {
4038 if (i915_gem_object_unbind(obj) != 0)
4044 /* second pass, evict/count anything still on the inactive list */
4045 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
4047 if (i915_gem_object_unbind(obj) == 0)
4053 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) {
4055 * We are desperate for pages, so as a last resort, wait
4056 * for the GPU to finish and discard whatever we can.
4057 * This dramatically reduces the number of OOM-killer
4058 * events whilst running the GPU aggressively.
4060 if (i915_gpu_idle(dev) == 0)
4067 i915_gem_unload(struct drm_device *dev)
4069 struct drm_i915_private *dev_priv;
4071 dev_priv = dev->dev_private;
4072 EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem);