Revert "drm/i915: Sync i915_gem_pwrite_ioctl() with Linux 3.11"
[dragonfly.git] / sys / dev / drm / i915 / i915_gem.c
561529b1 1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 * Copyright (c) 2011 The FreeBSD Foundation
27 * All rights reserved.
28 *
29 * This software was developed by Konstantin Belousov under sponsorship from
30 * the FreeBSD Foundation.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
1b006bb0 52 *
53 */
54
55#include <sys/resourcevar.h>
56#include <sys/sfbuf.h>
7256b59b 57#include <machine/md_var.h>
e2b8ab05 58
18e26a6d 59#include <drm/drmP.h>
5c6c6f23 60#include <drm/i915_drm.h>
5718399f 61#include "i915_drv.h"
7256b59b 62#include "i915_trace.h"
5718399f 63#include "intel_drv.h"
1964046d 64#include <linux/shmem_fs.h>
65#include <linux/slab.h>
66#include <linux/pci.h>
575ea5a0 67
7cbd1a46
FT
68static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
69static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
b00bc81c
FT
70static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
71 unsigned alignment,
72 bool map_and_fenceable,
73 bool nonblocking);
e11a51e3 74static int i915_gem_phys_pwrite(struct drm_device *dev,
d1c259ee
FT
75 struct drm_i915_gem_object *obj,
76 struct drm_i915_gem_pwrite *args,
77 struct drm_file *file);
e3359f38
FT
78
79static void i915_gem_write_fence(struct drm_device *dev, int reg,
80 struct drm_i915_gem_object *obj);
81static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
82 struct drm_i915_fence_reg *fence,
83 bool enable);
7cbd1a46 84
a2fdbec6 85static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
575ea5a0 86static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
e3359f38
FT
87
88static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
89{
90 if (obj->tiling_mode)
91 i915_gem_release_mmap(obj);
92
93 /* As we do not have an associated fence register, we will force
94 * a tiling change if we ever need to acquire one.
95 */
f192107f 96 obj->fence_dirty = false;
e3359f38
FT
97 obj->fence_reg = I915_FENCE_REG_NONE;
98}
99
575ea5a0 100static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
575ea5a0
FT
101static void i915_gem_lowmem(void *arg);
102
e11a51e3
FT
103/* some bookkeeping */
104static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
105 size_t size)
575ea5a0 106{
575ea5a0
FT
107 dev_priv->mm.object_count++;
108 dev_priv->mm.object_memory += size;
109}
110
e11a51e3
FT
111static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
112 size_t size)
575ea5a0 113{
575ea5a0
FT
114 dev_priv->mm.object_count--;
115 dev_priv->mm.object_memory -= size;
116}
117
118static int
a2fdbec6 119i915_gem_wait_for_error(struct i915_gpu_error *error)
575ea5a0 120{
575ea5a0
FT
121 int ret;
122
a2fdbec6
FT
123#define EXIT_COND (!i915_reset_in_progress(error) || \
124 i915_terminally_wedged(error))
125 if (EXIT_COND)
d65a337f 126 return 0;
575ea5a0 127
901476d5
FT
128 /*
129 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
130 * userspace. If it takes that long something really bad is going on and
131 * we should simply try to bail out and fail as gracefully as possible.
132 */
a2fdbec6
FT
133 ret = wait_event_interruptible_timeout(error->reset_queue,
134 EXIT_COND,
135 10*HZ);
901476d5
FT
136 if (ret == 0) {
137 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
138 return -EIO;
139 } else if (ret < 0) {
140 return ret;
575ea5a0 141 }
a2fdbec6 142#undef EXIT_COND
575ea5a0 143
d65a337f 144 return 0;
575ea5a0
FT
145}
146
e11a51e3 147int i915_mutex_lock_interruptible(struct drm_device *dev)
575ea5a0 148{
a2fdbec6 149 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0
FT
150 int ret;
151
a2fdbec6 152 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
d2557f23
FT
153 if (ret)
154 return ret;
575ea5a0 155
a2fdbec6 156 ret = lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_SLEEPFAIL);
e11a51e3
FT
157 if (ret)
158 return -EINTR;
575ea5a0 159
e11a51e3 160 WARN_ON(i915_verify_lists(dev));
e11a51e3 161 return 0;
575ea5a0
FT
162}
163
e11a51e3
FT
164static inline bool
165i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
575ea5a0 166{
f192107f 167 return !obj->active;
575ea5a0
FT
168}
169
170int
171i915_gem_init_ioctl(struct drm_device *dev, void *data,
f192107f 172 struct drm_file *file)
575ea5a0 173{
7256b59b 174 struct drm_i915_private *dev_priv = dev->dev_private;
f192107f 175 struct drm_i915_gem_init *args = data;
575ea5a0 176
f192107f
FT
177 if (drm_core_check_feature(dev, DRIVER_MODESET))
178 return -ENODEV;
575ea5a0
FT
179
180 if (args->gtt_start >= args->gtt_end ||
181 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
f192107f
FT
182 return -EINVAL;
183
184 /* GEM with user mode setting was never supported on ilk and later. */
185 if (INTEL_INFO(dev)->gen >= 5)
186 return -ENODEV;
575ea5a0 187
a2fdbec6
FT
188 mutex_lock(&dev->struct_mutex);
189 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
190 args->gtt_end);
7256b59b 191 dev_priv->gtt.mappable_end = args->gtt_end;
a2fdbec6 192 mutex_unlock(&dev->struct_mutex);
04adb68c
FT
193
194 return 0;
575ea5a0
FT
195}
196
575ea5a0
FT
197int
198i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
e11a51e3 199 struct drm_file *file)
575ea5a0 200{
f192107f
FT
201 struct drm_i915_private *dev_priv = dev->dev_private;
202 struct drm_i915_gem_get_aperture *args = data;
575ea5a0
FT
203 struct drm_i915_gem_object *obj;
204 size_t pinned;
205
575ea5a0 206 pinned = 0;
a2fdbec6 207 mutex_lock(&dev->struct_mutex);
5d0b1887 208 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
f192107f
FT
209 if (obj->pin_count)
210 pinned += obj->gtt_space->size;
a2fdbec6 211 mutex_unlock(&dev->struct_mutex);
575ea5a0 212
a2fdbec6 213 args->aper_size = dev_priv->gtt.total;
575ea5a0
FT
214 args->aper_available_size = args->aper_size - pinned;
215
f192107f 216 return 0;
575ea5a0
FT
217}
218
5d0b1887
FT
219void i915_gem_object_free(struct drm_i915_gem_object *obj)
220{
221 kfree(obj);
222}
223
245593da 224static int
d2557f23
FT
225i915_gem_create(struct drm_file *file,
226 struct drm_device *dev,
227 uint64_t size,
228 uint32_t *handle_p)
575ea5a0 229{
e11a51e3 230 struct drm_i915_gem_object *obj;
575ea5a0 231 int ret;
d2557f23 232 u32 handle;
575ea5a0 233
e11a51e3
FT
234 size = roundup(size, PAGE_SIZE);
235 if (size == 0)
d2557f23 236 return -EINVAL;
575ea5a0 237
d2557f23 238 /* Allocate the new object */
e11a51e3
FT
239 obj = i915_gem_alloc_object(dev, size);
240 if (obj == NULL)
d2557f23 241 return -ENOMEM;
575ea5a0 242
e11a51e3 243 ret = drm_gem_handle_create(file, &obj->base, &handle);
d2557f23 244 if (ret) {
e11a51e3
FT
245 drm_gem_object_release(&obj->base);
246 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
7256b59b
FT
247 i915_gem_object_free(obj);
248 return ret;
575ea5a0
FT
249 }
250
e11a51e3
FT
251 /* drop reference from allocate - handle holds it now */
252 drm_gem_object_unreference(&obj->base);
7256b59b
FT
253 trace_i915_gem_object_create(obj);
254
e11a51e3 255 *handle_p = handle;
d2557f23 256 return 0;
e11a51e3 257}
575ea5a0 258
e11a51e3
FT
259int
260i915_gem_dumb_create(struct drm_file *file,
261 struct drm_device *dev,
262 struct drm_mode_create_dumb *args)
263{
575ea5a0 264
e11a51e3 265 /* have to work out size/pitch and return them */
7256b59b 266 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
e11a51e3 267 args->size = args->pitch * args->height;
d2557f23
FT
268 return i915_gem_create(file, dev,
269 args->size, &args->handle);
575ea5a0
FT
270}
271
e11a51e3
FT
272int i915_gem_dumb_destroy(struct drm_file *file,
273 struct drm_device *dev,
274 uint32_t handle)
275{
276
d2557f23 277 return drm_gem_handle_delete(file, handle);
e11a51e3
FT
278}
279
280/**
281 * Creates a new mm object and returns a handle to it.
282 */
283int
284i915_gem_create_ioctl(struct drm_device *dev, void *data,
285 struct drm_file *file)
286{
287 struct drm_i915_gem_create *args = data;
288
d2557f23
FT
289 return i915_gem_create(file, dev,
290 args->size, &args->handle);
e11a51e3
FT
291}
292
7256b59b
FT
293static inline int
294__copy_to_user_swizzled(char __user *cpu_vaddr,
295 const char *gpu_vaddr, int gpu_offset,
296 int length)
297{
298 int ret, cpu_offset = 0;
299
300 while (length > 0) {
301 int cacheline_end = ALIGN(gpu_offset + 1, 64);
302 int this_length = min(cacheline_end - gpu_offset, length);
303 int swizzled_gpu_offset = gpu_offset ^ 64;
304
305 ret = __copy_to_user(cpu_vaddr + cpu_offset,
306 gpu_vaddr + swizzled_gpu_offset,
307 this_length);
308 if (ret)
309 return ret + length;
310
311 cpu_offset += this_length;
312 gpu_offset += this_length;
313 length -= this_length;
314 }
315
316 return 0;
317}
318
319static inline int
320__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
321 const char __user *cpu_vaddr,
322 int length)
323{
324 int ret, cpu_offset = 0;
325
326 while (length > 0) {
327 int cacheline_end = ALIGN(gpu_offset + 1, 64);
328 int this_length = min(cacheline_end - gpu_offset, length);
329 int swizzled_gpu_offset = gpu_offset ^ 64;
330
331 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
332 cpu_vaddr + cpu_offset,
333 this_length);
334 if (ret)
335 return ret + length;
336
337 cpu_offset += this_length;
338 gpu_offset += this_length;
339 length -= this_length;
340 }
341
342 return 0;
343}
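/*
 * Worked example (editor's illustration, not part of the driver source):
 * the two swizzled copy helpers above walk the buffer one 64-byte
 * cacheline at a time and XOR each GPU cacheline offset with 64 to undo
 * bit-17 swizzling. A 100-byte copy starting at gpu_offset 96 is therefore
 * split into three chunks: 32 bytes from swizzled offset 96 ^ 64 = 32,
 * 64 bytes from 128 ^ 64 = 192, and the final 4 bytes from 192 ^ 64 = 128,
 * while cpu_offset advances linearly through 0, 32 and 96.
 */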
344
345/* Per-page copy function for the shmem pread fastpath.
346 * Flushes invalid cachelines before reading the target if
347 * needs_clflush is set. */
348static int
349shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
350 char __user *user_data,
351 bool page_do_bit17_swizzling, bool needs_clflush)
352{
353 char *vaddr;
354 int ret;
355
356 if (unlikely(page_do_bit17_swizzling))
357 return -EINVAL;
358
359 vaddr = kmap_atomic(page);
360 if (needs_clflush)
361 drm_clflush_virt_range(vaddr + shmem_page_offset,
362 page_length);
363 ret = __copy_to_user_inatomic(user_data,
364 vaddr + shmem_page_offset,
365 page_length);
366 kunmap_atomic(vaddr);
367
368 return ret ? -EFAULT : 0;
369}
370
371static void
372shmem_clflush_swizzled_range(char *addr, unsigned long length,
373 bool swizzled)
374{
375 if (unlikely(swizzled)) {
376 unsigned long start = (unsigned long) addr;
377 unsigned long end = (unsigned long) addr + length;
378
379 /* For swizzling simply ensure that we always flush both
380 * channels. Lame, but simple and it works. Swizzled
381 * pwrite/pread is far from a hotpath - current userspace
382 * doesn't use it at all. */
383 start = round_down(start, 128);
384 end = round_up(end, 128);
385
386 drm_clflush_virt_range((void *)start, end - start);
387 } else {
388 drm_clflush_virt_range(addr, length);
389 }
390
391}
392
393/* Only difference to the fast-path function is that this can handle bit17
394 * and uses non-atomic copy and kmap functions. */
395static int
396shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
397 char __user *user_data,
398 bool page_do_bit17_swizzling, bool needs_clflush)
399{
400 char *vaddr;
401 int ret;
402
403 vaddr = kmap(page);
404 if (needs_clflush)
405 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
406 page_length,
407 page_do_bit17_swizzling);
408
409 if (page_do_bit17_swizzling)
410 ret = __copy_to_user_swizzled(user_data,
411 vaddr, shmem_page_offset,
412 page_length);
413 else
414 ret = __copy_to_user(user_data,
415 vaddr + shmem_page_offset,
416 page_length);
417 kunmap(page);
418
 419 return ret ? -EFAULT : 0;
420}
421
d1c259ee
FT
422static inline void vm_page_reference(vm_page_t m)
423{
424 vm_page_flag_set(m, PG_REFERENCED);
425}
426
427static int
428i915_gem_shmem_pread(struct drm_device *dev,
429 struct drm_i915_gem_object *obj,
430 struct drm_i915_gem_pread *args,
431 struct drm_file *file)
432{
7256b59b
FT
433 char __user *user_data;
434 ssize_t remain;
435 off_t offset;
436 int shmem_page_offset, page_length, ret = 0;
437 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
438 int hit_slowpath = 0;
439 int needs_clflush = 0;
440 int i;
d1c259ee 441
7256b59b
FT
442 user_data = (char __user *) (uintptr_t) args->data_ptr;
443 remain = args->size;
d1c259ee 444
7256b59b
FT
445 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
446
447 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
448 /* If we're not in the cpu read domain, set ourself into the gtt
449 * read domain and manually flush cachelines (if required). This
450 * optimizes for the case when the gpu will dirty the data
451 * anyway again before the next pread happens. */
452 if (obj->cache_level == I915_CACHE_NONE)
453 needs_clflush = 1;
454 if (obj->gtt_space) {
455 ret = i915_gem_object_set_to_gtt_domain(obj, false);
456 if (ret)
457 return ret;
458 }
459 }
d1c259ee 460
7256b59b
FT
461 ret = i915_gem_object_get_pages(obj);
462 if (ret)
463 return ret;
d1c259ee 464
7256b59b 465 i915_gem_object_pin_pages(obj);
d1c259ee 466
7256b59b 467 offset = args->offset;
d1c259ee 468
7256b59b
FT
469 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
470 struct vm_page *page;
471
472 if (i < offset >> PAGE_SHIFT)
473 continue;
474
475 if (remain <= 0)
d1c259ee 476 break;
7256b59b
FT
477
478 /* Operation in this page
479 *
480 * shmem_page_offset = offset within page in shmem file
481 * page_length = bytes to copy for this page
482 */
483 shmem_page_offset = offset_in_page(offset);
484 page_length = remain;
485 if ((shmem_page_offset + page_length) > PAGE_SIZE)
486 page_length = PAGE_SIZE - shmem_page_offset;
487
488#ifdef __linux__
489 page = sg_page(sg);
490 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
491 (page_to_phys(page) & (1 << 17)) != 0;
492#else
493 page = obj->pages[i];
494 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
495 (VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
496#endif
497
498 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
499 user_data, page_do_bit17_swizzling,
500 needs_clflush);
501 if (ret == 0)
502 goto next_page;
503
504 hit_slowpath = 1;
505 mutex_unlock(&dev->struct_mutex);
506
507#ifdef __linux__
508 if (!prefaulted) {
509 ret = fault_in_multipages_writeable(user_data, remain);
510 /* Userspace is tricking us, but we've already clobbered
511 * its pages with the prefault and promised to write the
512 * data up to the first fault. Hence ignore any errors
513 * and just continue. */
514 (void)ret;
515 prefaulted = 1;
516 }
517#endif
518
519 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
520 user_data, page_do_bit17_swizzling,
521 needs_clflush);
522
523 mutex_lock(&dev->struct_mutex);
524
525next_page:
526#ifdef __linux__
527 mark_page_accessed(page);
528#endif
529
530 if (ret)
531 goto out;
532
533 remain -= page_length;
534 user_data += page_length;
535 offset += page_length;
d1c259ee 536 }
d1c259ee 537
7256b59b
FT
538out:
539 i915_gem_object_unpin_pages(obj);
540
541 if (hit_slowpath) {
542 /* Fixup: Kill any reinstated backing storage pages */
543 if (obj->madv == __I915_MADV_PURGED)
544 i915_gem_object_truncate(obj);
545 }
546
547 return ret;
d1c259ee
FT
548}
549
e11a51e3
FT
550/**
551 * Reads data from the object referenced by handle.
552 *
553 * On error, the contents of *data are undefined.
554 */
555int
556i915_gem_pread_ioctl(struct drm_device *dev, void *data,
557 struct drm_file *file)
558{
d2557f23 559 struct drm_i915_gem_pread *args = data;
d1c259ee
FT
560 struct drm_i915_gem_object *obj;
561 int ret = 0;
562
563 if (args->size == 0)
564 return 0;
565
566 ret = i915_mutex_lock_interruptible(dev);
567 if (ret)
568 return ret;
569
570 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
571 if (&obj->base == NULL) {
572 ret = -ENOENT;
573 goto unlock;
574 }
575
576 /* Bounds check source. */
577 if (args->offset > obj->base.size ||
578 args->size > obj->base.size - args->offset) {
579 ret = -EINVAL;
580 goto out;
581 }
582
583 ret = i915_gem_shmem_pread(dev, obj, args, file);
584out:
585 drm_gem_object_unreference(&obj->base);
586unlock:
a2fdbec6
FT
587 mutex_unlock(&dev->struct_mutex);
588 return ret;
589}
590
dce51b26 591#if 0
a2fdbec6
FT
592/* This is the fast write path which cannot handle
593 * page faults in the source data
594 */
595
596static inline int
597fast_user_write(struct io_mapping *mapping,
598 loff_t page_base, int page_offset,
599 char __user *user_data,
600 int length)
601{
602 void __iomem *vaddr_atomic;
603 void *vaddr;
604 unsigned long unwritten;
605
606 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
607 /* We can use the cpu mem copy function because this is X86. */
dce51b26 608 vaddr = (void __force*)vaddr_atomic + page_offset;
a2fdbec6
FT
609 unwritten = __copy_from_user_inatomic_nocache(vaddr,
610 user_data, length);
611 io_mapping_unmap_atomic(vaddr_atomic);
612 return unwritten;
613}
614
615/**
616 * This is the fast pwrite path, where we copy the data directly from the
617 * user into the GTT, uncached.
618 */
619static int
620i915_gem_gtt_pwrite_fast(struct drm_device *dev,
621 struct drm_i915_gem_object *obj,
622 struct drm_i915_gem_pwrite *args,
623 struct drm_file *file)
624{
625 drm_i915_private_t *dev_priv = dev->dev_private;
626 ssize_t remain;
627 loff_t offset, page_base;
628 char __user *user_data;
629 int page_offset, page_length, ret;
630
631 ret = i915_gem_object_pin(obj, 0, true, true);
632 if (ret)
633 goto out;
634
635 ret = i915_gem_object_set_to_gtt_domain(obj, true);
636 if (ret)
637 goto out_unpin;
638
639 ret = i915_gem_object_put_fence(obj);
640 if (ret)
641 goto out_unpin;
642
8e26cdf6 643 user_data = to_user_ptr(args->data_ptr);
a2fdbec6
FT
644 remain = args->size;
645
646 offset = obj->gtt_offset + args->offset;
647
648 while (remain > 0) {
649 /* Operation in this page
650 *
651 * page_base = page offset within aperture
652 * page_offset = offset within page
653 * page_length = bytes to copy for this page
654 */
655 page_base = offset & PAGE_MASK;
656 page_offset = offset_in_page(offset);
657 page_length = remain;
658 if ((page_offset + remain) > PAGE_SIZE)
659 page_length = PAGE_SIZE - page_offset;
660
661 /* If we get a fault while copying data, then (presumably) our
662 * source page isn't available. Return the error and we'll
663 * retry in the slow path.
664 */
665 if (fast_user_write(dev_priv->gtt.mappable, page_base,
666 page_offset, user_data, page_length)) {
667 ret = -EFAULT;
668 goto out_unpin;
669 }
670
671 remain -= page_length;
672 user_data += page_length;
673 offset += page_length;
674 }
675
676out_unpin:
677 i915_gem_object_unpin(obj);
678out:
d1c259ee
FT
679 return ret;
680}
dce51b26
FT
681#endif
682
683static int
684i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj,
685 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file)
686{
687 vm_offset_t mkva;
688 int ret;
689
690 /*
691 * Pass the unaligned physical address and size to pmap_mapdev_attr()
692 * so it can properly calculate whether an extra page needs to be
693 * mapped or not to cover the requested range. The function will
694 * add the page offset into the returned mkva for us.
695 */
696 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset +
697 offset, size, PAT_WRITE_COMBINING);
698 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size);
699 pmap_unmapdev(mkva, size);
700 return ret;
701}
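/*
 * Example (editor's illustration): a 100-byte write whose GTT address ends
 * 16 bytes before a page boundary spans two physical pages even though the
 * size is far smaller than PAGE_SIZE, which is why the unaligned address
 * and size are passed straight to pmap_mapdev_attr() above instead of
 * page-rounded values.
 */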
d1c259ee 702
7256b59b
FT
703#if 0
704/* Per-page copy function for the shmem pwrite fastpath.
705 * Flushes invalid cachelines before writing to the target if
706 * needs_clflush_before is set and flushes out any written cachelines after
707 * writing if needs_clflush is set. */
708static int
709shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
710 char __user *user_data,
711 bool page_do_bit17_swizzling,
712 bool needs_clflush_before,
713 bool needs_clflush_after)
714{
715 char *vaddr;
716 int ret;
717
718 if (unlikely(page_do_bit17_swizzling))
719 return -EINVAL;
720
721 vaddr = kmap_atomic(page);
722 if (needs_clflush_before)
723 drm_clflush_virt_range(vaddr + shmem_page_offset,
724 page_length);
725 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
726 user_data,
727 page_length);
728 if (needs_clflush_after)
729 drm_clflush_virt_range(vaddr + shmem_page_offset,
730 page_length);
731 kunmap_atomic(vaddr);
732
733 return ret ? -EFAULT : 0;
734}
735
736/* Only difference to the fast-path function is that this can handle bit17
737 * and uses non-atomic copy and kmap functions. */
738static int
739shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
740 char __user *user_data,
741 bool page_do_bit17_swizzling,
742 bool needs_clflush_before,
743 bool needs_clflush_after)
744{
745 char *vaddr;
746 int ret;
747
748 vaddr = kmap(page);
749 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
750 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
751 page_length,
752 page_do_bit17_swizzling);
753 if (page_do_bit17_swizzling)
754 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
755 user_data,
756 page_length);
757 else
758 ret = __copy_from_user(vaddr + shmem_page_offset,
759 user_data,
760 page_length);
761 if (needs_clflush_after)
762 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
763 page_length,
764 page_do_bit17_swizzling);
765 kunmap(page);
766
767 return ret ? -EFAULT : 0;
768}
769#endif
770
d1c259ee
FT
771static int
772i915_gem_shmem_pwrite(struct drm_device *dev,
773 struct drm_i915_gem_object *obj,
774 struct drm_i915_gem_pwrite *args,
775 struct drm_file *file)
776{
777 vm_object_t vm_obj;
778 vm_page_t m;
779 struct sf_buf *sf;
780 vm_offset_t mkva;
781 vm_pindex_t obj_pi;
782 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;
783
784 do_bit17_swizzling = 0;
785
786 obj->dirty = 1;
787 vm_obj = obj->base.vm_obj;
788 ret = 0;
789
790 VM_OBJECT_LOCK(vm_obj);
791 vm_object_pip_add(vm_obj, 1);
792 while (args->size > 0) {
793 obj_pi = OFF_TO_IDX(args->offset);
794 obj_po = args->offset & PAGE_MASK;
795
56c606a8 796 m = shmem_read_mapping_page(vm_obj, obj_pi);
d1c259ee
FT
797 VM_OBJECT_UNLOCK(vm_obj);
798
799 sf = sf_buf_alloc(m);
800 mkva = sf_buf_kva(sf);
801 length = min(args->size, PAGE_SIZE - obj_po);
802 while (length > 0) {
803 if (do_bit17_swizzling &&
804 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
805 cnt = roundup2(obj_po + 1, 64);
806 cnt = min(cnt - obj_po, length);
807 swizzled_po = obj_po ^ 64;
808 } else {
809 cnt = length;
810 swizzled_po = obj_po;
811 }
812 ret = -copyin_nofault(
813 (void *)(uintptr_t)args->data_ptr,
814 (char *)mkva + swizzled_po, cnt);
815 if (ret != 0)
816 break;
817 args->data_ptr += cnt;
818 args->size -= cnt;
819 length -= cnt;
820 args->offset += cnt;
821 obj_po += cnt;
822 }
823 sf_buf_free(sf);
824 VM_OBJECT_LOCK(vm_obj);
825 vm_page_dirty(m);
826 vm_page_reference(m);
827 vm_page_busy_wait(m, FALSE, "i915gem");
828 vm_page_unwire(m, 1);
829 vm_page_wakeup(m);
830
831 if (ret != 0)
832 break;
833 }
834 vm_object_pip_wakeup(vm_obj);
835 VM_OBJECT_UNLOCK(vm_obj);
836
837 return (ret);
575ea5a0
FT
838}
839
e11a51e3
FT
840/**
841 * Writes data to the object referenced by handle.
842 *
843 * On error, the contents of the buffer that were to be modified are undefined.
844 */
575ea5a0 845int
e11a51e3
FT
846i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
847 struct drm_file *file)
575ea5a0 848{
d2557f23 849 struct drm_i915_gem_pwrite *args = data;
d1c259ee 850 struct drm_i915_gem_object *obj;
dce51b26
FT
851 vm_page_t *ma;
852 vm_offset_t start, end;
853 int npages, ret;
d1c259ee
FT
854
855 if (args->size == 0)
856 return 0;
857
dce51b26
FT
858 start = trunc_page(args->data_ptr);
859 end = round_page(args->data_ptr + args->size);
860 npages = howmany(end - start, PAGE_SIZE);
861 ma = kmalloc(npages * sizeof(vm_page_t), M_DRM, M_WAITOK |
862 M_ZERO);
863 npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
864 (vm_offset_t)args->data_ptr, args->size,
865 VM_PROT_READ, ma, npages);
866 if (npages == -1) {
867 ret = -EFAULT;
868 goto free_ma;
869 }
7e793f0f 870
d1c259ee 871 ret = i915_mutex_lock_interruptible(dev);
dce51b26
FT
872 if (ret != 0)
873 goto unlocked;
d1c259ee
FT
874
875 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
876 if (&obj->base == NULL) {
877 ret = -ENOENT;
878 goto unlock;
879 }
575ea5a0 880
d1c259ee
FT
881 /* Bounds check destination. */
882 if (args->offset > obj->base.size ||
883 args->size > obj->base.size - args->offset) {
884 ret = -EINVAL;
885 goto out;
886 }
887
d1c259ee
FT
888 if (obj->phys_obj) {
889 ret = i915_gem_phys_pwrite(dev, obj, args, file);
dce51b26
FT
890 } else if (obj->gtt_space &&
891 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
892 ret = i915_gem_object_pin(obj, 0, true, false);
893 if (ret != 0)
894 goto out;
895 ret = i915_gem_object_set_to_gtt_domain(obj, true);
896 if (ret != 0)
897 goto out_unpin;
898 ret = i915_gem_object_put_fence(obj);
899 if (ret != 0)
900 goto out_unpin;
901 ret = i915_gem_gtt_write(dev, obj, args->data_ptr, args->size,
902 args->offset, file);
903out_unpin:
904 i915_gem_object_unpin(obj);
905 } else {
906 ret = i915_gem_object_set_to_cpu_domain(obj, true);
907 if (ret != 0)
908 goto out;
005b6ef6 909 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
dce51b26 910 }
d1c259ee
FT
911out:
912 drm_gem_object_unreference(&obj->base);
913unlock:
a2fdbec6 914 mutex_unlock(&dev->struct_mutex);
dce51b26
FT
915unlocked:
916 vm_page_unhold_pages(ma, npages);
917free_ma:
918 drm_free(ma, M_DRM);
d1c259ee 919 return ret;
e11a51e3 920}
575ea5a0 921
245593da 922int
a2fdbec6 923i915_gem_check_wedge(struct i915_gpu_error *error,
245593da
FT
924 bool interruptible)
925{
a2fdbec6 926 if (i915_reset_in_progress(error)) {
245593da
FT
927 /* Non-interruptible callers can't handle -EAGAIN, hence return
928 * -EIO unconditionally for these. */
929 if (!interruptible)
930 return -EIO;
931
a2fdbec6
FT
932 /* Recovery complete, but the reset failed ... */
933 if (i915_terminally_wedged(error))
245593da
FT
934 return -EIO;
935
936 return -EAGAIN;
937 }
938
939 return 0;
940}
941
686a02f1
FT
942/*
943 * Compare seqno against outstanding lazy request. Emit a request if they are
944 * equal.
945 */
946static int
947i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
948{
949 int ret;
950
951 DRM_LOCK_ASSERT(ring->dev);
952
953 ret = 0;
954 if (seqno == ring->outstanding_lazy_request)
5d0b1887 955 ret = i915_add_request(ring, NULL);
686a02f1
FT
956
957 return ret;
958}
959
960/**
961 * __wait_seqno - wait until execution of seqno has finished
962 * @ring: the ring expected to report seqno
963 * @seqno: duh!
a2fdbec6 964 * @reset_counter: reset sequence associated with the given seqno
965 * @interruptible: do an interruptible wait (normally yes)
966 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
967 *
968 * Note: It is of utmost importance that the passed in seqno and reset_counter
969 * values have been read by the caller in an smp safe manner. Where read-side
970 * locks are involved, it is sufficient to read the reset_counter before
971 * unlocking the lock that protects the seqno. For lockless tricks, the
972 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
973 * inserted.
974 *
 975 * Returns 0 if the seqno was found within the allotted time. Else returns the
976 * errno with remaining time filled in timeout argument.
977 */
978static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
a2fdbec6 979 unsigned reset_counter,
02727ecd
FT
980 bool interruptible, struct timespec *timeout)
981{
982 drm_i915_private_t *dev_priv = ring->dev->dev_private;
983 struct timespec before, now, wait_time={1,0};
984 unsigned long timeout_jiffies;
985 long end;
986 bool wait_forever = true;
987 int ret;
988
989 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
990 return 0;
991
992 if (timeout != NULL) {
993 wait_time = *timeout;
994 wait_forever = false;
995 }
996
8e26cdf6 997 timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
19b28dc8 998
999 if (WARN_ON(!ring->irq_get(ring)))
1000 return -ENODEV;
1001
 1002 /* Record current time in case interrupted by signal, or wedged */
1003 getrawmonotonic(&before);
02727ecd 1004
1005#define EXIT_COND \
1006 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
1007 i915_reset_in_progress(&dev_priv->gpu_error) || \
1008 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
1009 do {
1010 if (interruptible)
1011 end = wait_event_interruptible_timeout(ring->irq_queue,
1012 EXIT_COND,
1013 timeout_jiffies);
1014 else
1015 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1016 timeout_jiffies);
1017
1018 /* We need to check whether any gpu reset happened in between
1019 * the caller grabbing the seqno and now ... */
1020 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
1021 end = -EAGAIN;
1022
 1023 /* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
 1024 * gone. */
1025 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1026 if (ret)
1027 end = ret;
1028 } while (end == 0 && wait_forever);
1029
1030 getrawmonotonic(&now);
1031
1032 ring->irq_put(ring);
1033#undef EXIT_COND
1034
1035 if (timeout) {
1036 struct timespec sleep_time = timespec_sub(now, before);
1037 *timeout = timespec_sub(*timeout, sleep_time);
1038 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1039 set_normalized_timespec(timeout, 0, 0);
19b28dc8 1040 }
02727ecd 1041
1042 switch (end) {
1043 case -EIO:
1044 case -EAGAIN: /* Wedged */
1045 case -ERESTARTSYS: /* Signal */
1046 return (int)end;
1047 case 0: /* Timeout */
1048 return -ETIMEDOUT; /* -ETIME on Linux */
1049 default: /* Completed */
1050 WARN_ON(end < 0); /* We're not aware of other errors */
1051 return 0;
1052 }
1053}
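/*
 * Illustrative caller sketch (editor's note, mirroring the nonblocking
 * wait helper further below): the reset counter must be sampled while the
 * seqno is still protected, and only then may struct_mutex be dropped
 * around the actual wait:
 *
 *	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 *	mutex_unlock(&dev->struct_mutex);
 *	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
 *	mutex_lock(&dev->struct_mutex);
 */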
1054
e11a51e3
FT
1055/**
1056 * Waits for a sequence number to be signaled, and cleans up the
1057 * request and object lists appropriately for that event.
1058 */
1059int
1060i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1061{
d2557f23
FT
1062 struct drm_device *dev = ring->dev;
1063 struct drm_i915_private *dev_priv = dev->dev_private;
0b869d8a
FT
1064 bool interruptible = dev_priv->mm.interruptible;
1065 int ret;
575ea5a0 1066
d2557f23 1067 DRM_LOCK_ASSERT(dev);
245593da 1068 BUG_ON(seqno == 0);
575ea5a0 1069
a2fdbec6 1070 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
245593da
FT
1071 if (ret)
1072 return ret;
575ea5a0 1073
686a02f1
FT
1074 ret = i915_gem_check_olr(ring, seqno);
1075 if (ret)
1076 return ret;
e11a51e3 1077
a2fdbec6
FT
1078 return __wait_seqno(ring, seqno,
1079 atomic_read(&dev_priv->gpu_error.reset_counter),
1080 interruptible, NULL);
575ea5a0
FT
1081}
1082
1083static int
1084i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1085 struct intel_ring_buffer *ring)
1086{
1087 i915_gem_retire_requests_ring(ring);
1088
1089 /* Manually manage the write flush as we may have not yet
1090 * retired the buffer.
1091 *
1092 * Note that the last_write_seqno is always the earlier of
 1093 * the two (read/write) seqno, so if we have successfully waited,
1094 * we know we have passed the last write.
1095 */
1096 obj->last_write_seqno = 0;
1097 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1098
1099 return 0;
1100}
1101
e11a51e3
FT
1102/**
1103 * Ensures that all rendering to the object has completed and the object is
1104 * safe to unbind from the GTT or access from the CPU.
1105 */
686a02f1
FT
1106static __must_check int
1107i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1108 bool readonly)
575ea5a0 1109{
d2557f23 1110 struct intel_ring_buffer *ring = obj->ring;
e11a51e3 1111 u32 seqno;
575ea5a0
FT
1112 int ret;
1113
d2557f23 1114 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
e11a51e3
FT
1115 if (seqno == 0)
1116 return 0;
575ea5a0 1117
d2557f23 1118 ret = i915_wait_seqno(ring, seqno);
686a02f1
FT
1119 if (ret)
1120 return ret;
e11a51e3 1121
5d0b1887 1122 return i915_gem_object_wait_rendering__tail(obj, ring);
686a02f1
FT
1123}
1124
67838cc5
FT
1125/* A nonblocking variant of the above wait. This is a highly dangerous routine
1126 * as the object state may change during this call.
1127 */
1128static __must_check int
1129i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1130 bool readonly)
1131{
1132 struct drm_device *dev = obj->base.dev;
1133 struct drm_i915_private *dev_priv = dev->dev_private;
1134 struct intel_ring_buffer *ring = obj->ring;
a2fdbec6 1135 unsigned reset_counter;
67838cc5
FT
1136 u32 seqno;
1137 int ret;
1138
1139 DRM_LOCK_ASSERT(dev);
1140 BUG_ON(!dev_priv->mm.interruptible);
1141
1142 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1143 if (seqno == 0)
1144 return 0;
1145
a2fdbec6 1146 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
67838cc5
FT
1147 if (ret)
1148 return ret;
1149
1150 ret = i915_gem_check_olr(ring, seqno);
1151 if (ret)
1152 return ret;
1153
a2fdbec6
FT
1154 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1155 mutex_unlock(&dev->struct_mutex);
1156 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
1157 mutex_lock(&dev->struct_mutex);
5d0b1887
FT
1158 if (ret)
1159 return ret;
67838cc5 1160
5d0b1887 1161 return i915_gem_object_wait_rendering__tail(obj, ring);
67838cc5
FT
1162}
1163
e11a51e3
FT
1164/**
1165 * Called when user space prepares to use an object with the CPU, either
1166 * through the mmap ioctl's mapping or a GTT mapping.
1167 */
575ea5a0 1168int
e11a51e3
FT
1169i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1170 struct drm_file *file)
575ea5a0 1171{
f192107f 1172 struct drm_i915_gem_set_domain *args = data;
575ea5a0 1173 struct drm_i915_gem_object *obj;
f192107f
FT
1174 uint32_t read_domains = args->read_domains;
1175 uint32_t write_domain = args->write_domain;
575ea5a0
FT
1176 int ret;
1177
f192107f
FT
1178 /* Only handle setting domains to types used by the CPU. */
1179 if (write_domain & I915_GEM_GPU_DOMAINS)
1180 return -EINVAL;
e11a51e3 1181
f192107f
FT
1182 if (read_domains & I915_GEM_GPU_DOMAINS)
1183 return -EINVAL;
1184
1185 /* Having something in the write domain implies it's in the read
1186 * domain, and only that read domain. Enforce that in the request.
1187 */
1188 if (write_domain != 0 && read_domains != write_domain)
1189 return -EINVAL;
575ea5a0
FT
1190
1191 ret = i915_mutex_lock_interruptible(dev);
f192107f
FT
1192 if (ret)
1193 return ret;
575ea5a0
FT
1194
1195 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1196 if (&obj->base == NULL) {
1197 ret = -ENOENT;
1198 goto unlock;
1199 }
1200
67838cc5
FT
1201 /* Try to flush the object off the GPU without holding the lock.
1202 * We will repeat the flush holding the lock in the normal manner
1203 * to catch cases where we are gazumped.
1204 */
1205 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
1206 if (ret)
1207 goto unref;
1208
f192107f 1209 if (read_domains & I915_GEM_DOMAIN_GTT) {
e11a51e3 1210 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
f192107f
FT
1211
1212 /* Silently promote "you're not bound, there was nothing to do"
1213 * to success, since the client was just asking us to
1214 * make sure everything was done.
1215 */
e11a51e3
FT
1216 if (ret == -EINVAL)
1217 ret = 0;
f192107f 1218 } else {
e11a51e3 1219 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
f192107f 1220 }
575ea5a0 1221
67838cc5 1222unref:
575ea5a0
FT
1223 drm_gem_object_unreference(&obj->base);
1224unlock:
a2fdbec6 1225 mutex_unlock(&dev->struct_mutex);
f192107f 1226 return ret;
575ea5a0
FT
1227}
1228
7cbd1a46 1229/**
e11a51e3 1230 * Called when user space has done writes to this buffer
7cbd1a46
FT
1231 */
1232int
e11a51e3
FT
1233i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1234 struct drm_file *file)
7cbd1a46 1235{
686a02f1 1236 struct drm_i915_gem_sw_finish *args = data;
e11a51e3 1237 struct drm_i915_gem_object *obj;
686a02f1 1238 int ret = 0;
7cbd1a46 1239
e11a51e3 1240 ret = i915_mutex_lock_interruptible(dev);
d2557f23
FT
1241 if (ret)
1242 return ret;
e11a51e3
FT
1243 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1244 if (&obj->base == NULL) {
1245 ret = -ENOENT;
1246 goto unlock;
1247 }
f192107f
FT
1248
1249 /* Pinned buffers may be scanout, so flush the cache */
d2557f23 1250 if (obj->pin_count)
e11a51e3 1251 i915_gem_object_flush_cpu_write_domain(obj);
f192107f 1252
e11a51e3
FT
1253 drm_gem_object_unreference(&obj->base);
1254unlock:
a2fdbec6 1255 mutex_unlock(&dev->struct_mutex);
d2557f23 1256 return ret;
e11a51e3 1257}
7cbd1a46 1258
e11a51e3
FT
1259/**
1260 * Maps the contents of an object, returning the address it is mapped
1261 * into.
1262 *
1263 * While the mapping holds a reference on the contents of the object, it doesn't
1264 * imply a ref on the object itself.
1265 */
1266int
1267i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1268 struct drm_file *file)
1269{
c737e47c 1270 struct drm_i915_gem_mmap *args = data;
e11a51e3 1271 struct drm_gem_object *obj;
c737e47c
FT
1272 struct proc *p = curproc;
1273 vm_map_t map = &p->p_vmspace->vm_map;
e11a51e3
FT
1274 vm_offset_t addr;
1275 vm_size_t size;
c737e47c 1276 int error = 0, rv;
7cbd1a46 1277
e11a51e3
FT
1278 obj = drm_gem_object_lookup(dev, file, args->handle);
1279 if (obj == NULL)
c737e47c
FT
1280 return -ENOENT;
1281
e11a51e3
FT
1282 if (args->size == 0)
1283 goto out;
c737e47c 1284
e11a51e3 1285 size = round_page(args->size);
e11a51e3 1286 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
e11a51e3
FT
1287 error = ENOMEM;
1288 goto out;
7cbd1a46
FT
1289 }
1290
e11a51e3
FT
1291 addr = 0;
1292 vm_object_hold(obj->vm_obj);
1293 vm_object_reference_locked(obj->vm_obj);
1294 vm_object_drop(obj->vm_obj);
0adbcbd6
MD
1295 rv = vm_map_find(map, obj->vm_obj, NULL,
1296 args->offset, &addr, args->size,
1297 PAGE_SIZE, /* align */
1298 TRUE, /* fitit */
1299 VM_MAPTYPE_NORMAL, /* maptype */
1300 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1301 VM_PROT_READ | VM_PROT_WRITE, /* max */
1302 MAP_SHARED /* cow */);
e11a51e3
FT
1303 if (rv != KERN_SUCCESS) {
1304 vm_object_deallocate(obj->vm_obj);
1305 error = -vm_mmap_to_errno(rv);
1306 } else {
1307 args->addr_ptr = (uint64_t)addr;
7cbd1a46 1308 }
e11a51e3
FT
1309out:
1310 drm_gem_object_unreference(obj);
1311 return (error);
7cbd1a46
FT
1312}
1313
e9587a4e
FT
1314int i915_intr_pf;
1315
1316/**
1317 * i915_gem_fault - fault a page into the GTT
1318 * vma: VMA in question
1319 * vmf: fault info
1320 *
1321 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1322 * from userspace. The fault handler takes care of binding the object to
1323 * the GTT (if needed), allocating and programming a fence register (again,
1324 * only if needed based on whether the old reg is still valid or the object
1325 * is tiled) and inserting a new PTE into the faulting process.
1326 *
1327 * Note that the faulting process may involve evicting existing objects
1328 * from the GTT and/or fence registers to make room. So performance may
1329 * suffer if the GTT working set is large or there are few fence registers
1330 * left.
1331 */
1332int
1333i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
1334 vm_page_t *mres)
a2fdbec6 1335{
e9587a4e
FT
1336 struct drm_gem_object *gem_obj;
1337 struct drm_i915_gem_object *obj;
1338 struct drm_device *dev;
1339 drm_i915_private_t *dev_priv;
1340 vm_page_t m, oldm;
1341 int cause, ret;
1342 bool write;
a2fdbec6 1343
e9587a4e
FT
1344 gem_obj = vm_obj->handle;
1345 obj = to_intel_bo(gem_obj);
1346 dev = obj->base.dev;
1347 dev_priv = dev->dev_private;
1348#if 0
1349 write = (prot & VM_PROT_WRITE) != 0;
1350#else
1351 write = true;
1352#endif
1353 vm_object_pip_add(vm_obj, 1);
a2fdbec6 1354
1355 /*
1356 * Remove the placeholder page inserted by vm_fault() from the
1357 * object before dropping the object lock. If
1358 * i915_gem_release_mmap() is active in parallel on this gem
1359 * object, then it owns the drm device sx and might find the
1360 * placeholder already. Then, since the page is busy,
1361 * i915_gem_release_mmap() sleeps waiting for the busy state
 1362 * of the page cleared. We will not be able to acquire the drm
 1363 * device lock until i915_gem_release_mmap() is able to make
 1364 * progress.
1365 */
1366 if (*mres != NULL) {
1367 oldm = *mres;
1368 vm_page_remove(oldm);
1369 *mres = NULL;
1370 } else
1371 oldm = NULL;
1372retry:
1373 VM_OBJECT_UNLOCK(vm_obj);
1374unlocked_vmobj:
1375 cause = ret = 0;
1376 m = NULL;
a2fdbec6 1377
e9587a4e
FT
1378 if (i915_intr_pf) {
1379 ret = i915_mutex_lock_interruptible(dev);
1380 if (ret != 0) {
1381 cause = 10;
1382 goto out;
1383 }
1384 } else
1385 mutex_lock(&dev->struct_mutex);
1386
1387 /*
1388 * Since the object lock was dropped, other thread might have
1389 * faulted on the same GTT address and instantiated the
1390 * mapping for the page. Recheck.
1391 */
1392 VM_OBJECT_LOCK(vm_obj);
1393 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1394 if (m != NULL) {
1395 if ((m->flags & PG_BUSY) != 0) {
1396 mutex_unlock(&dev->struct_mutex);
1397#if 0 /* XXX */
1398 vm_page_sleep(m, "915pee");
1399#endif
1400 goto retry;
1401 }
1402 goto have_page;
1403 } else
1404 VM_OBJECT_UNLOCK(vm_obj);
a2fdbec6
FT
1405
1406 /* Access to snoopable pages through the GTT is incoherent. */
1407 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1408 ret = -EINVAL;
1409 goto unlock;
1410 }
1411
1412 /* Now bind it into the GTT if needed */
e9587a4e
FT
1413 if (!obj->map_and_fenceable) {
1414 ret = i915_gem_object_unbind(obj);
1415 if (ret != 0) {
1416 cause = 20;
1417 goto unlock;
1418 }
1419 }
1420 if (!obj->gtt_space) {
1421 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
1422 if (ret != 0) {
1423 cause = 30;
1424 goto unlock;
1425 }
a2fdbec6 1426
e9587a4e
FT
1427 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1428 if (ret != 0) {
1429 cause = 40;
1430 goto unlock;
1431 }
1432 }
a2fdbec6 1433
e9587a4e
FT
1434 if (obj->tiling_mode == I915_TILING_NONE)
1435 ret = i915_gem_object_put_fence(obj);
1436 else
1437 ret = i915_gem_object_get_fence(obj);
1438 if (ret != 0) {
1439 cause = 50;
1440 goto unlock;
1441 }
1442
1443 if (i915_gem_object_is_inactive(obj))
1444 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
a2fdbec6
FT
1445
1446 obj->fault_mappable = true;
e9587a4e
FT
1447 VM_OBJECT_LOCK(vm_obj);
1448 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
1449 offset);
1450 if (m == NULL) {
1451 cause = 60;
1452 ret = -EFAULT;
1453 goto unlock;
1454 }
1455 KASSERT((m->flags & PG_FICTITIOUS) != 0,
1456 ("not fictitious %p", m));
1457 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
a2fdbec6 1458
e9587a4e
FT
1459 if ((m->flags & PG_BUSY) != 0) {
1460 mutex_unlock(&dev->struct_mutex);
1461#if 0 /* XXX */
1462 vm_page_sleep(m, "915pbs");
1463#endif
1464 goto retry;
1465 }
1466 m->valid = VM_PAGE_BITS_ALL;
1467 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1468have_page:
1469 *mres = m;
1470 vm_page_busy_try(m, false);
1471
1472 mutex_unlock(&dev->struct_mutex);
1473 if (oldm != NULL) {
1474 vm_page_free(oldm);
1475 }
1476 vm_object_pip_wakeup(vm_obj);
1477 return (VM_PAGER_OK);
a2fdbec6 1478
a2fdbec6
FT
1479unlock:
1480 mutex_unlock(&dev->struct_mutex);
1481out:
e9587a4e
FT
1482 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
1483 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
1484 goto unlocked_vmobj;
a2fdbec6 1485 }
e9587a4e
FT
1486 VM_OBJECT_LOCK(vm_obj);
1487 vm_object_pip_wakeup(vm_obj);
1488 return (VM_PAGER_ERROR);
a2fdbec6 1489}
0b869d8a 1490
1491/**
1492 * i915_gem_release_mmap - remove physical page mappings
1493 * @obj: obj in question
901caa58 1494 *
1495 * Preserve the reservation of the mmapping with the DRM core code, but
1496 * relinquish ownership of the pages back to the system.
901caa58 1497 *
1498 * It is vital that we remove the page mapping if we have mapped a tiled
1499 * object through the GTT and then lose the fence register due to
1500 * resource pressure. Similarly if the object has been moved out of the
 1501 * aperture, then pages mapped into userspace must be revoked. Removing the
1502 * mapping will then trigger a page fault on the next user access, allowing
1503 * fixup by i915_gem_fault().
901caa58 1504 */
1505void
1506i915_gem_release_mmap(struct drm_i915_gem_object *obj)
575ea5a0 1507{
e11a51e3
FT
1508 vm_object_t devobj;
1509 vm_page_t m;
1510 int i, page_count;
901caa58 1511
e11a51e3
FT
1512 if (!obj->fault_mappable)
1513 return;
901caa58 1514
e11a51e3
FT
1515 devobj = cdev_pager_lookup(obj);
1516 if (devobj != NULL) {
1517 page_count = OFF_TO_IDX(obj->base.size);
575ea5a0 1518
e11a51e3
FT
1519 VM_OBJECT_LOCK(devobj);
1520 for (i = 0; i < page_count; i++) {
1521 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
1522 if (m == NULL)
1523 continue;
1524 cdev_pager_free_page(devobj, m);
575ea5a0 1525 }
e11a51e3
FT
1526 VM_OBJECT_UNLOCK(devobj);
1527 vm_object_deallocate(devobj);
575ea5a0 1528 }
575ea5a0 1529
e11a51e3 1530 obj->fault_mappable = false;
575ea5a0
FT
1531}
1532
a2fdbec6 1533uint32_t
e11a51e3 1534i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
575ea5a0 1535{
e11a51e3 1536 uint32_t gtt_size;
575ea5a0 1537
e11a51e3
FT
1538 if (INTEL_INFO(dev)->gen >= 4 ||
1539 tiling_mode == I915_TILING_NONE)
d2557f23 1540 return size;
575ea5a0 1541
e11a51e3
FT
1542 /* Previous chips need a power-of-two fence region when tiling */
1543 if (INTEL_INFO(dev)->gen == 3)
1544 gtt_size = 1024*1024;
1545 else
1546 gtt_size = 512*1024;
575ea5a0 1547
e11a51e3
FT
1548 while (gtt_size < size)
1549 gtt_size <<= 1;
575ea5a0 1550
d2557f23 1551 return gtt_size;
e11a51e3 1552}
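/*
 * Example (editor's illustration): on gen3 a 1300KiB tiled object starts
 * from the 1MiB minimum and doubles once, so a 2MiB fence region is
 * reported; on gen4 and later the object size is returned unchanged.
 */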
575ea5a0 1553
e11a51e3
FT
1554/**
1555 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1556 * @obj: object to check
1557 *
1558 * Return the required GTT alignment for an object, taking into account
1559 * potential fence register mapping.
1560 */
a2fdbec6
FT
1561uint32_t
1562i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1563 int tiling_mode, bool fenced)
e11a51e3 1564{
f4e1c372 1565
e11a51e3
FT
1566 /*
1567 * Minimum alignment is 4k (GTT page size), but might be greater
1568 * if a fence register is needed for the object.
1569 */
a2fdbec6 1570 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
e11a51e3 1571 tiling_mode == I915_TILING_NONE)
d2557f23 1572 return 4096;
575ea5a0 1573
e11a51e3
FT
1574 /*
1575 * Previous chips need to be aligned to the size of the smallest
1576 * fence register that can contain the object.
1577 */
d2557f23 1578 return i915_gem_get_gtt_size(dev, size, tiling_mode);
575ea5a0
FT
1579}
1580
575ea5a0 1581int
e11a51e3
FT
1582i915_gem_mmap_gtt(struct drm_file *file,
1583 struct drm_device *dev,
1584 uint32_t handle,
1585 uint64_t *offset)
575ea5a0 1586{
d2557f23 1587 struct drm_i915_private *dev_priv = dev->dev_private;
e11a51e3 1588 struct drm_i915_gem_object *obj;
d65a337f 1589 int ret;
575ea5a0 1590
e11a51e3 1591 ret = i915_mutex_lock_interruptible(dev);
d2557f23
FT
1592 if (ret)
1593 return ret;
575ea5a0 1594
e11a51e3
FT
1595 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1596 if (&obj->base == NULL) {
1597 ret = -ENOENT;
1598 goto unlock;
575ea5a0
FT
1599 }
1600
a2fdbec6 1601 if (obj->base.size > dev_priv->gtt.mappable_end) {
e11a51e3
FT
1602 ret = -E2BIG;
1603 goto out;
1604 }
d65a337f 1605
e11a51e3
FT
1606 if (obj->madv != I915_MADV_WILLNEED) {
1607 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1608 ret = -EINVAL;
1609 goto out;
1610 }
575ea5a0 1611
e11a51e3 1612 ret = drm_gem_create_mmap_offset(&obj->base);
d2557f23 1613 if (ret)
e11a51e3 1614 goto out;
575ea5a0 1615
e11a51e3
FT
1616 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
1617 DRM_GEM_MAPPING_KEY;
1618out:
1619 drm_gem_object_unreference(&obj->base);
1620unlock:
a2fdbec6 1621 mutex_unlock(&dev->struct_mutex);
d2557f23 1622 return ret;
575ea5a0
FT
1623}
1624
1625/**
1626 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1627 * @dev: DRM device
1628 * @data: GTT mapping ioctl data
1629 * @file: GEM object info
1630 *
1631 * Simply returns the fake offset to userspace so it can mmap it.
1632 * The mmap call will end up in drm_gem_mmap(), which will set things
1633 * up so we can get faults in the handler above.
1634 *
1635 * The fault handler will take care of binding the object into the GTT
1636 * (since it may have been evicted to make room for something), allocating
1637 * a fence register, and mapping the appropriate aperture address into
1638 * userspace.
1639 */
575ea5a0 1640int
1641i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1642 struct drm_file *file)
575ea5a0 1643{
686a02f1 1644 struct drm_i915_gem_mmap_gtt *args = data;
575ea5a0 1645
d2557f23 1646 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
575ea5a0
FT
1647}
1648
e11a51e3
FT
1649/* Immediately discard the backing storage */
1650static void
1651i915_gem_object_truncate(struct drm_i915_gem_object *obj)
575ea5a0 1652{
e11a51e3 1653 vm_object_t vm_obj;
575ea5a0 1654
e11a51e3
FT
1655 vm_obj = obj->base.vm_obj;
1656 VM_OBJECT_LOCK(vm_obj);
1657 vm_object_page_remove(vm_obj, 0, 0, false);
1658 VM_OBJECT_UNLOCK(vm_obj);
1659 obj->madv = __I915_MADV_PURGED;
575ea5a0
FT
1660}
1661
e11a51e3
FT
1662static inline int
1663i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
575ea5a0 1664{
e11a51e3 1665 return obj->madv == I915_MADV_DONTNEED;
575ea5a0
FT
1666}
1667
e11a51e3
FT
1668static void
1669i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
575ea5a0 1670{
e11a51e3
FT
1671 vm_page_t m;
1672 int page_count, i;
575ea5a0 1673
e11a51e3 1674 BUG_ON(obj->madv == __I915_MADV_PURGED);
575ea5a0 1675
e11a51e3
FT
1676 if (obj->tiling_mode != I915_TILING_NONE)
1677 i915_gem_object_save_bit_17_swizzle(obj);
1678 if (obj->madv == I915_MADV_DONTNEED)
1679 obj->dirty = 0;
1680 page_count = obj->base.size / PAGE_SIZE;
1681 VM_OBJECT_LOCK(obj->base.vm_obj);
1682#if GEM_PARANOID_CHECK_GTT
1683 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
1684#endif
1685 for (i = 0; i < page_count; i++) {
1686 m = obj->pages[i];
1687 if (obj->dirty)
1688 vm_page_dirty(m);
1689 if (obj->madv == I915_MADV_WILLNEED)
1690 vm_page_reference(m);
1691 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
1692 vm_page_unwire(obj->pages[i], 1);
1693 vm_page_wakeup(obj->pages[i]);
e11a51e3
FT
1694 }
1695 VM_OBJECT_UNLOCK(obj->base.vm_obj);
1696 obj->dirty = 0;
5a3b77d5 1697 drm_free(obj->pages, M_DRM);
e11a51e3 1698 obj->pages = NULL;
99f70504
FT
1699}
1700
a2fdbec6
FT
1701int
1702i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1703{
1704 const struct drm_i915_gem_object_ops *ops = obj->ops;
1705
1706 if (obj->pages == NULL)
1707 return 0;
1708
1709 BUG_ON(obj->gtt_space);
1710
1711 if (obj->pages_pin_count)
1712 return -EBUSY;
1713
1714 /* ->put_pages might need to allocate memory for the bit17 swizzle
1715 * array, hence protect them from being reaped by removing them from gtt
1716 * lists early. */
5d0b1887 1717 list_del(&obj->global_list);
a2fdbec6
FT
1718
1719 ops->put_pages(obj);
1720 obj->pages = NULL;
1721
1722 if (i915_gem_object_is_purgeable(obj))
1723 i915_gem_object_truncate(obj);
1724
1725 return 0;
1726}
1727
575ea5a0 1728static int
dfa24183 1729i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
575ea5a0 1730{
a2fdbec6 1731 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
e11a51e3 1732 struct drm_device *dev;
575ea5a0 1733 vm_object_t vm_obj;
e11a51e3 1734 int page_count, i, j;
56c606a8 1735 struct vm_page *page;
575ea5a0 1736
e11a51e3
FT
1737 dev = obj->base.dev;
1738 KASSERT(obj->pages == NULL, ("Obj already has pages"));
1739 page_count = obj->base.size / PAGE_SIZE;
5a3b77d5 1740 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
e11a51e3 1741 M_WAITOK);
56c606a8 1742
575ea5a0 1743 vm_obj = obj->base.vm_obj;
575ea5a0 1744 VM_OBJECT_LOCK(vm_obj);
56c606a8 1745
e11a51e3 1746 for (i = 0; i < page_count; i++) {
56c606a8 1747 page = shmem_read_mapping_page(vm_obj, i);
a2fdbec6
FT
1748 if (IS_ERR(page)) {
1749 i915_gem_purge(dev_priv, page_count);
56c606a8 1750 goto err_pages;
a2fdbec6 1751 }
56c606a8
FT
1752
1753 obj->pages[i] = page;
e11a51e3 1754 }
56c606a8 1755
e11a51e3
FT
1756 VM_OBJECT_UNLOCK(vm_obj);
1757 if (i915_gem_object_needs_bit17_swizzle(obj))
1758 i915_gem_object_do_bit_17_swizzle(obj);
575ea5a0 1759
56c606a8
FT
1760 return 0;
1761
1762err_pages:
e11a51e3 1763 for (j = 0; j < i; j++) {
56c606a8
FT
1764 page = obj->pages[j];
1765 vm_page_busy_wait(page, FALSE, "i915gem");
1766 vm_page_unwire(page, 0);
1767 vm_page_wakeup(page);
575ea5a0 1768 }
575ea5a0 1769 VM_OBJECT_UNLOCK(vm_obj);
5a3b77d5 1770 drm_free(obj->pages, M_DRM);
e11a51e3
FT
1771 obj->pages = NULL;
1772 return (-EIO);
575ea5a0
FT
1773}
1774
1775/* Ensure that the associated pages are gathered from the backing storage
1776 * and pinned into our object. i915_gem_object_get_pages() may be called
1777 * multiple times before they are released by a single call to
1778 * i915_gem_object_put_pages() - once the pages are no longer referenced
1779 * either as a result of memory pressure (reaping pages under the shrinker)
1780 * or as the object is itself released.
1781 */
1782int
1783i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1784{
1785 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1786 const struct drm_i915_gem_object_ops *ops = obj->ops;
1787 int ret;
1788
1789 if (obj->pages)
1790 return 0;
1791
1792 if (obj->madv != I915_MADV_WILLNEED) {
1793 DRM_ERROR("Attempting to obtain a purgeable object\n");
1794 return -EINVAL;
1795 }
1796
1797 BUG_ON(obj->pages_pin_count);
1798
1799 ret = ops->get_pages(obj);
1800 if (ret)
1801 return ret;
1802
5d0b1887 1803 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
a2fdbec6
FT
1804 return 0;
1805}
1806
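/*
 * Usage sketch (illustrative, not lifted from this file): callers that need
 * the backing pages to stay resident while touching them are expected to
 * bracket the access with the pin helpers, e.g.
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret == 0) {
 *		i915_gem_object_pin_pages(obj);
 *		... access obj->pages[] ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *
 * i915_gem_object_pin_pages()/unpin_pages() are assumed to be the inline
 * helpers in i915_drv.h that adjust obj->pages_pin_count, which
 * i915_gem_object_put_pages() checks above before releasing anything.
 */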
e11a51e3
FT
1807void
1808i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
b5c29a34 1809 struct intel_ring_buffer *ring)
575ea5a0 1810{
e11a51e3
FT
1811 struct drm_device *dev = obj->base.dev;
1812 struct drm_i915_private *dev_priv = dev->dev_private;
b5c29a34 1813 u32 seqno = intel_ring_get_seqno(ring);
575ea5a0 1814
686a02f1 1815 BUG_ON(ring == NULL);
5d0b1887
FT
1816 if (obj->ring != ring && obj->last_write_seqno) {
1817 /* Keep the seqno relative to the current ring */
1818 obj->last_write_seqno = seqno;
1819 }
e11a51e3 1820 obj->ring = ring;
575ea5a0 1821
e11a51e3
FT
1822 /* Add a reference if we're newly entering the active list. */
1823 if (!obj->active) {
1824 drm_gem_object_reference(&obj->base);
1825 obj->active = 1;
575ea5a0
FT
1826 }
1827
e11a51e3
FT
1828 /* Move from whatever list we were on to the tail of execution. */
1829 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1830 list_move_tail(&obj->ring_list, &ring->active_list);
575ea5a0 1831
686a02f1
FT
1832 obj->last_read_seqno = seqno;
1833
e11a51e3
FT
1834 if (obj->fenced_gpu_access) {
1835 obj->last_fenced_seqno = seqno;
575ea5a0 1836
e11a51e3
FT
1837 /* Bump MRU to take account of the delayed flush */
1838 if (obj->fence_reg != I915_FENCE_REG_NONE) {
686a02f1
FT
1839 struct drm_i915_fence_reg *reg;
1840
e11a51e3
FT
1841 reg = &dev_priv->fence_regs[obj->fence_reg];
1842 list_move_tail(&reg->lru_list,
1843 &dev_priv->mm.fence_list);
575ea5a0
FT
1844 }
1845 }
575ea5a0
FT
1846}
1847
e11a51e3
FT
1848static void
1849i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1850{
1851 struct drm_device *dev = obj->base.dev;
1852 struct drm_i915_private *dev_priv = dev->dev_private;
1853
f192107f 1854 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
686a02f1 1855 BUG_ON(!obj->active);
f192107f 1856
19df918d
FT
1857 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1858
f192107f 1859 list_del_init(&obj->ring_list);
e11a51e3 1860 obj->ring = NULL;
e11a51e3 1861
f192107f
FT
1862 obj->last_read_seqno = 0;
1863 obj->last_write_seqno = 0;
1864 obj->base.write_domain = 0;
1865
1866 obj->last_fenced_seqno = 0;
e11a51e3
FT
1867 obj->fenced_gpu_access = false;
1868
1869 obj->active = 0;
e11a51e3
FT
1870 drm_gem_object_unreference(&obj->base);
1871
e11a51e3 1872 WARN_ON(i915_verify_lists(dev));
575ea5a0
FT
1873}
1874
b5c29a34 1875static int
a2fdbec6 1876i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
575ea5a0 1877{
b5c29a34
FT
1878 struct drm_i915_private *dev_priv = dev->dev_private;
1879 struct intel_ring_buffer *ring;
1880 int ret, i, j;
1881
a2fdbec6 1882 /* Carefully retire all requests without writing to the rings */
b5c29a34 1883 for_each_ring(ring, dev_priv, i) {
a2fdbec6
FT
1884 ret = intel_ring_idle(ring);
1885 if (ret)
1886 return ret;
b5c29a34 1887 }
b5c29a34 1888 i915_gem_retire_requests(dev);
a2fdbec6
FT
1889
1890 /* Finally reset hw state */
b5c29a34 1891 for_each_ring(ring, dev_priv, i) {
a2fdbec6
FT
1892 intel_ring_init_seqno(ring, seqno);
1893
b5c29a34
FT
1894 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1895 ring->sync_seqno[j] = 0;
1896 }
1897
1898 return 0;
1899}
1900
a2fdbec6
FT
1901int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
1902{
1903 struct drm_i915_private *dev_priv = dev->dev_private;
1904 int ret;
1905
1906 if (seqno == 0)
1907 return -EINVAL;
1908
 1909 /* The HWS page seqno needs to be set to less than what we
 1910 * will inject into the ring
 1911 */
1912 ret = i915_gem_init_seqno(dev, seqno - 1);
1913 if (ret)
1914 return ret;
1915
1916 /* Carefully set the last_seqno value so that wrap
1917 * detection still works
1918 */
1919 dev_priv->next_seqno = seqno;
1920 dev_priv->last_seqno = seqno - 1;
1921 if (dev_priv->last_seqno == 0)
1922 dev_priv->last_seqno--;
1923
1924 return 0;
1925}
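/*
 * Worked example of why last_seqno is seeded to seqno - 1 above (sketch):
 * i915_seqno_passed() compares sequence numbers using a signed 32-bit
 * difference, roughly
 *
 *	(int32_t)(seq1 - seq2) >= 0
 *
 * so with seq1 = 0x00000002 and seq2 = 0xfffffffe the difference is a small
 * positive number and "passed" is still reported correctly across the wrap.
 * Seeding last_seqno just below the injected value keeps that wrap detection
 * working for the very first request after a set_seqno.
 */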
1926
b5c29a34
FT
1927int
1928i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
1929{
1930 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0 1931
e11a51e3 1932 /* reserve 0 for non-seqno */
b5c29a34 1933 if (dev_priv->next_seqno == 0) {
a2fdbec6 1934 int ret = i915_gem_init_seqno(dev, 0);
b5c29a34
FT
1935 if (ret)
1936 return ret;
1937
e11a51e3 1938 dev_priv->next_seqno = 1;
b5c29a34 1939 }
e11a51e3 1940
a2fdbec6 1941 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
b5c29a34 1942 return 0;
575ea5a0
FT
1943}
1944
5d0b1887
FT
1945int __i915_add_request(struct intel_ring_buffer *ring,
1946 struct drm_file *file,
1947 struct drm_i915_gem_object *obj,
1948 u32 *out_seqno)
575ea5a0 1949{
686a02f1 1950 drm_i915_private_t *dev_priv = ring->dev->dev_private;
f192107f 1951 struct drm_i915_gem_request *request;
5d0b1887 1952 u32 request_ring_position, request_start;
e11a51e3 1953 int was_empty;
575ea5a0
FT
1954 int ret;
1955
5d0b1887 1956 request_start = intel_ring_get_tail(ring);
686a02f1
FT
1957 /*
1958 * Emit any outstanding flushes - execbuf can fail to emit the flush
1959 * after having emitted the batchbuffer command. Hence we need to fix
1960 * things up similar to emitting the lazy request. The difference here
1961 * is that the flush _must_ happen before the next request, no matter
1962 * what.
1963 */
b312333e
FT
1964 ret = intel_ring_flush_all_caches(ring);
1965 if (ret)
1966 return ret;
686a02f1 1967
159fc1d7 1968 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK);
f192107f
FT
1969 if (request == NULL)
1970 return -ENOMEM;
575ea5a0 1971
d2557f23 1972
686a02f1
FT
1973 /* Record the position of the start of the request so that
1974 * should we detect the updated seqno part-way through the
1975 * GPU processing the request, we never over-estimate the
1976 * position of the head.
1977 */
e11a51e3 1978 request_ring_position = intel_ring_get_tail(ring);
575ea5a0 1979
b5c29a34 1980 ret = ring->add_request(ring);
686a02f1 1981 if (ret) {
158486a6 1982 kfree(request);
686a02f1
FT
1983 return ret;
1984 }
575ea5a0 1985
b5c29a34 1986 request->seqno = intel_ring_get_seqno(ring);
e11a51e3 1987 request->ring = ring;
5d0b1887 1988 request->head = request_start;
e11a51e3 1989 request->tail = request_ring_position;
5d0b1887
FT
1990 request->ctx = ring->last_context;
1991 request->batch_obj = obj;
1992
1993 /* Whilst this request exists, batch_obj will be on the
1994 * active_list, and so will hold the active reference. Only when this
 1995 * request is retired will the batch_obj be moved onto the
1996 * inactive_list and lose its active reference. Hence we do not need
1997 * to explicitly hold another reference here.
1998 */
1999
2000 if (request->ctx)
2001 i915_gem_context_reference(request->ctx);
2002
686a02f1 2003 request->emitted_jiffies = jiffies;
e11a51e3
FT
2004 was_empty = list_empty(&ring->request_list);
2005 list_add_tail(&request->list, &ring->request_list);
686a02f1 2006 request->file_priv = NULL;
e11a51e3 2007
686a02f1
FT
2008 if (file) {
2009 struct drm_i915_file_private *file_priv = file->driver_priv;
e11a51e3
FT
2010
2011 spin_lock(&file_priv->mm.lock);
2012 request->file_priv = file_priv;
2013 list_add_tail(&request->client_list,
686a02f1 2014 &file_priv->mm.request_list);
e11a51e3 2015 spin_unlock(&file_priv->mm.lock);
575ea5a0
FT
2016 }
2017
e11a51e3 2018 ring->outstanding_lazy_request = 0;
575ea5a0 2019
e11a51e3
FT
2020 if (!dev_priv->mm.suspended) {
2021 if (i915_enable_hangcheck) {
a2fdbec6 2022 mod_timer(&dev_priv->gpu_error.hangcheck_timer,
561529b1 2023 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
e11a51e3 2024 }
561529b1 2025 if (was_empty) {
e11a51e3 2026 queue_delayed_work(dev_priv->wq,
561529b1
FT
2027 &dev_priv->mm.retire_work,
2028 round_jiffies_up_relative(hz));
2029 intel_mark_busy(dev_priv->dev);
2030 }
e11a51e3 2031 }
686a02f1 2032
f192107f 2033 if (out_seqno)
b5c29a34 2034 *out_seqno = request->seqno;
686a02f1 2035 return 0;
575ea5a0
FT
2036}
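/*
 * Note: most in-kernel callers reach this through the i915_add_request()
 * wrapper (assumed to be the macro in i915_drv.h that passes NULL for the
 * file and batch object), as the retire work handler below does when it
 * flushes dirty GPU caches.
 */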
2037
e11a51e3
FT
2038static inline void
2039i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
575ea5a0 2040{
e11a51e3 2041 struct drm_i915_file_private *file_priv = request->file_priv;
575ea5a0 2042
e11a51e3
FT
2043 if (!file_priv)
2044 return;
2045
e11a51e3 2046 spin_lock(&file_priv->mm.lock);
d2557f23 2047 if (request->file_priv) {
e11a51e3
FT
2048 list_del(&request->client_list);
2049 request->file_priv = NULL;
575ea5a0 2050 }
e11a51e3 2051 spin_unlock(&file_priv->mm.lock);
575ea5a0
FT
2052}
2053
5d0b1887
FT
2054static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj)
2055{
2056 if (acthd >= obj->gtt_offset &&
2057 acthd < obj->gtt_offset + obj->base.size)
2058 return true;
2059
2060 return false;
2061}
2062
2063static bool i915_head_inside_request(const u32 acthd_unmasked,
2064 const u32 request_start,
2065 const u32 request_end)
2066{
2067 const u32 acthd = acthd_unmasked & HEAD_ADDR;
2068
2069 if (request_start < request_end) {
2070 if (acthd >= request_start && acthd < request_end)
2071 return true;
2072 } else if (request_start > request_end) {
2073 if (acthd >= request_start || acthd < request_end)
2074 return true;
2075 }
2076
2077 return false;
2078}
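/*
 * Example (illustrative values): if a request wrapped the ring so that
 * request_start = 0xf000 and request_end = 0x0100, an ACTHD of 0x0020 falls
 * in the second branch above and is treated as inside the request, while an
 * ACTHD of 0x8000 is not.
 */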
2079
2080static bool i915_request_guilty(struct drm_i915_gem_request *request,
2081 const u32 acthd, bool *inside)
2082{
 2083 /* There is a possibility that the unmasked head address, while
 2084 * pointing inside the ring, matches the batch_obj address range.
 2085 * However, this is extremely unlikely.
2086 */
2087
2088 if (request->batch_obj) {
2089 if (i915_head_inside_object(acthd, request->batch_obj)) {
2090 *inside = true;
2091 return true;
2092 }
2093 }
2094
2095 if (i915_head_inside_request(acthd, request->head, request->tail)) {
2096 *inside = false;
2097 return true;
2098 }
2099
2100 return false;
2101}
2102
2103static void i915_set_reset_status(struct intel_ring_buffer *ring,
2104 struct drm_i915_gem_request *request,
2105 u32 acthd)
2106{
2107 struct i915_ctx_hang_stats *hs = NULL;
2108 bool inside, guilty;
2109
2110 /* Innocent until proven guilty */
2111 guilty = false;
2112
2113 if (ring->hangcheck.action != wait &&
2114 i915_request_guilty(request, acthd, &inside)) {
2115 DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n",
2116 ring->name,
2117 inside ? "inside" : "flushing",
2118 request->batch_obj ?
2119 request->batch_obj->gtt_offset : 0,
2120 request->ctx ? request->ctx->id : 0,
2121 acthd);
2122
2123 guilty = true;
2124 }
2125
2126 /* If contexts are disabled or this is the default context, use
 2127 * file_priv->hang_stats
2128 */
2129 if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
2130 hs = &request->ctx->hang_stats;
2131 else if (request->file_priv)
2132 hs = &request->file_priv->hang_stats;
2133
2134 if (hs) {
2135 if (guilty)
2136 hs->batch_active++;
2137 else
2138 hs->batch_pending++;
2139 }
2140}
2141
2142static void i915_gem_free_request(struct drm_i915_gem_request *request)
2143{
2144 list_del(&request->list);
2145 i915_gem_request_remove_from_client(request);
2146
2147 if (request->ctx)
2148 i915_gem_context_unreference(request->ctx);
2149
2150 kfree(request);
2151}
2152
d2557f23
FT
2153static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2154 struct intel_ring_buffer *ring)
575ea5a0 2155{
5d0b1887
FT
2156 u32 completed_seqno;
2157 u32 acthd;
2158
2159 acthd = intel_ring_get_active_head(ring);
2160 completed_seqno = ring->get_seqno(ring, false);
2161
e11a51e3
FT
2162 while (!list_empty(&ring->request_list)) {
2163 struct drm_i915_gem_request *request;
575ea5a0 2164
e11a51e3 2165 request = list_first_entry(&ring->request_list,
d2557f23
FT
2166 struct drm_i915_gem_request,
2167 list);
e11a51e3 2168
5d0b1887
FT
2169 if (request->seqno > completed_seqno)
2170 i915_set_reset_status(ring, request, acthd);
2171
2172 i915_gem_free_request(request);
575ea5a0 2173 }
575ea5a0 2174
e11a51e3
FT
2175 while (!list_empty(&ring->active_list)) {
2176 struct drm_i915_gem_object *obj;
2177
2178 obj = list_first_entry(&ring->active_list,
d2557f23
FT
2179 struct drm_i915_gem_object,
2180 ring_list);
e11a51e3 2181
e11a51e3 2182 i915_gem_object_move_to_inactive(obj);
575ea5a0 2183 }
575ea5a0
FT
2184}
2185
8e26cdf6 2186void i915_gem_restore_fences(struct drm_device *dev)
575ea5a0 2187{
e11a51e3
FT
2188 struct drm_i915_private *dev_priv = dev->dev_private;
2189 int i;
575ea5a0 2190
e11a51e3
FT
2191 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2192 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
5d0b1887
FT
2193
2194 /*
2195 * Commit delayed tiling changes if we have an object still
2196 * attached to the fence, otherwise just clear the fence.
2197 */
2198 if (reg->obj) {
2199 i915_gem_object_update_fence(reg->obj, reg,
2200 reg->obj->tiling_mode);
2201 } else {
2202 i915_gem_write_fence(dev, i, NULL);
2203 }
e11a51e3
FT
2204 }
2205}
2206
2207void i915_gem_reset(struct drm_device *dev)
575ea5a0 2208{
e11a51e3 2209 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0 2210 struct drm_i915_gem_object *obj;
f192107f 2211 struct intel_ring_buffer *ring;
e11a51e3 2212 int i;
575ea5a0 2213
f192107f
FT
2214 for_each_ring(ring, dev_priv, i)
2215 i915_gem_reset_ring_lists(dev_priv, ring);
575ea5a0 2216
e11a51e3
FT
2217 /* Move everything out of the GPU domains to ensure we do any
2218 * necessary invalidation upon reuse.
ef56dbd7 2219 */
f192107f
FT
2220 list_for_each_entry(obj,
2221 &dev_priv->mm.inactive_list,
2222 mm_list)
2223 {
e11a51e3 2224 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
575ea5a0 2225 }
575ea5a0 2226
8e26cdf6 2227 i915_gem_restore_fences(dev);
e11a51e3 2228}
575ea5a0 2229
e11a51e3
FT
2230/**
2231 * This function clears the request list as sequence numbers are passed.
2232 */
2233void
2234i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2235{
2236 uint32_t seqno;
2237
2238 if (list_empty(&ring->request_list))
2239 return;
2240
686a02f1
FT
2241 WARN_ON(i915_verify_lists(ring->dev));
2242
e11a51e3
FT
2243 seqno = ring->get_seqno(ring, true);
2244
2245 while (!list_empty(&ring->request_list)) {
2246 struct drm_i915_gem_request *request;
2247
2248 request = list_first_entry(&ring->request_list,
2249 struct drm_i915_gem_request,
2250 list);
2251
2252 if (!i915_seqno_passed(seqno, request->seqno))
2253 break;
2254
2255 /* We know the GPU must have read the request to have
2256 * sent us the seqno + interrupt, so use the position
 2257 * of the tail of the request to update the last known position
2258 * of the GPU head.
2259 */
2260 ring->last_retired_head = request->tail;
2261
5d0b1887 2262 i915_gem_free_request(request);
575ea5a0 2263 }
575ea5a0 2264
e11a51e3
FT
2265 /* Move any buffers on the active list that are no longer referenced
2266 * by the ringbuffer to the flushing/inactive lists as appropriate.
2267 */
2268 while (!list_empty(&ring->active_list)) {
2269 struct drm_i915_gem_object *obj;
2270
2271 obj = list_first_entry(&ring->active_list,
2272 struct drm_i915_gem_object,
2273 ring_list);
2274
686a02f1 2275 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
e11a51e3
FT
2276 break;
2277
f192107f 2278 i915_gem_object_move_to_inactive(obj);
575ea5a0 2279 }
575ea5a0 2280
e11a51e3
FT
2281 if (unlikely(ring->trace_irq_seqno &&
2282 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
2283 ring->irq_put(ring);
2284 ring->trace_irq_seqno = 0;
575ea5a0 2285 }
e11a51e3 2286
575ea5a0
FT
2287}
2288
e11a51e3
FT
2289void
2290i915_gem_retire_requests(struct drm_device *dev)
575ea5a0 2291{
e11a51e3 2292 drm_i915_private_t *dev_priv = dev->dev_private;
f192107f 2293 struct intel_ring_buffer *ring;
e11a51e3 2294 int i;
575ea5a0 2295
f192107f
FT
2296 for_each_ring(ring, dev_priv, i)
2297 i915_gem_retire_requests_ring(ring);
575ea5a0
FT
2298}
2299
a2fdbec6
FT
2300static long
2301__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2302 bool purgeable_only)
2303{
2304 struct drm_i915_gem_object *obj, *next;
2305 long count = 0;
2306
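/*
 * Note: the actual reaping in both loops below is compiled out (#if 0) in
 * this port, so for now the shrinker only walks the lists and always
 * reports 0 pages freed.
 */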
2307 list_for_each_entry_safe(obj, next,
2308 &dev_priv->mm.unbound_list,
5d0b1887 2309 global_list) {
a2fdbec6
FT
2310#if 0
2311 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2312 i915_gem_object_put_pages(obj) == 0) {
2313 count += obj->base.size >> PAGE_SHIFT;
2314 if (count >= target)
2315 return count;
2316 }
2317#endif
2318 }
2319
2320 list_for_each_entry_safe(obj, next,
2321 &dev_priv->mm.inactive_list,
2322 mm_list) {
2323#if 0
2324 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2325 i915_gem_object_unbind(obj) == 0 &&
2326 i915_gem_object_put_pages(obj) == 0) {
2327 count += obj->base.size >> PAGE_SHIFT;
2328 if (count >= target)
2329 return count;
2330 }
2331#endif
2332 }
2333
2334 return count;
2335}
2336
2337static long
2338i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2339{
2340 return __i915_gem_shrink(dev_priv, target, true);
2341}
2342
e11a51e3
FT
2343static void
2344i915_gem_retire_work_handler(struct work_struct *work)
575ea5a0 2345{
e11a51e3
FT
2346 drm_i915_private_t *dev_priv;
2347 struct drm_device *dev;
2348 struct intel_ring_buffer *ring;
2349 bool idle;
2350 int i;
575ea5a0 2351
e11a51e3
FT
2352 dev_priv = container_of(work, drm_i915_private_t,
2353 mm.retire_work.work);
2354 dev = dev_priv->dev;
575ea5a0 2355
e11a51e3 2356 /* Come back later if the device is busy... */
a2fdbec6 2357 if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) {
e11a51e3
FT
2358 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2359 round_jiffies_up_relative(hz));
2360 return;
575ea5a0
FT
2361 }
2362
e11a51e3 2363 i915_gem_retire_requests(dev);
575ea5a0 2364
e11a51e3
FT
2365 /* Send a periodic flush down the ring so we don't hold onto GEM
2366 * objects indefinitely.
2367 */
2368 idle = true;
2369 for_each_ring(ring, dev_priv, i) {
2370 if (ring->gpu_caches_dirty)
5d0b1887 2371 i915_add_request(ring, NULL);
e11a51e3
FT
2372
2373 idle &= list_empty(&ring->request_list);
575ea5a0
FT
2374 }
2375
e11a51e3
FT
2376 if (!dev_priv->mm.suspended && !idle)
2377 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2378 round_jiffies_up_relative(hz));
2379 if (idle)
2380 intel_mark_idle(dev);
575ea5a0 2381
a2fdbec6 2382 mutex_unlock(&dev->struct_mutex);
575ea5a0 2383}
f0b54121
FT
2384/**
2385 * Ensures that an object will eventually get non-busy by flushing any required
 2386 * write domains, emitting any outstanding lazy request and retiring
 2387 * any completed requests.
2388 */
2389static int
2390i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2391{
2392 int ret;
2393
2394 if (obj->active) {
f0b54121
FT
2395 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2396 if (ret)
2397 return ret;
2398
2399 i915_gem_retire_requests_ring(obj->ring);
2400 }
2401
2402 return 0;
2403}
575ea5a0 2404
fabb21f3
FT
2405/**
2406 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2407 * @DRM_IOCTL_ARGS: standard ioctl arguments
2408 *
2409 * Returns 0 if successful, else an error is returned with the remaining time in
2410 * the timeout parameter.
 2411 * -ETIME or -ETIMEDOUT: object is still busy after timeout
2412 * -ERESTARTSYS: signal interrupted the wait
 2413 * -ENOENT: object doesn't exist
2414 * Also possible, but rare:
2415 * -EAGAIN: GPU wedged
2416 * -ENOMEM: damn
2417 * -ENODEV: Internal IRQ fail
2418 * -E?: The add request failed
2419 *
2420 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2421 * non-zero timeout parameter the wait ioctl will wait for the given number of
2422 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2423 * without holding struct_mutex the object may become re-busied before this
 2424 * function completes. A similar but shorter race condition exists in the
 2425 * busy ioctl.
2426 */
2427int
2428i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2429{
a2fdbec6 2430 drm_i915_private_t *dev_priv = dev->dev_private;
fabb21f3
FT
2431 struct drm_i915_gem_wait *args = data;
2432 struct drm_i915_gem_object *obj;
2433 struct intel_ring_buffer *ring = NULL;
2434 struct timespec timeout_stack, *timeout = NULL;
a2fdbec6 2435 unsigned reset_counter;
fabb21f3
FT
2436 u32 seqno = 0;
2437 int ret = 0;
2438
2439 if (args->timeout_ns >= 0) {
2440 timeout_stack = ns_to_timespec(args->timeout_ns);
2441 timeout = &timeout_stack;
2442 }
2443
2444 ret = i915_mutex_lock_interruptible(dev);
2445 if (ret)
2446 return ret;
2447
2448 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2449 if (&obj->base == NULL) {
a2fdbec6 2450 mutex_unlock(&dev->struct_mutex);
fabb21f3
FT
2451 return -ENOENT;
2452 }
2453
2454 /* Need to make sure the object gets inactive eventually. */
2455 ret = i915_gem_object_flush_active(obj);
2456 if (ret)
2457 goto out;
2458
2459 if (obj->active) {
2460 seqno = obj->last_read_seqno;
2461 ring = obj->ring;
2462 }
2463
2464 if (seqno == 0)
2465 goto out;
2466
 2467 /* Do this after the OLR check to make sure we make forward progress polling
 2468 * on this IOCTL with a 0 timeout (like the busy ioctl)
2469 */
2470 if (!args->timeout_ns) {
2471 ret = -ETIMEDOUT;
2472 goto out;
2473 }
2474
2475 drm_gem_object_unreference(&obj->base);
a2fdbec6
FT
2476 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2477 mutex_unlock(&dev->struct_mutex);
fabb21f3 2478
a2fdbec6 2479 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
8e26cdf6 2480 if (timeout)
fabb21f3 2481 args->timeout_ns = timespec_to_ns(timeout);
fabb21f3
FT
2482 return ret;
2483
2484out:
2485 drm_gem_object_unreference(&obj->base);
a2fdbec6 2486 mutex_unlock(&dev->struct_mutex);
fabb21f3
FT
2487 return ret;
2488}
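/*
 * Userspace-side sketch of the ioctl documented above (illustrative, not
 * part of this file): wait up to one millisecond for a buffer to go idle.
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000,
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On timeout the object is still busy; on success the remaining time is
 * left in wait.timeout_ns.
 */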
2489
3d4007e0
FT
2490/**
2491 * i915_gem_object_sync - sync an object to a ring.
2492 *
2493 * @obj: object which may be in use on another ring.
2494 * @to: ring we wish to use the object on. May be NULL.
2495 *
2496 * This code is meant to abstract object synchronization with the GPU.
2497 * Calling with NULL implies synchronizing the object with the CPU
2498 * rather than a particular GPU ring.
2499 *
2500 * Returns 0 if successful, else propagates up the lower layer error.
2501 */
2502int
2503i915_gem_object_sync(struct drm_i915_gem_object *obj,
2504 struct intel_ring_buffer *to)
2505{
2506 struct intel_ring_buffer *from = obj->ring;
2507 u32 seqno;
2508 int ret, idx;
2509
2510 if (from == NULL || to == from)
2511 return 0;
2512
2513 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
686a02f1 2514 return i915_gem_object_wait_rendering(obj, false);
3d4007e0
FT
2515
2516 idx = intel_ring_sync_index(from, to);
2517
686a02f1 2518 seqno = obj->last_read_seqno;
3d4007e0
FT
2519 if (seqno <= from->sync_seqno[idx])
2520 return 0;
2521
686a02f1
FT
2522 ret = i915_gem_check_olr(obj->ring, seqno);
2523 if (ret)
2524 return ret;
3d4007e0 2525
686a02f1
FT
2526 ret = to->sync_to(to, from, seqno);
2527 if (!ret)
d2557f23
FT
2528 /* We use last_read_seqno because sync_to()
2529 * might have just caused seqno wrap under
2530 * the radar.
2531 */
2532 from->sync_seqno[idx] = obj->last_read_seqno;
3d4007e0 2533
686a02f1 2534 return ret;
3d4007e0
FT
2535}
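/*
 * Example (illustrative): before the blitter ring consumes an object that
 * was last written by the render ring, a caller would do
 *
 *	ret = i915_gem_object_sync(obj, blt_ring);
 *
 * which, with semaphores enabled, queues a GPU-side wait on the render
 * ring's seqno instead of stalling the CPU.
 */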
2536
e11a51e3 2537static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
575ea5a0 2538{
e11a51e3 2539 u32 old_write_domain, old_read_domains;
575ea5a0 2540
e11a51e3
FT
2541 /* Force a pagefault for domain tracking on next user access */
2542 i915_gem_release_mmap(obj);
575ea5a0 2543
e11a51e3
FT
2544 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2545 return;
575ea5a0 2546
a2fdbec6
FT
2547 /* Wait for any direct GTT access to complete */
2548 cpu_mfence();
2549
e11a51e3
FT
2550 old_read_domains = obj->base.read_domains;
2551 old_write_domain = obj->base.write_domain;
575ea5a0 2552
e11a51e3
FT
2553 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2554 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
575ea5a0 2555
575ea5a0
FT
2556}
2557
f192107f
FT
2558/**
2559 * Unbinds an object from the GTT aperture.
2560 */
e11a51e3
FT
2561int
2562i915_gem_object_unbind(struct drm_i915_gem_object *obj)
575ea5a0 2563{
f192107f 2564 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
a2fdbec6 2565 int ret;
575ea5a0 2566
e11a51e3 2567 if (obj->gtt_space == NULL)
f192107f
FT
2568 return 0;
2569
d2557f23
FT
2570 if (obj->pin_count)
2571 return -EBUSY;
575ea5a0 2572
0b869d8a
FT
2573 BUG_ON(obj->pages == NULL);
2574
e11a51e3 2575 ret = i915_gem_object_finish_gpu(obj);
f192107f
FT
2576 if (ret)
2577 return ret;
 2578 /* Continue on if we fail due to EIO; the GPU is hung, so we
 2579 * should be safe, and we need to clean up or else we might
 2580 * cause memory corruption through use-after-free.
2581 */
575ea5a0 2582
e11a51e3 2583 i915_gem_object_finish_gtt(obj);
575ea5a0 2584
f192107f
FT
2585 /* Move the object to the CPU domain to ensure that
2586 * any possible CPU writes while it's not in the GTT
2587 * are flushed when we go to remap it.
2588 */
e11a51e3
FT
2589 if (ret == 0)
2590 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
797013cf 2591 if (ret == -ERESTARTSYS)
f192107f
FT
2592 return ret;
2593 if (ret) {
2594 /* In the event of a disaster, abandon all caches and
2595 * hope for the best.
2596 */
e11a51e3 2597 i915_gem_clflush_object(obj);
f192107f 2598 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e11a51e3 2599 }
575ea5a0 2600
f192107f 2601 /* release the fence reg _after_ flushing */
e11a51e3 2602 ret = i915_gem_object_put_fence(obj);
f192107f
FT
2603 if (ret)
2604 return ret;
575ea5a0 2605
f192107f
FT
2606 if (obj->has_global_gtt_mapping)
2607 i915_gem_gtt_unbind_object(obj);
e11a51e3
FT
2608 if (obj->has_aliasing_ppgtt_mapping) {
2609 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2610 obj->has_aliasing_ppgtt_mapping = 0;
2611 }
f192107f
FT
2612 i915_gem_gtt_finish_object(obj);
2613
e11a51e3 2614 i915_gem_object_put_pages_gtt(obj);
575ea5a0 2615
5d0b1887 2616 list_del_init(&obj->global_list);
e11a51e3 2617 list_del_init(&obj->mm_list);
f192107f 2618 /* Avoid an unnecessary call to unbind on rebind. */
e11a51e3 2619 obj->map_and_fenceable = true;
575ea5a0 2620
e11a51e3
FT
2621 drm_mm_put_block(obj->gtt_space);
2622 obj->gtt_space = NULL;
2623 obj->gtt_offset = 0;
575ea5a0 2624
e11a51e3
FT
2625 if (i915_gem_object_is_purgeable(obj))
2626 i915_gem_object_truncate(obj);
575ea5a0 2627
f192107f 2628 return ret;
575ea5a0
FT
2629}
2630
e11a51e3 2631int i915_gpu_idle(struct drm_device *dev)
575ea5a0 2632{
e11a51e3
FT
2633 drm_i915_private_t *dev_priv = dev->dev_private;
2634 struct intel_ring_buffer *ring;
2635 int ret, i;
575ea5a0 2636
e11a51e3
FT
2637 /* Flush everything onto the inactive list. */
2638 for_each_ring(ring, dev_priv, i) {
e555d299
FT
2639 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2640 if (ret)
2641 return ret;
2642
e11a51e3
FT
2643 ret = intel_ring_idle(ring);
2644 if (ret)
2645 return ret;
2646 }
575ea5a0 2647
e11a51e3
FT
2648 return 0;
2649}
575ea5a0 2650
e3359f38
FT
2651static void i965_write_fence_reg(struct drm_device *dev, int reg,
2652 struct drm_i915_gem_object *obj)
575ea5a0 2653{
7cbd1a46 2654 drm_i915_private_t *dev_priv = dev->dev_private;
a2fdbec6
FT
2655 int fence_reg;
2656 int fence_pitch_shift;
575ea5a0 2657
a2fdbec6
FT
2658 if (INTEL_INFO(dev)->gen >= 6) {
2659 fence_reg = FENCE_REG_SANDYBRIDGE_0;
2660 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
2661 } else {
2662 fence_reg = FENCE_REG_965_0;
2663 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
2664 }
2665
5d0b1887
FT
2666 fence_reg += reg * 8;
2667
2668 /* To w/a incoherency with non-atomic 64-bit register updates,
2669 * we split the 64-bit update into two 32-bit writes. In order
2670 * for a partial fence not to be evaluated between writes, we
2671 * precede the update with write to turn off the fence register,
2672 * and only enable the fence as the last step.
2673 *
2674 * For extra levels of paranoia, we make sure each step lands
2675 * before applying the next step.
2676 */
2677 I915_WRITE(fence_reg, 0);
2678 POSTING_READ(fence_reg);
2679
e3359f38
FT
2680 if (obj) {
2681 u32 size = obj->gtt_space->size;
5d0b1887 2682 uint64_t val;
575ea5a0 2683
e3359f38
FT
2684 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2685 0xfffff000) << 32;
2686 val |= obj->gtt_offset & 0xfffff000;
a2fdbec6 2687 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
e3359f38
FT
2688 if (obj->tiling_mode == I915_TILING_Y)
2689 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2690 val |= I965_FENCE_REG_VALID;
575ea5a0 2691
5d0b1887
FT
2692 I915_WRITE(fence_reg + 4, val >> 32);
2693 POSTING_READ(fence_reg + 4);
2694
2695 I915_WRITE(fence_reg + 0, val);
2696 POSTING_READ(fence_reg);
2697 } else {
2698 I915_WRITE(fence_reg + 4, 0);
2699 POSTING_READ(fence_reg + 4);
2700 }
e11a51e3 2701}
575ea5a0 2702
e3359f38
FT
2703static void i915_write_fence_reg(struct drm_device *dev, int reg,
2704 struct drm_i915_gem_object *obj)
e11a51e3 2705{
e11a51e3 2706 drm_i915_private_t *dev_priv = dev->dev_private;
e3359f38 2707 u32 val;
575ea5a0 2708
e3359f38
FT
2709 if (obj) {
2710 u32 size = obj->gtt_space->size;
2711 int pitch_val;
2712 int tile_width;
575ea5a0 2713
e3359f38
FT
2714 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2715 (size & -size) != size ||
2716 (obj->gtt_offset & (size - 1)),
2717 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2718 obj->gtt_offset, obj->map_and_fenceable, size);
e11a51e3 2719
e3359f38
FT
2720 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2721 tile_width = 128;
2722 else
2723 tile_width = 512;
2724
2725 /* Note: pitch better be a power of two tile widths */
2726 pitch_val = obj->stride / tile_width;
2727 pitch_val = ffs(pitch_val) - 1;
2728
2729 val = obj->gtt_offset;
2730 if (obj->tiling_mode == I915_TILING_Y)
2731 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2732 val |= I915_FENCE_SIZE_BITS(size);
2733 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2734 val |= I830_FENCE_REG_VALID;
2735 } else
2736 val = 0;
2737
2738 if (reg < 8)
2739 reg = FENCE_REG_830_0 + reg * 4;
2740 else
2741 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2742
2743 I915_WRITE(reg, val);
2744 POSTING_READ(reg);
575ea5a0
FT
2745}
2746
e3359f38
FT
2747static void i830_write_fence_reg(struct drm_device *dev, int reg,
2748 struct drm_i915_gem_object *obj)
575ea5a0 2749{
e11a51e3 2750 drm_i915_private_t *dev_priv = dev->dev_private;
e11a51e3 2751 uint32_t val;
575ea5a0 2752
e3359f38
FT
2753 if (obj) {
2754 u32 size = obj->gtt_space->size;
2755 uint32_t pitch_val;
2756
2757 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2758 (size & -size) != size ||
2759 (obj->gtt_offset & (size - 1)),
2760 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2761 obj->gtt_offset, size);
2762
2763 pitch_val = obj->stride / 128;
2764 pitch_val = ffs(pitch_val) - 1;
2765
2766 val = obj->gtt_offset;
2767 if (obj->tiling_mode == I915_TILING_Y)
2768 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2769 val |= I830_FENCE_SIZE_BITS(size);
2770 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2771 val |= I830_FENCE_REG_VALID;
2772 } else
2773 val = 0;
2774
2775 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2776 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2777}
2778
a2fdbec6
FT
2779inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
2780{
2781 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
2782}
2783
e3359f38
FT
2784static void i915_gem_write_fence(struct drm_device *dev, int reg,
2785 struct drm_i915_gem_object *obj)
2786{
a2fdbec6
FT
2787 struct drm_i915_private *dev_priv = dev->dev_private;
2788
2789 /* Ensure that all CPU reads are completed before installing a fence
2790 * and all writes before removing the fence.
2791 */
2792 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
2793 cpu_mfence();
2794
5d0b1887
FT
2795 WARN(obj && (!obj->stride || !obj->tiling_mode),
2796 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
2797 obj->stride, obj->tiling_mode);
2798
e3359f38
FT
2799 switch (INTEL_INFO(dev)->gen) {
2800 case 7:
a2fdbec6 2801 case 6:
e3359f38
FT
2802 case 5:
2803 case 4: i965_write_fence_reg(dev, reg, obj); break;
2804 case 3: i915_write_fence_reg(dev, reg, obj); break;
2805 case 2: i830_write_fence_reg(dev, reg, obj); break;
a2fdbec6 2806 default: BUG();
e3359f38 2807 }
a2fdbec6
FT
2808
2809 /* And similarly be paranoid that no direct access to this region
2810 * is reordered to before the fence is installed.
2811 */
2812 if (i915_gem_object_needs_mb(obj))
2813 cpu_mfence();
e3359f38 2814}
575ea5a0 2815
e3359f38
FT
2816static inline int fence_number(struct drm_i915_private *dev_priv,
2817 struct drm_i915_fence_reg *fence)
2818{
2819 return fence - dev_priv->fence_regs;
2820}
575ea5a0 2821
e3359f38
FT
2822static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2823 struct drm_i915_fence_reg *fence,
2824 bool enable)
2825{
5d0b1887
FT
2826 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2827 int reg = fence_number(dev_priv, fence);
2828
2829 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
575ea5a0 2830
e3359f38 2831 if (enable) {
5d0b1887 2832 obj->fence_reg = reg;
e3359f38
FT
2833 fence->obj = obj;
2834 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2835 } else {
2836 obj->fence_reg = I915_FENCE_REG_NONE;
2837 fence->obj = NULL;
2838 list_del_init(&fence->lru_list);
2839 }
5d0b1887 2840 obj->fence_dirty = false;
e11a51e3 2841}
575ea5a0 2842
e11a51e3 2843static int
a2fdbec6 2844i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
575ea5a0 2845{
561529b1 2846 if (obj->last_fenced_seqno) {
b312333e 2847 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
561529b1
FT
2848 if (ret)
2849 return ret;
575ea5a0 2850
e11a51e3 2851 obj->last_fenced_seqno = 0;
e11a51e3 2852 }
575ea5a0 2853
b312333e 2854 obj->fenced_gpu_access = false;
e11a51e3 2855 return 0;
575ea5a0
FT
2856}
2857
e11a51e3
FT
2858int
2859i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
575ea5a0 2860{
e3359f38 2861 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
8e26cdf6 2862 struct drm_i915_fence_reg *fence;
e11a51e3 2863 int ret;
575ea5a0 2864
a2fdbec6 2865 ret = i915_gem_object_wait_fence(obj);
e11a51e3
FT
2866 if (ret)
2867 return ret;
575ea5a0 2868
e3359f38
FT
2869 if (obj->fence_reg == I915_FENCE_REG_NONE)
2870 return 0;
575ea5a0 2871
8e26cdf6
FT
2872 fence = &dev_priv->fence_regs[obj->fence_reg];
2873
e3359f38 2874 i915_gem_object_fence_lost(obj);
8e26cdf6 2875 i915_gem_object_update_fence(obj, fence, false);
575ea5a0 2876
e11a51e3 2877 return 0;
575ea5a0
FT
2878}
2879
e11a51e3 2880static struct drm_i915_fence_reg *
561529b1 2881i915_find_fence_reg(struct drm_device *dev)
575ea5a0 2882{
e11a51e3 2883 struct drm_i915_private *dev_priv = dev->dev_private;
561529b1 2884 struct drm_i915_fence_reg *reg, *avail;
e11a51e3 2885 int i;
575ea5a0 2886
e11a51e3
FT
2887 /* First try to find a free reg */
2888 avail = NULL;
2889 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2890 reg = &dev_priv->fence_regs[i];
2891 if (!reg->obj)
2892 return reg;
575ea5a0 2893
e11a51e3
FT
2894 if (!reg->pin_count)
2895 avail = reg;
2896 }
575ea5a0 2897
e11a51e3
FT
2898 if (avail == NULL)
2899 return NULL;
575ea5a0 2900
e11a51e3 2901 /* None available, try to steal one or wait for a user to finish */
e11a51e3
FT
2902 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2903 if (reg->pin_count)
2904 continue;
575ea5a0 2905
561529b1 2906 return reg;
e11a51e3 2907 }
575ea5a0 2908
561529b1 2909 return NULL;
575ea5a0
FT
2910}
2911
561529b1
FT
2912/**
2913 * i915_gem_object_get_fence - set up fencing for an object
2914 * @obj: object to map through a fence reg
2915 *
2916 * When mapping objects through the GTT, userspace wants to be able to write
2917 * to them without having to worry about swizzling if the object is tiled.
2918 * This function walks the fence regs looking for a free one for @obj,
2919 * stealing one if it can't find any.
2920 *
2921 * It then sets up the reg based on the object's properties: address, pitch
2922 * and tiling format.
2923 *
2924 * For an untiled surface, this removes any existing fence.
2925 */
e11a51e3 2926int
561529b1 2927i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
575ea5a0 2928{
e11a51e3
FT
2929 struct drm_device *dev = obj->base.dev;
2930 struct drm_i915_private *dev_priv = dev->dev_private;
e3359f38 2931 bool enable = obj->tiling_mode != I915_TILING_NONE;
e11a51e3
FT
2932 struct drm_i915_fence_reg *reg;
2933 int ret;
575ea5a0 2934
e3359f38
FT
2935 /* Have we updated the tiling parameters upon the object and so
2936 * will need to serialise the write to the associated fence register?
2937 */
f192107f 2938 if (obj->fence_dirty) {
a2fdbec6 2939 ret = i915_gem_object_wait_fence(obj);
e3359f38
FT
2940 if (ret)
2941 return ret;
2942 }
575ea5a0 2943
561529b1 2944 /* Just update our place in the LRU if our fence is getting reused. */
e11a51e3
FT
2945 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2946 reg = &dev_priv->fence_regs[obj->fence_reg];
f192107f 2947 if (!obj->fence_dirty) {
e3359f38
FT
2948 list_move_tail(&reg->lru_list,
2949 &dev_priv->mm.fence_list);
2950 return 0;
2951 }
2952 } else if (enable) {
2953 reg = i915_find_fence_reg(dev);
2954 if (reg == NULL)
2955 return -EDEADLK;
2956
2957 if (reg->obj) {
2958 struct drm_i915_gem_object *old = reg->obj;
575ea5a0 2959
a2fdbec6 2960 ret = i915_gem_object_wait_fence(old);
e11a51e3
FT
2961 if (ret)
2962 return ret;
e11a51e3 2963
e3359f38 2964 i915_gem_object_fence_lost(old);
e11a51e3 2965 }
e3359f38 2966 } else
e11a51e3 2967 return 0;
e11a51e3 2968
e3359f38 2969 i915_gem_object_update_fence(obj, reg, enable);
e11a51e3 2970
e3359f38 2971 return 0;
575ea5a0
FT
2972}
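/*
 * Typical caller pattern (sketch, simplified from the tiled GTT access
 * paths): bind the object into the mappable aperture first, then take a
 * fence before doing fenced access through the aperture:
 *
 *	ret = i915_gem_object_pin(obj, 0, true, false);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 */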
2973
d1c259ee
FT
2974static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2975 struct drm_mm_node *gtt_space,
2976 unsigned long cache_level)
2977{
2978 struct drm_mm_node *other;
2979
2980 /* On non-LLC machines we have to be careful when putting differing
2981 * types of snoopable memory together to avoid the prefetcher
a2fdbec6 2982 * crossing memory domains and dying.
d1c259ee
FT
2983 */
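/* For example, on a non-LLC machine a snooped (I915_CACHE_LLC) object
 * must not sit immediately adjacent to an I915_CACHE_NONE object unless a
 * hole separates them; that is what the neighbour checks below enforce.
 */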
2984 if (HAS_LLC(dev))
2985 return true;
2986
2987 if (gtt_space == NULL)
2988 return true;
2989
2990 if (list_empty(&gtt_space->node_list))
2991 return true;
2992
2993 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2994 if (other->allocated && !other->hole_follows && other->color != cache_level)
2995 return false;
2996
2997 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2998 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2999 return false;
3000
3001 return true;
3002}
3003
3004static void i915_gem_verify_gtt(struct drm_device *dev)
3005{
3006#if WATCH_GTT
3007 struct drm_i915_private *dev_priv = dev->dev_private;
3008 struct drm_i915_gem_object *obj;
3009 int err = 0;
3010
5d0b1887 3011 list_for_each_entry(obj, &dev_priv->mm.global_list, global_list) {
d1c259ee
FT
3012 if (obj->gtt_space == NULL) {
3013 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3014 err++;
3015 continue;
3016 }
3017
3018 if (obj->cache_level != obj->gtt_space->color) {
3019 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3020 obj->gtt_space->start,
3021 obj->gtt_space->start + obj->gtt_space->size,
3022 obj->cache_level,
3023 obj->gtt_space->color);
3024 err++;
3025 continue;
3026 }
3027
3028 if (!i915_gem_valid_gtt_space(dev,
3029 obj->gtt_space,
3030 obj->cache_level)) {
3031 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3032 obj->gtt_space->start,
3033 obj->gtt_space->start + obj->gtt_space->size,
3034 obj->cache_level);
3035 err++;
3036 continue;
3037 }
3038 }
3039
3040 WARN_ON(err);
3041#endif
3042}
3043
d2557f23
FT
3044/**
3045 * Finds free space in the GTT aperture and binds the object there.
3046 */
575ea5a0
FT
3047static int
3048i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
d2557f23 3049 unsigned alignment,
b00bc81c
FT
3050 bool map_and_fenceable,
3051 bool nonblocking)
575ea5a0 3052{
d2557f23
FT
3053 struct drm_device *dev = obj->base.dev;
3054 drm_i915_private_t *dev_priv = dev->dev_private;
5d0b1887
FT
3055 struct drm_mm_node *node;
3056 u32 size, fence_size, fence_alignment, unfenced_alignment;
575ea5a0 3057 bool mappable, fenceable;
5d0b1887
FT
3058 size_t gtt_max = map_and_fenceable ?
3059 dev_priv->gtt.mappable_end : dev_priv->gtt.total;
575ea5a0
FT
3060 int ret;
3061
a2fdbec6
FT
3062 fence_size = i915_gem_get_gtt_size(dev,
3063 obj->base.size,
3064 obj->tiling_mode);
3065 fence_alignment = i915_gem_get_gtt_alignment(dev,
3066 obj->base.size,
3067 obj->tiling_mode, true);
3068 unfenced_alignment =
3069 i915_gem_get_gtt_alignment(dev,
3070 obj->base.size,
3071 obj->tiling_mode, false);
575ea5a0 3072
575ea5a0
FT
3073 if (alignment == 0)
3074 alignment = map_and_fenceable ? fence_alignment :
a2fdbec6
FT
3075 unfenced_alignment;
3076 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
575ea5a0 3077 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
d2557f23 3078 return -EINVAL;
575ea5a0
FT
3079 }
3080
3081 size = map_and_fenceable ? fence_size : obj->base.size;
3082
3083 /* If the object is bigger than the entire aperture, reject it early
3084 * before evicting everything in a vain attempt to find space.
3085 */
5d0b1887
FT
3086 if (obj->base.size > gtt_max) {
3087 DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
3088 obj->base.size,
3089 map_and_fenceable ? "mappable" : "total",
3090 gtt_max);
d2557f23 3091 return -E2BIG;
575ea5a0
FT
3092 }
3093
3094 search_free:
3095 if (map_and_fenceable)
5d0b1887 3096 node = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
d1c259ee 3097 size, alignment, obj->cache_level,
a2fdbec6 3098 0, dev_priv->gtt.mappable_end,
d1c259ee 3099 false);
575ea5a0 3100 else
5d0b1887 3101 node = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
d1c259ee
FT
3102 size, alignment, obj->cache_level,
3103 false);
5d0b1887 3104 if (node != NULL) {
575ea5a0 3105 if (map_and_fenceable)
d1c259ee 3106 obj->gtt_space =
5d0b1887 3107 drm_mm_get_block_range_generic(node,
d1c259ee 3108 size, alignment, obj->cache_level,
a2fdbec6 3109 0, dev_priv->gtt.mappable_end,
d1c259ee 3110 false);
575ea5a0 3111 else
d1c259ee 3112 obj->gtt_space =
5d0b1887 3113 drm_mm_get_block_generic(node,
d1c259ee
FT
3114 size, alignment, obj->cache_level,
3115 false);
575ea5a0
FT
3116 }
3117 if (obj->gtt_space == NULL) {
3118 ret = i915_gem_evict_something(dev, size, alignment,
9f16360b
FT
3119 obj->cache_level,
3120 map_and_fenceable,
3121 nonblocking);
d1c259ee
FT
3122 if (ret)
3123 return ret;
3124
575ea5a0
FT
3125 goto search_free;
3126 }
f6201ebf
MD
3127
3128 /*
3129 * NOTE: i915_gem_object_get_pages_gtt() cannot
3130 * return ENOMEM, since we used VM_ALLOC_RETRY.
3131 */
dfa24183 3132 ret = i915_gem_object_get_pages_gtt(obj);
575ea5a0
FT
3133 if (ret != 0) {
3134 drm_mm_put_block(obj->gtt_space);
3135 obj->gtt_space = NULL;
d2557f23 3136 return ret;
575ea5a0
FT
3137 }
3138
7cbd1a46 3139 i915_gem_gtt_bind_object(obj, obj->cache_level);
575ea5a0
FT
3140 if (ret != 0) {
3141 i915_gem_object_put_pages_gtt(obj);
3142 drm_mm_put_block(obj->gtt_space);
3143 obj->gtt_space = NULL;
686a02f1 3144 if (i915_gem_evict_everything(dev))
575ea5a0
FT
3145 return (ret);
3146 goto search_free;
3147 }
3148
5d0b1887 3149 list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
575ea5a0
FT
3150 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3151
575ea5a0
FT
3152 obj->gtt_offset = obj->gtt_space->start;
3153
3154 fenceable =
3155 obj->gtt_space->size == fence_size &&
3156 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
3157
3158 mappable =
a2fdbec6
FT
3159 obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;
3160
575ea5a0
FT
3161 obj->map_and_fenceable = mappable && fenceable;
3162
5d0b1887 3163 trace_i915_gem_object_bind(obj, map_and_fenceable);
d1c259ee 3164 i915_gem_verify_gtt(dev);
d2557f23 3165 return 0;
575ea5a0
FT
3166}
3167
e11a51e3
FT
3168void
3169i915_gem_clflush_object(struct drm_i915_gem_object *obj)
3170{
3171
3172 /* If we don't have a page list set up, then we're not pinned
3173 * to GPU, and we can ignore the cache flush because it'll happen
3174 * again at bind time.
3175 */
3176 if (obj->pages == NULL)
3177 return;
3178
a2fdbec6
FT
3179 /*
3180 * Stolen memory is always coherent with the GPU as it is explicitly
3181 * marked as wc by the system, or the system is cache-coherent.
3182 */
3183 if (obj->stolen)
3184 return;
3185
e11a51e3
FT
3186 /* If the GPU is snooping the contents of the CPU cache,
3187 * we do not need to manually clear the CPU cache lines. However,
3188 * the caches are only snooped when the render cache is
3189 * flushed/invalidated. As we always have to emit invalidations
3190 * and flushes when moving into and out of the RENDER domain, correct
3191 * snooping behaviour occurs naturally as the result of our domain
3192 * tracking.
3193 */
3194 if (obj->cache_level != I915_CACHE_NONE)
3195 return;
3196
3197 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
3198}
3199
3200/** Flushes the GTT write domain for the object if it's dirty. */
575ea5a0 3201static void
e11a51e3 3202i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
575ea5a0 3203{
e11a51e3 3204 uint32_t old_write_domain;
575ea5a0 3205
e11a51e3
FT
3206 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3207 return;
575ea5a0 3208
e11a51e3
FT
3209 /* No actual flushing is required for the GTT write domain. Writes
3210 * to it immediately go to main memory as far as we know, so there's
3211 * no chipset flush. It also doesn't land in render cache.
3212 *
3213 * However, we do have to enforce the order so that all writes through
3214 * the GTT land before any writes to the device, such as updates to
3215 * the GATT itself.
3216 */
3217 cpu_sfence();
575ea5a0 3218
e11a51e3
FT
3219 old_write_domain = obj->base.write_domain;
3220 obj->base.write_domain = 0;
3221}
3222
3223/** Flushes the CPU write domain for the object if it's dirty. */
3224static void
3225i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3226{
3227 uint32_t old_write_domain;
3228
3229 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
575ea5a0
FT
3230 return;
3231
e11a51e3 3232 i915_gem_clflush_object(obj);
0b869d8a 3233 i915_gem_chipset_flush(obj->base.dev);
575ea5a0 3234 old_write_domain = obj->base.write_domain;
e11a51e3
FT
3235 obj->base.write_domain = 0;
3236}
575ea5a0 3237
e11a51e3
FT
3238/**
3239 * Moves a single object to the GTT read, and possibly write domain.
3240 *
3241 * This function returns when the move is complete, including waiting on
3242 * flushes to occur.
3243 */
575ea5a0 3244int
e11a51e3 3245i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
575ea5a0 3246{
686a02f1 3247 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
e11a51e3 3248 uint32_t old_write_domain, old_read_domains;
575ea5a0
FT
3249 int ret;
3250
686a02f1 3251 /* Not valid to be called on unbound objects. */
575ea5a0 3252 if (obj->gtt_space == NULL)
686a02f1 3253 return -EINVAL;
575ea5a0 3254
e11a51e3
FT
3255 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3256 return 0;
575ea5a0 3257
686a02f1
FT
3258 ret = i915_gem_object_wait_rendering(obj, !write);
3259 if (ret)
3260 return ret;
575ea5a0 3261
e11a51e3 3262 i915_gem_object_flush_cpu_write_domain(obj);
575ea5a0 3263
a2fdbec6
FT
3264 /* Serialise direct access to this object with the barriers for
3265 * coherent writes from the GPU, by effectively invalidating the
3266 * GTT domain upon first access.
3267 */
3268 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3269 cpu_mfence();
3270
e11a51e3
FT
3271 old_write_domain = obj->base.write_domain;
3272 old_read_domains = obj->base.read_domains;
575ea5a0 3273
686a02f1
FT
3274 /* It should now be out of any other write domains, and we can update
3275 * the domain values for our changes.
3276 */
3277 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
e11a51e3
FT
3278 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3279 if (write) {
3280 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3281 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3282 obj->dirty = 1;
3283 }
575ea5a0 3284
686a02f1
FT
3285 /* And bump the LRU for this access */
3286 if (i915_gem_object_is_inactive(obj))
3287 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3288
3289 return 0;
575ea5a0
FT
3290}
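/*
 * Sketch of a typical call site (e.g. a GTT write path): move the object
 * to the GTT write domain before poking it through the aperture,
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *
 * so any pending CPU-domain writes are clflushed first and the object is
 * marked dirty for eventual writeback.
 */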
3291
e11a51e3
FT
3292int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3293 enum i915_cache_level cache_level)
b030f26b 3294{
e11a51e3 3295 struct drm_device *dev = obj->base.dev;
b030f26b 3296 drm_i915_private_t *dev_priv = dev->dev_private;
e11a51e3 3297 int ret;
b030f26b 3298
e11a51e3
FT
3299 if (obj->cache_level == cache_level)
3300 return 0;
3301
3302 if (obj->pin_count) {
3303 DRM_DEBUG("can not change the cache level of pinned objects\n");
3304 return -EBUSY;
b030f26b
FT
3305 }
3306
d1c259ee
FT
3307 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
3308 ret = i915_gem_object_unbind(obj);
3309 if (ret)
3310 return ret;
3311 }
3312
e11a51e3
FT
3313 if (obj->gtt_space) {
3314 ret = i915_gem_object_finish_gpu(obj);
d2557f23
FT
3315 if (ret)
3316 return ret;
b030f26b 3317
e11a51e3