drm/i915: Sync i915_gem_pwrite_ioctl() with Linux 3.11
[dragonfly.git] / sys / dev / drm / i915 / i915_gem.c
561529b1 1/*
575ea5a0
FT
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 * Copyright (c) 2011 The FreeBSD Foundation
27 * All rights reserved.
28 *
29 * This software was developed by Konstantin Belousov under sponsorship from
30 * the FreeBSD Foundation.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
1b006bb0 52 *
575ea5a0
FT
53 */
54
e2b8ab05
FT
55#include <sys/resourcevar.h>
56#include <sys/sfbuf.h>
7256b59b 57#include <machine/md_var.h>
e2b8ab05 58
18e26a6d 59#include <drm/drmP.h>
5c6c6f23 60#include <drm/i915_drm.h>
5718399f 61#include "i915_drv.h"
7256b59b 62#include "i915_trace.h"
5718399f 63#include "intel_drv.h"
1964046d 64#include <linux/shmem_fs.h>
7256b59b
FT
65#include <linux/slab.h>
66#include <linux/pci.h>
575ea5a0 67
7cbd1a46
FT
68static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
69static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
b00bc81c
FT
70static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
71 unsigned alignment,
72 bool map_and_fenceable,
73 bool nonblocking);
e11a51e3 74static int i915_gem_phys_pwrite(struct drm_device *dev,
d1c259ee
FT
75 struct drm_i915_gem_object *obj,
76 struct drm_i915_gem_pwrite *args,
77 struct drm_file *file);
e3359f38
FT
78
79static void i915_gem_write_fence(struct drm_device *dev, int reg,
80 struct drm_i915_gem_object *obj);
81static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
82 struct drm_i915_fence_reg *fence,
83 bool enable);
7cbd1a46 84
a2fdbec6 85static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
575ea5a0 86static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
e3359f38
FT
87
88static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
89{
90 if (obj->tiling_mode)
91 i915_gem_release_mmap(obj);
92
93 /* As we do not have an associated fence register, we will force
94 * a tiling change if we ever need to acquire one.
95 */
f192107f 96 obj->fence_dirty = false;
e3359f38
FT
97 obj->fence_reg = I915_FENCE_REG_NONE;
98}
99
575ea5a0 100static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
575ea5a0
FT
101static void i915_gem_lowmem(void *arg);
102
e11a51e3
FT
103/* some bookkeeping */
104static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
105 size_t size)
575ea5a0 106{
575ea5a0
FT
107 dev_priv->mm.object_count++;
108 dev_priv->mm.object_memory += size;
109}
110
e11a51e3
FT
111static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
112 size_t size)
575ea5a0 113{
575ea5a0
FT
114 dev_priv->mm.object_count--;
115 dev_priv->mm.object_memory -= size;
116}
117
118static int
a2fdbec6 119i915_gem_wait_for_error(struct i915_gpu_error *error)
575ea5a0 120{
575ea5a0
FT
121 int ret;
122
a2fdbec6
FT
123#define EXIT_COND (!i915_reset_in_progress(error) || \
124 i915_terminally_wedged(error))
125 if (EXIT_COND)
d65a337f 126 return 0;
575ea5a0 127
901476d5
FT
128 /*
129 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
130 * userspace. If it takes that long something really bad is going on and
131 * we should simply try to bail out and fail as gracefully as possible.
132 */
a2fdbec6
FT
133 ret = wait_event_interruptible_timeout(error->reset_queue,
134 EXIT_COND,
135 10*HZ);
901476d5
FT
136 if (ret == 0) {
137 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
138 return -EIO;
139 } else if (ret < 0) {
140 return ret;
575ea5a0 141 }
a2fdbec6 142#undef EXIT_COND
575ea5a0 143
d65a337f 144 return 0;
575ea5a0
FT
145}
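/*
 * In short: return immediately when no reset is in progress (or the GPU is
 * already terminally wedged), otherwise sleep on error->reset_queue for at
 * most ten seconds and turn a timeout into -EIO instead of hanging the
 * caller forever.
 */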
146
e11a51e3 147int i915_mutex_lock_interruptible(struct drm_device *dev)
575ea5a0 148{
a2fdbec6 149 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0
FT
150 int ret;
151
a2fdbec6 152 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
d2557f23
FT
153 if (ret)
154 return ret;
575ea5a0 155
a2fdbec6 156 ret = lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_SLEEPFAIL);
e11a51e3
FT
157 if (ret)
158 return -EINTR;
575ea5a0 159
e11a51e3 160 WARN_ON(i915_verify_lists(dev));
e11a51e3 161 return 0;
575ea5a0
FT
162}
163
e11a51e3
FT
164static inline bool
165i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
575ea5a0 166{
f192107f 167 return !obj->active;
575ea5a0
FT
168}
169
170int
171i915_gem_init_ioctl(struct drm_device *dev, void *data,
f192107f 172 struct drm_file *file)
575ea5a0 173{
7256b59b 174 struct drm_i915_private *dev_priv = dev->dev_private;
f192107f 175 struct drm_i915_gem_init *args = data;
575ea5a0 176
f192107f
FT
177 if (drm_core_check_feature(dev, DRIVER_MODESET))
178 return -ENODEV;
575ea5a0
FT
179
180 if (args->gtt_start >= args->gtt_end ||
181 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
f192107f
FT
182 return -EINVAL;
183
184 /* GEM with user mode setting was never supported on ilk and later. */
185 if (INTEL_INFO(dev)->gen >= 5)
186 return -ENODEV;
575ea5a0 187
a2fdbec6
FT
188 mutex_lock(&dev->struct_mutex);
189 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
190 args->gtt_end);
7256b59b 191 dev_priv->gtt.mappable_end = args->gtt_end;
a2fdbec6 192 mutex_unlock(&dev->struct_mutex);
04adb68c
FT
193
194 return 0;
575ea5a0
FT
195}
196
575ea5a0
FT
197int
198i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
e11a51e3 199 struct drm_file *file)
575ea5a0 200{
f192107f
FT
201 struct drm_i915_private *dev_priv = dev->dev_private;
202 struct drm_i915_gem_get_aperture *args = data;
575ea5a0
FT
203 struct drm_i915_gem_object *obj;
204 size_t pinned;
205
575ea5a0 206 pinned = 0;
a2fdbec6 207 mutex_lock(&dev->struct_mutex);
5d0b1887 208 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
f192107f
FT
209 if (obj->pin_count)
210 pinned += obj->gtt_space->size;
a2fdbec6 211 mutex_unlock(&dev->struct_mutex);
575ea5a0 212
a2fdbec6 213 args->aper_size = dev_priv->gtt.total;
575ea5a0
FT
214 args->aper_available_size = args->aper_size - pinned;
215
f192107f 216 return 0;
575ea5a0
FT
217}
218
5d0b1887
FT
219void i915_gem_object_free(struct drm_i915_gem_object *obj)
220{
221 kfree(obj);
222}
223
245593da 224static int
d2557f23
FT
225i915_gem_create(struct drm_file *file,
226 struct drm_device *dev,
227 uint64_t size,
228 uint32_t *handle_p)
575ea5a0 229{
e11a51e3 230 struct drm_i915_gem_object *obj;
575ea5a0 231 int ret;
d2557f23 232 u32 handle;
575ea5a0 233
e11a51e3
FT
234 size = roundup(size, PAGE_SIZE);
235 if (size == 0)
d2557f23 236 return -EINVAL;
575ea5a0 237
d2557f23 238 /* Allocate the new object */
e11a51e3
FT
239 obj = i915_gem_alloc_object(dev, size);
240 if (obj == NULL)
d2557f23 241 return -ENOMEM;
575ea5a0 242
e11a51e3 243 ret = drm_gem_handle_create(file, &obj->base, &handle);
d2557f23 244 if (ret) {
e11a51e3
FT
245 drm_gem_object_release(&obj->base);
246 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
7256b59b
FT
247 i915_gem_object_free(obj);
248 return ret;
575ea5a0
FT
249 }
250
e11a51e3
FT
251 /* drop reference from allocate - handle holds it now */
252 drm_gem_object_unreference(&obj->base);
7256b59b
FT
253 trace_i915_gem_object_create(obj);
254
e11a51e3 255 *handle_p = handle;
d2557f23 256 return 0;
e11a51e3 257}
575ea5a0 258
e11a51e3
FT
259int
260i915_gem_dumb_create(struct drm_file *file,
261 struct drm_device *dev,
262 struct drm_mode_create_dumb *args)
263{
575ea5a0 264
e11a51e3 265 /* have to work out size/pitch and return them */
7256b59b 266 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
e11a51e3 267 args->size = args->pitch * args->height;
d2557f23
FT
268 return i915_gem_create(file, dev,
269 args->size, &args->handle);
575ea5a0
FT
270}
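/*
 * Worked example of the dumb-buffer sizing above (illustrative numbers, not
 * taken from this file): a 1366x768 buffer at 32bpp needs 1366 * 4 = 5464
 * bytes per row, which rounds up to a 64-byte-aligned pitch of 5504, so
 * args->size becomes 5504 * 768 = 4227072 bytes.
 */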
271
e11a51e3
FT
272int i915_gem_dumb_destroy(struct drm_file *file,
273 struct drm_device *dev,
274 uint32_t handle)
275{
276
d2557f23 277 return drm_gem_handle_delete(file, handle);
e11a51e3
FT
278}
279
280/**
281 * Creates a new mm object and returns a handle to it.
282 */
283int
284i915_gem_create_ioctl(struct drm_device *dev, void *data,
285 struct drm_file *file)
286{
287 struct drm_i915_gem_create *args = data;
288
d2557f23
FT
289 return i915_gem_create(file, dev,
290 args->size, &args->handle);
e11a51e3
FT
291}
292
7256b59b
FT
293static inline int
294__copy_to_user_swizzled(char __user *cpu_vaddr,
295 const char *gpu_vaddr, int gpu_offset,
296 int length)
297{
298 int ret, cpu_offset = 0;
299
300 while (length > 0) {
301 int cacheline_end = ALIGN(gpu_offset + 1, 64);
302 int this_length = min(cacheline_end - gpu_offset, length);
303 int swizzled_gpu_offset = gpu_offset ^ 64;
304
305 ret = __copy_to_user(cpu_vaddr + cpu_offset,
306 gpu_vaddr + swizzled_gpu_offset,
307 this_length);
308 if (ret)
309 return ret + length;
310
311 cpu_offset += this_length;
312 gpu_offset += this_length;
313 length -= this_length;
314 }
315
316 return 0;
317}
318
319static inline int
320__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
321 const char __user *cpu_vaddr,
322 int length)
323{
324 int ret, cpu_offset = 0;
325
326 while (length > 0) {
327 int cacheline_end = ALIGN(gpu_offset + 1, 64);
328 int this_length = min(cacheline_end - gpu_offset, length);
329 int swizzled_gpu_offset = gpu_offset ^ 64;
330
331 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
332 cpu_vaddr + cpu_offset,
333 this_length);
334 if (ret)
335 return ret + length;
336
337 cpu_offset += this_length;
338 gpu_offset += this_length;
339 length -= this_length;
340 }
341
342 return 0;
343}
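/*
 * A small worked example of the swizzled copies above, assuming the usual
 * 64-byte cachelines: "gpu_offset ^ 64" flips bit 6, i.e. it swaps the two
 * 64-byte cachelines within each 128-byte block, and bounding every chunk
 * at ALIGN(gpu_offset + 1, 64) keeps each copy inside one cacheline so the
 * XOR never straddles a swap pair.  For gpu_offset = 0x90 and length = 0x30,
 * cacheline_end = 0xc0, this_length = 0x30 and swizzled_gpu_offset = 0xd0,
 * so CPU bytes for 0x90..0xbf are exchanged with GPU bytes 0xd0..0xff,
 * which is exactly the bit-17 channel swap these helpers compensate for.
 */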
344
345/* Per-page copy function for the shmem pread fastpath.
346 * Flushes invalid cachelines before reading the target if
347 * needs_clflush is set. */
348static int
349shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
350 char __user *user_data,
351 bool page_do_bit17_swizzling, bool needs_clflush)
352{
353 char *vaddr;
354 int ret;
355
356 if (unlikely(page_do_bit17_swizzling))
357 return -EINVAL;
358
359 vaddr = kmap_atomic(page);
360 if (needs_clflush)
361 drm_clflush_virt_range(vaddr + shmem_page_offset,
362 page_length);
363 ret = __copy_to_user_inatomic(user_data,
364 vaddr + shmem_page_offset,
365 page_length);
366 kunmap_atomic(vaddr);
367
368 return ret ? -EFAULT : 0;
369}
370
371static void
372shmem_clflush_swizzled_range(char *addr, unsigned long length,
373 bool swizzled)
374{
375 if (unlikely(swizzled)) {
376 unsigned long start = (unsigned long) addr;
377 unsigned long end = (unsigned long) addr + length;
378
379 /* For swizzling simply ensure that we always flush both
380 * channels. Lame, but simple and it works. Swizzled
381 * pwrite/pread is far from a hotpath - current userspace
382 * doesn't use it at all. */
383 start = round_down(start, 128);
384 end = round_up(end, 128);
385
386 drm_clflush_virt_range((void *)start, end - start);
387 } else {
388 drm_clflush_virt_range(addr, length);
389 }
390
391}
392
393/* Only difference to the fast-path function is that this can handle bit17
394 * and uses non-atomic copy and kmap functions. */
395static int
396shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
397 char __user *user_data,
398 bool page_do_bit17_swizzling, bool needs_clflush)
399{
400 char *vaddr;
401 int ret;
402
403 vaddr = kmap(page);
404 if (needs_clflush)
405 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
406 page_length,
407 page_do_bit17_swizzling);
408
409 if (page_do_bit17_swizzling)
410 ret = __copy_to_user_swizzled(user_data,
411 vaddr, shmem_page_offset,
412 page_length);
413 else
414 ret = __copy_to_user(user_data,
415 vaddr + shmem_page_offset,
416 page_length);
417 kunmap(page);
418
419 return ret ? -EFAULT : 0;
420}
421
d1c259ee
FT
422static inline void vm_page_reference(vm_page_t m)
423{
424 vm_page_flag_set(m, PG_REFERENCED);
425}
426
427static int
428i915_gem_shmem_pread(struct drm_device *dev,
429 struct drm_i915_gem_object *obj,
430 struct drm_i915_gem_pread *args,
431 struct drm_file *file)
432{
7256b59b
FT
433 char __user *user_data;
434 ssize_t remain;
435 off_t offset;
436 int shmem_page_offset, page_length, ret = 0;
437 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
438 int hit_slowpath = 0;
439 int needs_clflush = 0;
440 int i;
d1c259ee 441
7256b59b
FT
442 user_data = (char __user *) (uintptr_t) args->data_ptr;
443 remain = args->size;
d1c259ee 444
7256b59b
FT
445 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
446
447 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
448 /* If we're not in the cpu read domain, set ourselves into the gtt
449 * read domain and manually flush cachelines (if required). This
450 * optimizes for the case when the gpu will dirty the data
451 * anyway again before the next pread happens. */
452 if (obj->cache_level == I915_CACHE_NONE)
453 needs_clflush = 1;
454 if (obj->gtt_space) {
455 ret = i915_gem_object_set_to_gtt_domain(obj, false);
456 if (ret)
457 return ret;
458 }
459 }
d1c259ee 460
7256b59b
FT
461 ret = i915_gem_object_get_pages(obj);
462 if (ret)
463 return ret;
d1c259ee 464
7256b59b 465 i915_gem_object_pin_pages(obj);
d1c259ee 466
7256b59b 467 offset = args->offset;
d1c259ee 468
7256b59b
FT
469 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
470 struct vm_page *page;
471
472 if (i < offset >> PAGE_SHIFT)
473 continue;
474
475 if (remain <= 0)
d1c259ee 476 break;
7256b59b
FT
477
478 /* Operation in this page
479 *
480 * shmem_page_offset = offset within page in shmem file
481 * page_length = bytes to copy for this page
482 */
483 shmem_page_offset = offset_in_page(offset);
484 page_length = remain;
485 if ((shmem_page_offset + page_length) > PAGE_SIZE)
486 page_length = PAGE_SIZE - shmem_page_offset;
487
488#ifdef __linux__
489 page = sg_page(sg);
490 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
491 (page_to_phys(page) & (1 << 17)) != 0;
492#else
493 page = obj->pages[i];
494 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
495 (VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
496#endif
497
498 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
499 user_data, page_do_bit17_swizzling,
500 needs_clflush);
501 if (ret == 0)
502 goto next_page;
503
504 hit_slowpath = 1;
505 mutex_unlock(&dev->struct_mutex);
506
507#ifdef __linux__
508 if (!prefaulted) {
509 ret = fault_in_multipages_writeable(user_data, remain);
510 /* Userspace is tricking us, but we've already clobbered
511 * its pages with the prefault and promised to write the
512 * data up to the first fault. Hence ignore any errors
513 * and just continue. */
514 (void)ret;
515 prefaulted = 1;
516 }
517#endif
518
519 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
520 user_data, page_do_bit17_swizzling,
521 needs_clflush);
522
523 mutex_lock(&dev->struct_mutex);
524
525next_page:
526#ifdef __linux__
527 mark_page_accessed(page);
528#endif
529
530 if (ret)
531 goto out;
532
533 remain -= page_length;
534 user_data += page_length;
535 offset += page_length;
d1c259ee 536 }
d1c259ee 537
7256b59b
FT
538out:
539 i915_gem_object_unpin_pages(obj);
540
541 if (hit_slowpath) {
542 /* Fixup: Kill any reinstated backing storage pages */
543 if (obj->madv == __I915_MADV_PURGED)
544 i915_gem_object_truncate(obj);
545 }
546
547 return ret;
d1c259ee
FT
548}
549
e11a51e3
FT
550/**
551 * Reads data from the object referenced by handle.
552 *
553 * On error, the contents of *data are undefined.
554 */
555int
556i915_gem_pread_ioctl(struct drm_device *dev, void *data,
557 struct drm_file *file)
558{
d2557f23 559 struct drm_i915_gem_pread *args = data;
d1c259ee
FT
560 struct drm_i915_gem_object *obj;
561 int ret = 0;
562
563 if (args->size == 0)
564 return 0;
565
566 ret = i915_mutex_lock_interruptible(dev);
567 if (ret)
568 return ret;
569
570 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
571 if (&obj->base == NULL) {
572 ret = -ENOENT;
573 goto unlock;
574 }
575
576 /* Bounds check source. */
577 if (args->offset > obj->base.size ||
578 args->size > obj->base.size - args->offset) {
579 ret = -EINVAL;
580 goto out;
581 }
582
583 ret = i915_gem_shmem_pread(dev, obj, args, file);
584out:
585 drm_gem_object_unreference(&obj->base);
586unlock:
a2fdbec6
FT
587 mutex_unlock(&dev->struct_mutex);
588 return ret;
589}
590
a2fdbec6
FT
591/* This is the fast write path which cannot handle
592 * page faults in the source data
593 */
594
595static inline int
596fast_user_write(struct io_mapping *mapping,
597 loff_t page_base, int page_offset,
598 char __user *user_data,
599 int length)
600{
601 void __iomem *vaddr_atomic;
602 void *vaddr;
603 unsigned long unwritten;
604
605 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
606 /* We can use the cpu mem copy function because this is X86. */
005b6ef6 607 vaddr = (char __force*)vaddr_atomic + page_offset;
a2fdbec6
FT
608 unwritten = __copy_from_user_inatomic_nocache(vaddr,
609 user_data, length);
610 io_mapping_unmap_atomic(vaddr_atomic);
611 return unwritten;
612}
613
614/**
615 * This is the fast pwrite path, where we copy the data directly from the
616 * user into the GTT, uncached.
617 */
618static int
619i915_gem_gtt_pwrite_fast(struct drm_device *dev,
620 struct drm_i915_gem_object *obj,
621 struct drm_i915_gem_pwrite *args,
622 struct drm_file *file)
623{
624 drm_i915_private_t *dev_priv = dev->dev_private;
625 ssize_t remain;
626 loff_t offset, page_base;
627 char __user *user_data;
628 int page_offset, page_length, ret;
629
630 ret = i915_gem_object_pin(obj, 0, true, true);
631 if (ret)
632 goto out;
633
634 ret = i915_gem_object_set_to_gtt_domain(obj, true);
635 if (ret)
636 goto out_unpin;
637
638 ret = i915_gem_object_put_fence(obj);
639 if (ret)
640 goto out_unpin;
641
8e26cdf6 642 user_data = to_user_ptr(args->data_ptr);
a2fdbec6
FT
643 remain = args->size;
644
645 offset = obj->gtt_offset + args->offset;
646
647 while (remain > 0) {
648 /* Operation in this page
649 *
650 * page_base = page offset within aperture
651 * page_offset = offset within page
652 * page_length = bytes to copy for this page
653 */
654 page_base = offset & PAGE_MASK;
655 page_offset = offset_in_page(offset);
656 page_length = remain;
657 if ((page_offset + remain) > PAGE_SIZE)
658 page_length = PAGE_SIZE - page_offset;
659
660 /* If we get a fault while copying data, then (presumably) our
661 * source page isn't available. Return the error and we'll
662 * retry in the slow path.
663 */
664 if (fast_user_write(dev_priv->gtt.mappable, page_base,
665 page_offset, user_data, page_length)) {
666 ret = -EFAULT;
667 goto out_unpin;
668 }
669
670 remain -= page_length;
671 user_data += page_length;
672 offset += page_length;
673 }
674
675out_unpin:
676 i915_gem_object_unpin(obj);
677out:
d1c259ee
FT
678 return ret;
679}
d1c259ee 680
7256b59b
FT
681#if 0
682/* Per-page copy function for the shmem pwrite fastpath.
683 * Flushes invalid cachelines before writing to the target if
684 * needs_clflush_before is set and flushes out any written cachelines after
685 * writing if needs_clflush is set. */
686static int
687shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
688 char __user *user_data,
689 bool page_do_bit17_swizzling,
690 bool needs_clflush_before,
691 bool needs_clflush_after)
692{
693 char *vaddr;
694 int ret;
695
696 if (unlikely(page_do_bit17_swizzling))
697 return -EINVAL;
698
699 vaddr = kmap_atomic(page);
700 if (needs_clflush_before)
701 drm_clflush_virt_range(vaddr + shmem_page_offset,
702 page_length);
703 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
704 user_data,
705 page_length);
706 if (needs_clflush_after)
707 drm_clflush_virt_range(vaddr + shmem_page_offset,
708 page_length);
709 kunmap_atomic(vaddr);
710
711 return ret ? -EFAULT : 0;
712}
713
714/* Only difference to the fast-path function is that this can handle bit17
715 * and uses non-atomic copy and kmap functions. */
716static int
717shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
718 char __user *user_data,
719 bool page_do_bit17_swizzling,
720 bool needs_clflush_before,
721 bool needs_clflush_after)
722{
723 char *vaddr;
724 int ret;
725
726 vaddr = kmap(page);
727 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
728 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
729 page_length,
730 page_do_bit17_swizzling);
731 if (page_do_bit17_swizzling)
732 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
733 user_data,
734 page_length);
735 else
736 ret = __copy_from_user(vaddr + shmem_page_offset,
737 user_data,
738 page_length);
739 if (needs_clflush_after)
740 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
741 page_length,
742 page_do_bit17_swizzling);
743 kunmap(page);
744
745 return ret ? -EFAULT : 0;
746}
747#endif
748
d1c259ee
FT
749static int
750i915_gem_shmem_pwrite(struct drm_device *dev,
751 struct drm_i915_gem_object *obj,
752 struct drm_i915_gem_pwrite *args,
753 struct drm_file *file)
754{
755 vm_object_t vm_obj;
756 vm_page_t m;
757 struct sf_buf *sf;
758 vm_offset_t mkva;
759 vm_pindex_t obj_pi;
760 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;
761
762 do_bit17_swizzling = 0;
763
764 obj->dirty = 1;
765 vm_obj = obj->base.vm_obj;
766 ret = 0;
767
768 VM_OBJECT_LOCK(vm_obj);
769 vm_object_pip_add(vm_obj, 1);
770 while (args->size > 0) {
771 obj_pi = OFF_TO_IDX(args->offset);
772 obj_po = args->offset & PAGE_MASK;
773
56c606a8 774 m = shmem_read_mapping_page(vm_obj, obj_pi);
d1c259ee
FT
775 VM_OBJECT_UNLOCK(vm_obj);
776
777 sf = sf_buf_alloc(m);
778 mkva = sf_buf_kva(sf);
779 length = min(args->size, PAGE_SIZE - obj_po);
780 while (length > 0) {
781 if (do_bit17_swizzling &&
782 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
783 cnt = roundup2(obj_po + 1, 64);
784 cnt = min(cnt - obj_po, length);
785 swizzled_po = obj_po ^ 64;
786 } else {
787 cnt = length;
788 swizzled_po = obj_po;
789 }
790 ret = -copyin_nofault(
791 (void *)(uintptr_t)args->data_ptr,
792 (char *)mkva + swizzled_po, cnt);
793 if (ret != 0)
794 break;
795 args->data_ptr += cnt;
796 args->size -= cnt;
797 length -= cnt;
798 args->offset += cnt;
799 obj_po += cnt;
800 }
801 sf_buf_free(sf);
802 VM_OBJECT_LOCK(vm_obj);
803 vm_page_dirty(m);
804 vm_page_reference(m);
805 vm_page_busy_wait(m, FALSE, "i915gem");
806 vm_page_unwire(m, 1);
807 vm_page_wakeup(m);
808
809 if (ret != 0)
810 break;
811 }
812 vm_object_pip_wakeup(vm_obj);
813 VM_OBJECT_UNLOCK(vm_obj);
814
815 return (ret);
575ea5a0
FT
816}
817
e11a51e3
FT
818/**
819 * Writes data to the object referenced by handle.
820 *
821 * On error, the contents of the buffer that were to be modified are undefined.
822 */
575ea5a0 823int
e11a51e3
FT
824i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
825 struct drm_file *file)
575ea5a0 826{
d2557f23 827 struct drm_i915_gem_pwrite *args = data;
d1c259ee 828 struct drm_i915_gem_object *obj;
005b6ef6 829 int ret;
d1c259ee
FT
830
831 if (args->size == 0)
832 return 0;
833
005b6ef6
FT
834#if 0
835 if (!access_ok(VERIFY_READ,
836 to_user_ptr(args->data_ptr),
837 args->size))
838 return -EFAULT;
839
840 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
841 args->size);
842 if (ret)
843 return -EFAULT;
844#endif
7e793f0f 845
d1c259ee 846 ret = i915_mutex_lock_interruptible(dev);
005b6ef6
FT
847 if (ret)
848 return ret;
d1c259ee
FT
849
850 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
851 if (&obj->base == NULL) {
852 ret = -ENOENT;
853 goto unlock;
854 }
575ea5a0 855
d1c259ee
FT
856 /* Bounds check destination. */
857 if (args->offset > obj->base.size ||
858 args->size > obj->base.size - args->offset) {
859 ret = -EINVAL;
860 goto out;
861 }
862
005b6ef6
FT
863 /* prime objects have no backing filp to GEM pread/pwrite
864 * pages from.
865 */
866#if 0
867 if (!obj->base.filp) {
868 ret = -EINVAL;
869 goto out;
870 }
871#endif
872
873 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
874
875 ret = -EFAULT;
876 /* We can only do the GTT pwrite on untiled buffers, as otherwise
877 * it would end up going through the fenced access, and we'll get
878 * different detiling behavior between reading and writing.
879 * pread/pwrite currently are reading and writing from the CPU
880 * perspective, requiring manual detiling by the client.
881 */
d1c259ee
FT
882 if (obj->phys_obj) {
883 ret = i915_gem_phys_pwrite(dev, obj, args, file);
005b6ef6 884 goto out;
7e793f0f 885 }
005b6ef6
FT
886
887 if (obj->cache_level == I915_CACHE_NONE &&
888 obj->tiling_mode == I915_TILING_NONE &&
889 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
890 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
891 /* Note that the gtt paths might fail with non-page-backed user
892 * pointers (e.g. gtt mappings when moving data between
893 * textures). Fallback to the shmem path in that case. */
894 }
895
896 if (ret == -EFAULT || ret == -ENOSPC)
897 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
898
d1c259ee
FT
899out:
900 drm_gem_object_unreference(&obj->base);
901unlock:
a2fdbec6 902 mutex_unlock(&dev->struct_mutex);
d1c259ee 903 return ret;
e11a51e3 904}
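/*
 * A minimal userspace sketch of driving this ioctl through libdrm. It is
 * illustrative only (the fd and GEM handle are assumed to exist and the
 * helper name is made up), but the fields match the drm_i915_gem_pwrite
 * layout consumed above: handle, offset, size and data_ptr.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int
example_gem_pwrite(int fd, uint32_t handle, uint64_t offset,
    const void *src, uint64_t len)
{
	struct drm_i915_gem_pwrite pwrite;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = handle;			/* GEM handle from GEM_CREATE */
	pwrite.offset = offset;			/* byte offset within the object */
	pwrite.size = len;			/* number of bytes to copy in */
	pwrite.data_ptr = (uintptr_t)src;	/* user pointer to the source */

	/* drmIoctl() returns 0 on success, -1 with errno set on failure. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
#endif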
575ea5a0 905
245593da 906int
a2fdbec6 907i915_gem_check_wedge(struct i915_gpu_error *error,
245593da
FT
908 bool interruptible)
909{
a2fdbec6 910 if (i915_reset_in_progress(error)) {
245593da
FT
911 /* Non-interruptible callers can't handle -EAGAIN, hence return
912 * -EIO unconditionally for these. */
913 if (!interruptible)
914 return -EIO;
915
a2fdbec6
FT
916 /* Recovery complete, but the reset failed ... */
917 if (i915_terminally_wedged(error))
245593da
FT
918 return -EIO;
919
920 return -EAGAIN;
921 }
922
923 return 0;
924}
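/*
 * Summary of the contract above: 0 when no reset is pending, -EAGAIN while
 * a reset is still in progress (interruptible callers are expected to
 * retry), and -EIO for non-interruptible callers or once the GPU is
 * terminally wedged.
 */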
925
686a02f1
FT
926/*
927 * Compare seqno against outstanding lazy request. Emit a request if they are
928 * equal.
929 */
930static int
931i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
932{
933 int ret;
934
935 DRM_LOCK_ASSERT(ring->dev);
936
937 ret = 0;
938 if (seqno == ring->outstanding_lazy_request)
5d0b1887 939 ret = i915_add_request(ring, NULL);
686a02f1
FT
940
941 return ret;
942}
943
02727ecd
FT
944/**
945 * __wait_seqno - wait until execution of seqno has finished
946 * @ring: the ring expected to report seqno
947 * @seqno: duh!
a2fdbec6 948 * @reset_counter: reset sequence associated with the given seqno
02727ecd
FT
949 * @interruptible: do an interruptible wait (normally yes)
950 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
951 *
a2fdbec6
FT
952 * Note: It is of utmost importance that the passed in seqno and reset_counter
953 * values have been read by the caller in an smp safe manner. Where read-side
954 * locks are involved, it is sufficient to read the reset_counter before
955 * unlocking the lock that protects the seqno. For lockless tricks, the
956 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
957 * inserted.
958 *
02727ecd
FT
959 * Returns 0 if the seqno was found within the allotted time. Else returns the
960 * errno with remaining time filled in timeout argument.
961 */
962static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
a2fdbec6 963 unsigned reset_counter,
02727ecd
FT
964 bool interruptible, struct timespec *timeout)
965{
966 drm_i915_private_t *dev_priv = ring->dev->dev_private;
19b28dc8
FT
967 struct timespec before, now, wait_time={1,0};
968 unsigned long timeout_jiffies;
969 long end;
970 bool wait_forever = true;
971 int ret;
02727ecd
FT
972
973 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
974 return 0;
975
19b28dc8
FT
976 if (timeout != NULL) {
977 wait_time = *timeout;
978 wait_forever = false;
979 }
980
8e26cdf6 981 timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
19b28dc8 982
02727ecd
FT
983 if (WARN_ON(!ring->irq_get(ring)))
984 return -ENODEV;
985
19b28dc8
FT
986 /* Record current time in case interrupted by signal, or wedged */
987 getrawmonotonic(&before);
02727ecd 988
19b28dc8
FT
989#define EXIT_COND \
990 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
a2fdbec6
FT
991 i915_reset_in_progress(&dev_priv->gpu_error) || \
992 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
19b28dc8
FT
993 do {
994 if (interruptible)
995 end = wait_event_interruptible_timeout(ring->irq_queue,
996 EXIT_COND,
997 timeout_jiffies);
998 else
999 end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1000 timeout_jiffies);
1001
a2fdbec6
FT
1002 /* We need to check whether any gpu reset happened in between
1003 * the caller grabbing the seqno and now ... */
1004 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
1005 end = -EAGAIN;
1006
1007 /* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
1008 * gone. */
1009 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
19b28dc8
FT
1010 if (ret)
1011 end = ret;
1012 } while (end == 0 && wait_forever);
1013
1014 getrawmonotonic(&now);
02727ecd
FT
1015
1016 ring->irq_put(ring);
19b28dc8
FT
1017#undef EXIT_COND
1018
1019 if (timeout) {
1020 struct timespec sleep_time = timespec_sub(now, before);
1021 *timeout = timespec_sub(*timeout, sleep_time);
8e26cdf6
FT
1022 if (!timespec_valid(timeout)) /* i.e. negative time remains */
1023 set_normalized_timespec(timeout, 0, 0);
19b28dc8 1024 }
02727ecd 1025
19b28dc8
FT
1026 switch (end) {
1027 case -EIO:
1028 case -EAGAIN: /* Wedged */
1029 case -ERESTARTSYS: /* Signal */
1030 return (int)end;
1031 case 0: /* Timeout */
19b28dc8
FT
1032 return -ETIMEDOUT; /* -ETIME on Linux */
1033 default: /* Completed */
1034 WARN_ON(end < 0); /* We're not aware of other errors */
1035 return 0;
1036 }
02727ecd
FT
1037}
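/*
 * Typical caller pattern for __wait_seqno(), as used by the nonblocking
 * wait below: sample dev_priv->gpu_error.reset_counter while still holding
 * struct_mutex, drop the mutex, wait, then retake the mutex, so a GPU reset
 * that lands in that window is reported as -EAGAIN rather than silently
 * missed.
 */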
1038
e11a51e3
FT
1039/**
1040 * Waits for a sequence number to be signaled, and cleans up the
1041 * request and object lists appropriately for that event.
1042 */
1043int
1044i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1045{
d2557f23
FT
1046 struct drm_device *dev = ring->dev;
1047 struct drm_i915_private *dev_priv = dev->dev_private;
0b869d8a
FT
1048 bool interruptible = dev_priv->mm.interruptible;
1049 int ret;
575ea5a0 1050
d2557f23 1051 DRM_LOCK_ASSERT(dev);
245593da 1052 BUG_ON(seqno == 0);
575ea5a0 1053
a2fdbec6 1054 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
245593da
FT
1055 if (ret)
1056 return ret;
575ea5a0 1057
686a02f1
FT
1058 ret = i915_gem_check_olr(ring, seqno);
1059 if (ret)
1060 return ret;
e11a51e3 1061
a2fdbec6
FT
1062 return __wait_seqno(ring, seqno,
1063 atomic_read(&dev_priv->gpu_error.reset_counter),
1064 interruptible, NULL);
575ea5a0
FT
1065}
1066
5d0b1887
FT
1067static int
1068i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1069 struct intel_ring_buffer *ring)
1070{
1071 i915_gem_retire_requests_ring(ring);
1072
1073 /* Manually manage the write flush as we may have not yet
1074 * retired the buffer.
1075 *
1076 * Note that the last_write_seqno is always the earlier of
1077 * the two (read/write) seqno, so if we have successfully waited,
1078 * we know we have passed the last write.
1079 */
1080 obj->last_write_seqno = 0;
1081 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1082
1083 return 0;
1084}
1085
e11a51e3
FT
1086/**
1087 * Ensures that all rendering to the object has completed and the object is
1088 * safe to unbind from the GTT or access from the CPU.
1089 */
686a02f1
FT
1090static __must_check int
1091i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1092 bool readonly)
575ea5a0 1093{
d2557f23 1094 struct intel_ring_buffer *ring = obj->ring;
e11a51e3 1095 u32 seqno;
575ea5a0
FT
1096 int ret;
1097
d2557f23 1098 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
e11a51e3
FT
1099 if (seqno == 0)
1100 return 0;
575ea5a0 1101
d2557f23 1102 ret = i915_wait_seqno(ring, seqno);
686a02f1
FT
1103 if (ret)
1104 return ret;
e11a51e3 1105
5d0b1887 1106 return i915_gem_object_wait_rendering__tail(obj, ring);
686a02f1
FT
1107}
1108
67838cc5
FT
1109/* A nonblocking variant of the above wait. This is a highly dangerous routine
1110 * as the object state may change during this call.
1111 */
1112static __must_check int
1113i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1114 bool readonly)
1115{
1116 struct drm_device *dev = obj->base.dev;
1117 struct drm_i915_private *dev_priv = dev->dev_private;
1118 struct intel_ring_buffer *ring = obj->ring;
a2fdbec6 1119 unsigned reset_counter;
67838cc5
FT
1120 u32 seqno;
1121 int ret;
1122
1123 DRM_LOCK_ASSERT(dev);
1124 BUG_ON(!dev_priv->mm.interruptible);
1125
1126 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1127 if (seqno == 0)
1128 return 0;
1129
a2fdbec6 1130 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
67838cc5
FT
1131 if (ret)
1132 return ret;
1133
1134 ret = i915_gem_check_olr(ring, seqno);
1135 if (ret)
1136 return ret;
1137
a2fdbec6
FT
1138 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1139 mutex_unlock(&dev->struct_mutex);
1140 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
1141 mutex_lock(&dev->struct_mutex);
5d0b1887
FT
1142 if (ret)
1143 return ret;
67838cc5 1144
5d0b1887 1145 return i915_gem_object_wait_rendering__tail(obj, ring);
67838cc5
FT
1146}
1147
e11a51e3
FT
1148/**
1149 * Called when user space prepares to use an object with the CPU, either
1150 * through the mmap ioctl's mapping or a GTT mapping.
1151 */
575ea5a0 1152int
e11a51e3
FT
1153i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1154 struct drm_file *file)
575ea5a0 1155{
f192107f 1156 struct drm_i915_gem_set_domain *args = data;
575ea5a0 1157 struct drm_i915_gem_object *obj;
f192107f
FT
1158 uint32_t read_domains = args->read_domains;
1159 uint32_t write_domain = args->write_domain;
575ea5a0
FT
1160 int ret;
1161
f192107f
FT
1162 /* Only handle setting domains to types used by the CPU. */
1163 if (write_domain & I915_GEM_GPU_DOMAINS)
1164 return -EINVAL;
e11a51e3 1165
f192107f
FT
1166 if (read_domains & I915_GEM_GPU_DOMAINS)
1167 return -EINVAL;
1168
1169 /* Having something in the write domain implies it's in the read
1170 * domain, and only that read domain. Enforce that in the request.
1171 */
1172 if (write_domain != 0 && read_domains != write_domain)
1173 return -EINVAL;
575ea5a0
FT
1174
1175 ret = i915_mutex_lock_interruptible(dev);
f192107f
FT
1176 if (ret)
1177 return ret;
575ea5a0
FT
1178
1179 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1180 if (&obj->base == NULL) {
1181 ret = -ENOENT;
1182 goto unlock;
1183 }
1184
67838cc5
FT
1185 /* Try to flush the object off the GPU without holding the lock.
1186 * We will repeat the flush holding the lock in the normal manner
1187 * to catch cases where we are gazumped.
1188 */
1189 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
1190 if (ret)
1191 goto unref;
1192
f192107f 1193 if (read_domains & I915_GEM_DOMAIN_GTT) {
e11a51e3 1194 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
f192107f
FT
1195
1196 /* Silently promote "you're not bound, there was nothing to do"
1197 * to success, since the client was just asking us to
1198 * make sure everything was done.
1199 */
e11a51e3
FT
1200 if (ret == -EINVAL)
1201 ret = 0;
f192107f 1202 } else {
e11a51e3 1203 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
f192107f 1204 }
575ea5a0 1205
67838cc5 1206unref:
575ea5a0
FT
1207 drm_gem_object_unreference(&obj->base);
1208unlock:
a2fdbec6 1209 mutex_unlock(&dev->struct_mutex);
f192107f 1210 return ret;
575ea5a0
FT
1211}
1212
7cbd1a46 1213/**
e11a51e3 1214 * Called when user space has done writes to this buffer
7cbd1a46
FT
1215 */
1216int
e11a51e3
FT
1217i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1218 struct drm_file *file)
7cbd1a46 1219{
686a02f1 1220 struct drm_i915_gem_sw_finish *args = data;
e11a51e3 1221 struct drm_i915_gem_object *obj;
686a02f1 1222 int ret = 0;
7cbd1a46 1223
e11a51e3 1224 ret = i915_mutex_lock_interruptible(dev);
d2557f23
FT
1225 if (ret)
1226 return ret;
e11a51e3
FT
1227 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1228 if (&obj->base == NULL) {
1229 ret = -ENOENT;
1230 goto unlock;
1231 }
f192107f
FT
1232
1233 /* Pinned buffers may be scanout, so flush the cache */
d2557f23 1234 if (obj->pin_count)
e11a51e3 1235 i915_gem_object_flush_cpu_write_domain(obj);
f192107f 1236
e11a51e3
FT
1237 drm_gem_object_unreference(&obj->base);
1238unlock:
a2fdbec6 1239 mutex_unlock(&dev->struct_mutex);
d2557f23 1240 return ret;
e11a51e3 1241}
7cbd1a46 1242
e11a51e3
FT
1243/**
1244 * Maps the contents of an object, returning the address it is mapped
1245 * into.
1246 *
1247 * While the mapping holds a reference on the contents of the object, it doesn't
1248 * imply a ref on the object itself.
1249 */
1250int
1251i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1252 struct drm_file *file)
1253{
c737e47c 1254 struct drm_i915_gem_mmap *args = data;
e11a51e3 1255 struct drm_gem_object *obj;
c737e47c
FT
1256 struct proc *p = curproc;
1257 vm_map_t map = &p->p_vmspace->vm_map;
e11a51e3
FT
1258 vm_offset_t addr;
1259 vm_size_t size;
c737e47c 1260 int error = 0, rv;
7cbd1a46 1261
e11a51e3
FT
1262 obj = drm_gem_object_lookup(dev, file, args->handle);
1263 if (obj == NULL)
c737e47c
FT
1264 return -ENOENT;
1265
e11a51e3
FT
1266 if (args->size == 0)
1267 goto out;
c737e47c 1268
e11a51e3 1269 size = round_page(args->size);
e11a51e3 1270 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
e11a51e3
FT
1271 error = ENOMEM;
1272 goto out;
7cbd1a46
FT
1273 }
1274
e11a51e3
FT
1275 addr = 0;
1276 vm_object_hold(obj->vm_obj);
1277 vm_object_reference_locked(obj->vm_obj);
1278 vm_object_drop(obj->vm_obj);
0adbcbd6
MD
1279 rv = vm_map_find(map, obj->vm_obj, NULL,
1280 args->offset, &addr, args->size,
1281 PAGE_SIZE, /* align */
1282 TRUE, /* fitit */
1283 VM_MAPTYPE_NORMAL, /* maptype */
1284 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1285 VM_PROT_READ | VM_PROT_WRITE, /* max */
1286 MAP_SHARED /* cow */);
e11a51e3
FT
1287 if (rv != KERN_SUCCESS) {
1288 vm_object_deallocate(obj->vm_obj);
1289 error = -vm_mmap_to_errno(rv);
1290 } else {
1291 args->addr_ptr = (uint64_t)addr;
7cbd1a46 1292 }
e11a51e3
FT
1293out:
1294 drm_gem_object_unreference(obj);
1295 return (error);
7cbd1a46
FT
1296}
1297
e9587a4e
FT
1298int i915_intr_pf;
1299
0b869d8a
FT
1300/**
1301 * i915_gem_fault - fault a page into the GTT
1302 * vma: VMA in question
1303 * vmf: fault info
1304 *
1305 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1306 * from userspace. The fault handler takes care of binding the object to
1307 * the GTT (if needed), allocating and programming a fence register (again,
1308 * only if needed based on whether the old reg is still valid or the object
1309 * is tiled) and inserting a new PTE into the faulting process.
1310 *
1311 * Note that the faulting process may involve evicting existing objects
1312 * from the GTT and/or fence registers to make room. So performance may
1313 * suffer if the GTT working set is large or there are few fence registers
1314 * left.
1315 */
e9587a4e
FT
1316int
1317i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
1318 vm_page_t *mres)
a2fdbec6 1319{
e9587a4e
FT
1320 struct drm_gem_object *gem_obj;
1321 struct drm_i915_gem_object *obj;
1322 struct drm_device *dev;
1323 drm_i915_private_t *dev_priv;
1324 vm_page_t m, oldm;
1325 int cause, ret;
1326 bool write;
a2fdbec6 1327
e9587a4e
FT
1328 gem_obj = vm_obj->handle;
1329 obj = to_intel_bo(gem_obj);
1330 dev = obj->base.dev;
1331 dev_priv = dev->dev_private;
1332#if 0
1333 write = (prot & VM_PROT_WRITE) != 0;
1334#else
1335 write = true;
1336#endif
1337 vm_object_pip_add(vm_obj, 1);
a2fdbec6 1338
e9587a4e
FT
1339 /*
1340 * Remove the placeholder page inserted by vm_fault() from the
1341 * object before dropping the object lock. If
1342 * i915_gem_release_mmap() is active in parallel on this gem
1343 * object, then it owns the drm device sx and might find the
1344 * placeholder already. Then, since the page is busy,
1345 * i915_gem_release_mmap() sleeps waiting for the busy state
1346 * of the page cleared. We will be not able to acquire drm
1347 * device lock until i915_gem_release_mmap() is able to make a
1348 * progress.
1349 */
1350 if (*mres != NULL) {
1351 oldm = *mres;
1352 vm_page_remove(oldm);
1353 *mres = NULL;
1354 } else
1355 oldm = NULL;
1356retry:
1357 VM_OBJECT_UNLOCK(vm_obj);
1358unlocked_vmobj:
1359 cause = ret = 0;
1360 m = NULL;
a2fdbec6 1361
e9587a4e
FT
1362 if (i915_intr_pf) {
1363 ret = i915_mutex_lock_interruptible(dev);
1364 if (ret != 0) {
1365 cause = 10;
1366 goto out;
1367 }
1368 } else
1369 mutex_lock(&dev->struct_mutex);
1370
1371 /*
1372 * Since the object lock was dropped, another thread might have
1373 * faulted on the same GTT address and instantiated the
1374 * mapping for the page. Recheck.
1375 */
1376 VM_OBJECT_LOCK(vm_obj);
1377 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1378 if (m != NULL) {
1379 if ((m->flags & PG_BUSY) != 0) {
1380 mutex_unlock(&dev->struct_mutex);
1381#if 0 /* XXX */
1382 vm_page_sleep(m, "915pee");
1383#endif
1384 goto retry;
1385 }
1386 goto have_page;
1387 } else
1388 VM_OBJECT_UNLOCK(vm_obj);
a2fdbec6
FT
1389
1390 /* Access to snoopable pages through the GTT is incoherent. */
1391 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1392 ret = -EINVAL;
1393 goto unlock;
1394 }
1395
1396 /* Now bind it into the GTT if needed */
e9587a4e
FT
1397 if (!obj->map_and_fenceable) {
1398 ret = i915_gem_object_unbind(obj);
1399 if (ret != 0) {
1400 cause = 20;
1401 goto unlock;
1402 }
1403 }
1404 if (!obj->gtt_space) {
1405 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
1406 if (ret != 0) {
1407 cause = 30;
1408 goto unlock;
1409 }
a2fdbec6 1410
e9587a4e
FT
1411 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1412 if (ret != 0) {
1413 cause = 40;
1414 goto unlock;
1415 }
1416 }
a2fdbec6 1417
e9587a4e
FT
1418 if (obj->tiling_mode == I915_TILING_NONE)
1419 ret = i915_gem_object_put_fence(obj);
1420 else
1421 ret = i915_gem_object_get_fence(obj);
1422 if (ret != 0) {
1423 cause = 50;
1424 goto unlock;
1425 }
1426
1427 if (i915_gem_object_is_inactive(obj))
1428 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
a2fdbec6
FT
1429
1430 obj->fault_mappable = true;
e9587a4e
FT
1431 VM_OBJECT_LOCK(vm_obj);
1432 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
1433 offset);
1434 if (m == NULL) {
1435 cause = 60;
1436 ret = -EFAULT;
1437 goto unlock;
1438 }
1439 KASSERT((m->flags & PG_FICTITIOUS) != 0,
1440 ("not fictitious %p", m));
1441 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
a2fdbec6 1442
e9587a4e
FT
1443 if ((m->flags & PG_BUSY) != 0) {
1444 mutex_unlock(&dev->struct_mutex);
1445#if 0 /* XXX */
1446 vm_page_sleep(m, "915pbs");
1447#endif
1448 goto retry;
1449 }
1450 m->valid = VM_PAGE_BITS_ALL;
1451 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1452have_page:
1453 *mres = m;
1454 vm_page_busy_try(m, false);
1455
1456 mutex_unlock(&dev->struct_mutex);
1457 if (oldm != NULL) {
1458 vm_page_free(oldm);
1459 }
1460 vm_object_pip_wakeup(vm_obj);
1461 return (VM_PAGER_OK);
a2fdbec6 1462
a2fdbec6
FT
1463unlock:
1464 mutex_unlock(&dev->struct_mutex);
1465out:
e9587a4e
FT
1466 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
1467 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
1468 goto unlocked_vmobj;
a2fdbec6 1469 }
e9587a4e
FT
1470 VM_OBJECT_LOCK(vm_obj);
1471 vm_object_pip_wakeup(vm_obj);
1472 return (VM_PAGER_ERROR);
a2fdbec6 1473}
0b869d8a 1474
e11a51e3
FT
1475/**
1476 * i915_gem_release_mmap - remove physical page mappings
1477 * @obj: obj in question
901caa58 1478 *
e11a51e3
FT
1479 * Preserve the reservation of the mmapping with the DRM core code, but
1480 * relinquish ownership of the pages back to the system.
901caa58 1481 *
e11a51e3
FT
1482 * It is vital that we remove the page mapping if we have mapped a tiled
1483 * object through the GTT and then lose the fence register due to
1484 * resource pressure. Similarly if the object has been moved out of the
1485 * aperture, then pages mapped into userspace must be revoked. Removing the
1486 * mapping will then trigger a page fault on the next user access, allowing
1487 * fixup by i915_gem_fault().
901caa58 1488 */
e11a51e3
FT
1489void
1490i915_gem_release_mmap(struct drm_i915_gem_object *obj)
575ea5a0 1491{
e11a51e3
FT
1492 vm_object_t devobj;
1493 vm_page_t m;
1494 int i, page_count;
901caa58 1495
e11a51e3
FT
1496 if (!obj->fault_mappable)
1497 return;
901caa58 1498
e11a51e3
FT
1499 devobj = cdev_pager_lookup(obj);
1500 if (devobj != NULL) {
1501 page_count = OFF_TO_IDX(obj->base.size);
575ea5a0 1502
e11a51e3
FT
1503 VM_OBJECT_LOCK(devobj);
1504 for (i = 0; i < page_count; i++) {
1505 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
1506 if (m == NULL)
1507 continue;
1508 cdev_pager_free_page(devobj, m);
575ea5a0 1509 }
e11a51e3
FT
1510 VM_OBJECT_UNLOCK(devobj);
1511 vm_object_deallocate(devobj);
575ea5a0 1512 }
575ea5a0 1513
e11a51e3 1514 obj->fault_mappable = false;
575ea5a0
FT
1515}
1516
a2fdbec6 1517uint32_t
e11a51e3 1518i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
575ea5a0 1519{
e11a51e3 1520 uint32_t gtt_size;
575ea5a0 1521
e11a51e3
FT
1522 if (INTEL_INFO(dev)->gen >= 4 ||
1523 tiling_mode == I915_TILING_NONE)
d2557f23 1524 return size;
575ea5a0 1525
e11a51e3
FT
1526 /* Previous chips need a power-of-two fence region when tiling */
1527 if (INTEL_INFO(dev)->gen == 3)
1528 gtt_size = 1024*1024;
1529 else
1530 gtt_size = 512*1024;
575ea5a0 1531
e11a51e3
FT
1532 while (gtt_size < size)
1533 gtt_size <<= 1;
575ea5a0 1534
d2557f23 1535 return gtt_size;
e11a51e3 1536}
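/*
 * Worked example for the sizing above (illustrative numbers): a 700KB tiled
 * object on a gen2 part starts from the 512KB minimum and doubles once, so
 * it needs a 1MB power-of-two fence region, while on gen4+ the same object
 * simply uses its 700KB size.
 */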
575ea5a0 1537
e11a51e3
FT
1538/**
1539 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1540 * @obj: object to check
1541 *
1542 * Return the required GTT alignment for an object, taking into account
1543 * potential fence register mapping.
1544 */
a2fdbec6
FT
1545uint32_t
1546i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1547 int tiling_mode, bool fenced)
e11a51e3 1548{
f4e1c372 1549
e11a51e3
FT
1550 /*
1551 * Minimum alignment is 4k (GTT page size), but might be greater
1552 * if a fence register is needed for the object.
1553 */
a2fdbec6 1554 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
e11a51e3 1555 tiling_mode == I915_TILING_NONE)
d2557f23 1556 return 4096;
575ea5a0 1557
e11a51e3
FT
1558 /*
1559 * Previous chips need to be aligned to the size of the smallest
1560 * fence register that can contain the object.
1561 */
d2557f23 1562 return i915_gem_get_gtt_size(dev, size, tiling_mode);
575ea5a0
FT
1563}
1564
575ea5a0 1565int
e11a51e3
FT
1566i915_gem_mmap_gtt(struct drm_file *file,
1567 struct drm_device *dev,
1568 uint32_t handle,
1569 uint64_t *offset)
575ea5a0 1570{
d2557f23 1571 struct drm_i915_private *dev_priv = dev->dev_private;
e11a51e3 1572 struct drm_i915_gem_object *obj;
d65a337f 1573 int ret;
575ea5a0 1574
e11a51e3 1575 ret = i915_mutex_lock_interruptible(dev);
d2557f23
FT
1576 if (ret)
1577 return ret;
575ea5a0 1578
e11a51e3
FT
1579 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1580 if (&obj->base == NULL) {
1581 ret = -ENOENT;
1582 goto unlock;
575ea5a0
FT
1583 }
1584
a2fdbec6 1585 if (obj->base.size > dev_priv->gtt.mappable_end) {
e11a51e3
FT
1586 ret = -E2BIG;
1587 goto out;
1588 }
d65a337f 1589
e11a51e3
FT
1590 if (obj->madv != I915_MADV_WILLNEED) {
1591 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1592 ret = -EINVAL;
1593 goto out;
1594 }
575ea5a0 1595
e11a51e3 1596 ret = drm_gem_create_mmap_offset(&obj->base);
d2557f23 1597 if (ret)
e11a51e3 1598 goto out;
575ea5a0 1599
e11a51e3
FT
1600 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
1601 DRM_GEM_MAPPING_KEY;
1602out:
1603 drm_gem_object_unreference(&obj->base);
1604unlock:
a2fdbec6 1605 mutex_unlock(&dev->struct_mutex);
d2557f23 1606 return ret;
575ea5a0
FT
1607}
1608
e11a51e3
FT
1609/**
1610 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1611 * @dev: DRM device
1612 * @data: GTT mapping ioctl data
1613 * @file: GEM object info
1614 *
1615 * Simply returns the fake offset to userspace so it can mmap it.
1616 * The mmap call will end up in drm_gem_mmap(), which will set things
1617 * up so we can get faults in the handler above.
1618 *
1619 * The fault handler will take care of binding the object into the GTT
1620 * (since it may have been evicted to make room for something), allocating
1621 * a fence register, and mapping the appropriate aperture address into
1622 * userspace.
1623 */
575ea5a0 1624int
e11a51e3
FT
1625i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1626 struct drm_file *file)
575ea5a0 1627{
686a02f1 1628 struct drm_i915_gem_mmap_gtt *args = data;
575ea5a0 1629
d2557f23 1630 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
575ea5a0
FT
1631}
1632
e11a51e3
FT
1633/* Immediately discard the backing storage */
1634static void
1635i915_gem_object_truncate(struct drm_i915_gem_object *obj)
575ea5a0 1636{
e11a51e3 1637 vm_object_t vm_obj;
575ea5a0 1638
e11a51e3
FT
1639 vm_obj = obj->base.vm_obj;
1640 VM_OBJECT_LOCK(vm_obj);
1641 vm_object_page_remove(vm_obj, 0, 0, false);
1642 VM_OBJECT_UNLOCK(vm_obj);
1643 obj->madv = __I915_MADV_PURGED;
575ea5a0
FT
1644}
1645
e11a51e3
FT
1646static inline int
1647i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
575ea5a0 1648{
e11a51e3 1649 return obj->madv == I915_MADV_DONTNEED;
575ea5a0
FT
1650}
1651
e11a51e3
FT
1652static void
1653i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
575ea5a0 1654{
e11a51e3
FT
1655 vm_page_t m;
1656 int page_count, i;
575ea5a0 1657
e11a51e3 1658 BUG_ON(obj->madv == __I915_MADV_PURGED);
575ea5a0 1659
e11a51e3
FT
1660 if (obj->tiling_mode != I915_TILING_NONE)
1661 i915_gem_object_save_bit_17_swizzle(obj);
1662 if (obj->madv == I915_MADV_DONTNEED)
1663 obj->dirty = 0;
1664 page_count = obj->base.size / PAGE_SIZE;
1665 VM_OBJECT_LOCK(obj->base.vm_obj);
1666#if GEM_PARANOID_CHECK_GTT
1667 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
1668#endif
1669 for (i = 0; i < page_count; i++) {
1670 m = obj->pages[i];
1671 if (obj->dirty)
1672 vm_page_dirty(m);
1673 if (obj->madv == I915_MADV_WILLNEED)
1674 vm_page_reference(m);
1675 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
1676 vm_page_unwire(obj->pages[i], 1);
1677 vm_page_wakeup(obj->pages[i]);
e11a51e3
FT
1678 }
1679 VM_OBJECT_UNLOCK(obj->base.vm_obj);
1680 obj->dirty = 0;
5a3b77d5 1681 drm_free(obj->pages, M_DRM);
e11a51e3 1682 obj->pages = NULL;
99f70504
FT
1683}
1684
a2fdbec6
FT
1685int
1686i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1687{
1688 const struct drm_i915_gem_object_ops *ops = obj->ops;
1689
1690 if (obj->pages == NULL)
1691 return 0;
1692
1693 BUG_ON(obj->gtt_space);
1694
1695 if (obj->pages_pin_count)
1696 return -EBUSY;
1697
1698 /* ->put_pages might need to allocate memory for the bit17 swizzle
1699 * array, hence protect them from being reaped by removing them from gtt
1700 * lists early. */
5d0b1887 1701 list_del(&obj->global_list);
a2fdbec6
FT
1702
1703 ops->put_pages(obj);
1704 obj->pages = NULL;
1705
1706 if (i915_gem_object_is_purgeable(obj))
1707 i915_gem_object_truncate(obj);
1708
1709 return 0;
1710}
1711
575ea5a0 1712static int
dfa24183 1713i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
575ea5a0 1714{
a2fdbec6 1715 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
e11a51e3 1716 struct drm_device *dev;
575ea5a0 1717 vm_object_t vm_obj;
e11a51e3 1718 int page_count, i, j;
56c606a8 1719 struct vm_page *page;
575ea5a0 1720
e11a51e3
FT
1721 dev = obj->base.dev;
1722 KASSERT(obj->pages == NULL, ("Obj already has pages"));
1723 page_count = obj->base.size / PAGE_SIZE;
5a3b77d5 1724 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
e11a51e3 1725 M_WAITOK);
56c606a8 1726
575ea5a0 1727 vm_obj = obj->base.vm_obj;
575ea5a0 1728 VM_OBJECT_LOCK(vm_obj);
56c606a8 1729
e11a51e3 1730 for (i = 0; i < page_count; i++) {
56c606a8 1731 page = shmem_read_mapping_page(vm_obj, i);
a2fdbec6
FT
1732 if (IS_ERR(page)) {
1733 i915_gem_purge(dev_priv, page_count);
56c606a8 1734 goto err_pages;
a2fdbec6 1735 }
56c606a8
FT
1736
1737 obj->pages[i] = page;
e11a51e3 1738 }
56c606a8 1739
e11a51e3
FT
1740 VM_OBJECT_UNLOCK(vm_obj);
1741 if (i915_gem_object_needs_bit17_swizzle(obj))
1742 i915_gem_object_do_bit_17_swizzle(obj);
575ea5a0 1743
56c606a8
FT
1744 return 0;
1745
1746err_pages:
e11a51e3 1747 for (j = 0; j < i; j++) {
56c606a8
FT
1748 page = obj->pages[j];
1749 vm_page_busy_wait(page, FALSE, "i915gem");
1750 vm_page_unwire(page, 0);
1751 vm_page_wakeup(page);
575ea5a0 1752 }
575ea5a0 1753 VM_OBJECT_UNLOCK(vm_obj);
5a3b77d5 1754 drm_free(obj->pages, M_DRM);
e11a51e3
FT
1755 obj->pages = NULL;
1756 return (-EIO);
575ea5a0
FT
1757}
1758
a2fdbec6
FT
1759/* Ensure that the associated pages are gathered from the backing storage
1760 * and pinned into our object. i915_gem_object_get_pages() may be called
1761 * multiple times before they are released by a single call to
1762 * i915_gem_object_put_pages() - once the pages are no longer referenced
1763 * either as a result of memory pressure (reaping pages under the shrinker)
1764 * or as the object is itself released.
1765 */
1766int
1767i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1768{
1769 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1770 const struct drm_i915_gem_object_ops *ops = obj->ops;
1771 int ret;
1772
1773 if (obj->pages)
1774 return 0;
1775
1776 if (obj->madv != I915_MADV_WILLNEED) {
1777 DRM_ERROR("Attempting to obtain a purgeable object\n");
1778 return -EINVAL;
1779 }
1780
1781 BUG_ON(obj->pages_pin_count);
1782
1783 ret = ops->get_pages(obj);
1784 if (ret)
1785 return ret;
1786
5d0b1887 1787 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
a2fdbec6
FT
1788 return 0;
1789}
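/*
 * Expected calling pattern, mirroring i915_gem_shmem_pread() above:
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... access obj->pages[] ...
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pages stay resident while they are pinned and become eligible for
 * the shrinker again once the pin count drops to zero.
 */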
1790
e11a51e3
FT
1791void
1792i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
b5c29a34 1793 struct intel_ring_buffer *ring)
575ea5a0 1794{
e11a51e3
FT
1795 struct drm_device *dev = obj->base.dev;
1796 struct drm_i915_private *dev_priv = dev->dev_private;
b5c29a34 1797 u32 seqno = intel_ring_get_seqno(ring);
575ea5a0 1798
686a02f1 1799 BUG_ON(ring == NULL);
5d0b1887
FT
1800 if (obj->ring != ring && obj->last_write_seqno) {
1801 /* Keep the seqno relative to the current ring */
1802 obj->last_write_seqno = seqno;
1803 }
e11a51e3 1804 obj->ring = ring;
575ea5a0 1805
e11a51e3
FT
1806 /* Add a reference if we're newly entering the active list. */
1807 if (!obj->active) {
1808 drm_gem_object_reference(&obj->base);
1809 obj->active = 1;
575ea5a0
FT
1810 }
1811
e11a51e3
FT
1812 /* Move from whatever list we were on to the tail of execution. */
1813 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1814 list_move_tail(&obj->ring_list, &ring->active_list);
575ea5a0 1815
686a02f1
FT
1816 obj->last_read_seqno = seqno;
1817
e11a51e3
FT
1818 if (obj->fenced_gpu_access) {
1819 obj->last_fenced_seqno = seqno;
575ea5a0 1820
e11a51e3
FT
1821 /* Bump MRU to take account of the delayed flush */
1822 if (obj->fence_reg != I915_FENCE_REG_NONE) {
686a02f1
FT
1823 struct drm_i915_fence_reg *reg;
1824
e11a51e3
FT
1825 reg = &dev_priv->fence_regs[obj->fence_reg];
1826 list_move_tail(&reg->lru_list,
1827 &dev_priv->mm.fence_list);
575ea5a0
FT
1828 }
1829 }
575ea5a0
FT
1830}
1831
e11a51e3
FT
1832static void
1833i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1834{
1835 struct drm_device *dev = obj->base.dev;
1836 struct drm_i915_private *dev_priv = dev->dev_private;
1837
f192107f 1838 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
686a02f1 1839 BUG_ON(!obj->active);
f192107f 1840
19df918d
FT
1841 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1842
f192107f 1843 list_del_init(&obj->ring_list);
e11a51e3 1844 obj->ring = NULL;
e11a51e3 1845
f192107f
FT
1846 obj->last_read_seqno = 0;
1847 obj->last_write_seqno = 0;
1848 obj->base.write_domain = 0;
1849
1850 obj->last_fenced_seqno = 0;
e11a51e3
FT
1851 obj->fenced_gpu_access = false;
1852
1853 obj->active = 0;
e11a51e3
FT
1854 drm_gem_object_unreference(&obj->base);
1855
e11a51e3 1856 WARN_ON(i915_verify_lists(dev));
575ea5a0
FT
1857}
1858
b5c29a34 1859static int
a2fdbec6 1860i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
575ea5a0 1861{
b5c29a34
FT
1862 struct drm_i915_private *dev_priv = dev->dev_private;
1863 struct intel_ring_buffer *ring;
1864 int ret, i, j;
1865
a2fdbec6 1866 /* Carefully retire all requests without writing to the rings */
b5c29a34 1867 for_each_ring(ring, dev_priv, i) {
a2fdbec6
FT
1868 ret = intel_ring_idle(ring);
1869 if (ret)
1870 return ret;
b5c29a34 1871 }
b5c29a34 1872 i915_gem_retire_requests(dev);
a2fdbec6
FT
1873
1874 /* Finally reset hw state */
b5c29a34 1875 for_each_ring(ring, dev_priv, i) {
a2fdbec6
FT
1876 intel_ring_init_seqno(ring, seqno);
1877
b5c29a34
FT
1878 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1879 ring->sync_seqno[j] = 0;
1880 }
1881
1882 return 0;
1883}
1884
a2fdbec6
FT
1885int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
1886{
1887 struct drm_i915_private *dev_priv = dev->dev_private;
1888 int ret;
1889
1890 if (seqno == 0)
1891 return -EINVAL;
1892
 1893	/* The HWS page seqno needs to be set to a value less than
 1894	 * the one we will inject into the ring.
 1895	 */
1896 ret = i915_gem_init_seqno(dev, seqno - 1);
1897 if (ret)
1898 return ret;
1899
1900 /* Carefully set the last_seqno value so that wrap
1901 * detection still works
1902 */
1903 dev_priv->next_seqno = seqno;
1904 dev_priv->last_seqno = seqno - 1;
1905 if (dev_priv->last_seqno == 0)
1906 dev_priv->last_seqno--;
1907
1908 return 0;
1909}
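/*
 * Descriptive note: i915_gem_init_seqno() above first idles every ring and
 * retires all outstanding requests, so nothing in flight still refers to the
 * old numbering, before programming the new value into each ring.
 * i915_gem_set_seqno() then deliberately seeds last_seqno with seqno - 1
 * (skipping 0) so that the wrap handling referred to in the comments above
 * keeps working after the forced jump.
 */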
1910
b5c29a34
FT
1911int
1912i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
1913{
1914 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0 1915
e11a51e3 1916 /* reserve 0 for non-seqno */
b5c29a34 1917 if (dev_priv->next_seqno == 0) {
a2fdbec6 1918 int ret = i915_gem_init_seqno(dev, 0);
b5c29a34
FT
1919 if (ret)
1920 return ret;
1921
e11a51e3 1922 dev_priv->next_seqno = 1;
b5c29a34 1923 }
e11a51e3 1924
a2fdbec6 1925 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
b5c29a34 1926 return 0;
575ea5a0
FT
1927}
1928
5d0b1887
FT
1929int __i915_add_request(struct intel_ring_buffer *ring,
1930 struct drm_file *file,
1931 struct drm_i915_gem_object *obj,
1932 u32 *out_seqno)
575ea5a0 1933{
686a02f1 1934 drm_i915_private_t *dev_priv = ring->dev->dev_private;
f192107f 1935 struct drm_i915_gem_request *request;
5d0b1887 1936 u32 request_ring_position, request_start;
e11a51e3 1937 int was_empty;
575ea5a0
FT
1938 int ret;
1939
5d0b1887 1940 request_start = intel_ring_get_tail(ring);
686a02f1
FT
1941 /*
1942 * Emit any outstanding flushes - execbuf can fail to emit the flush
1943 * after having emitted the batchbuffer command. Hence we need to fix
1944 * things up similar to emitting the lazy request. The difference here
1945 * is that the flush _must_ happen before the next request, no matter
1946 * what.
1947 */
b312333e
FT
1948 ret = intel_ring_flush_all_caches(ring);
1949 if (ret)
1950 return ret;
686a02f1 1951
159fc1d7 1952 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK);
f192107f
FT
1953 if (request == NULL)
1954 return -ENOMEM;
575ea5a0 1955
d2557f23 1956
686a02f1
FT
1957 /* Record the position of the start of the request so that
1958 * should we detect the updated seqno part-way through the
1959 * GPU processing the request, we never over-estimate the
1960 * position of the head.
1961 */
e11a51e3 1962 request_ring_position = intel_ring_get_tail(ring);
575ea5a0 1963
b5c29a34 1964 ret = ring->add_request(ring);
686a02f1 1965 if (ret) {
158486a6 1966 kfree(request);
686a02f1
FT
1967 return ret;
1968 }
575ea5a0 1969
b5c29a34 1970 request->seqno = intel_ring_get_seqno(ring);
e11a51e3 1971 request->ring = ring;
5d0b1887 1972 request->head = request_start;
e11a51e3 1973 request->tail = request_ring_position;
5d0b1887
FT
1974 request->ctx = ring->last_context;
1975 request->batch_obj = obj;
1976
1977 /* Whilst this request exists, batch_obj will be on the
1978 * active_list, and so will hold the active reference. Only when this
 1979	 * request is retired will the batch_obj be moved onto the
1980 * inactive_list and lose its active reference. Hence we do not need
1981 * to explicitly hold another reference here.
1982 */
1983
1984 if (request->ctx)
1985 i915_gem_context_reference(request->ctx);
1986
686a02f1 1987 request->emitted_jiffies = jiffies;
e11a51e3
FT
1988 was_empty = list_empty(&ring->request_list);
1989 list_add_tail(&request->list, &ring->request_list);
686a02f1 1990 request->file_priv = NULL;
e11a51e3 1991
686a02f1
FT
1992 if (file) {
1993 struct drm_i915_file_private *file_priv = file->driver_priv;
e11a51e3
FT
1994
1995 spin_lock(&file_priv->mm.lock);
1996 request->file_priv = file_priv;
1997 list_add_tail(&request->client_list,
686a02f1 1998 &file_priv->mm.request_list);
e11a51e3 1999 spin_unlock(&file_priv->mm.lock);
575ea5a0
FT
2000 }
2001
e11a51e3 2002 ring->outstanding_lazy_request = 0;
575ea5a0 2003
e11a51e3
FT
2004 if (!dev_priv->mm.suspended) {
2005 if (i915_enable_hangcheck) {
a2fdbec6 2006 mod_timer(&dev_priv->gpu_error.hangcheck_timer,
561529b1 2007 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
e11a51e3 2008 }
561529b1 2009 if (was_empty) {
e11a51e3 2010 queue_delayed_work(dev_priv->wq,
561529b1
FT
2011 &dev_priv->mm.retire_work,
2012 round_jiffies_up_relative(hz));
2013 intel_mark_busy(dev_priv->dev);
2014 }
e11a51e3 2015 }
686a02f1 2016
f192107f 2017 if (out_seqno)
b5c29a34 2018 *out_seqno = request->seqno;
686a02f1 2019 return 0;
575ea5a0
FT
2020}
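/*
 * Usage sketch (hedged): most callers only need the "flush whatever is
 * outstanding" behaviour and go through the i915_add_request() convenience
 * wrapper -- as in the retire work handler below, i915_add_request(ring,
 * NULL) -- which is assumed to forward to __i915_add_request() with a NULL
 * file and batch object.
 */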
2021
e11a51e3
FT
2022static inline void
2023i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
575ea5a0 2024{
e11a51e3 2025 struct drm_i915_file_private *file_priv = request->file_priv;
575ea5a0 2026
e11a51e3
FT
2027 if (!file_priv)
2028 return;
2029
e11a51e3 2030 spin_lock(&file_priv->mm.lock);
d2557f23 2031 if (request->file_priv) {
e11a51e3
FT
2032 list_del(&request->client_list);
2033 request->file_priv = NULL;
575ea5a0 2034 }
e11a51e3 2035 spin_unlock(&file_priv->mm.lock);
575ea5a0
FT
2036}
2037
5d0b1887
FT
2038static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj)
2039{
2040 if (acthd >= obj->gtt_offset &&
2041 acthd < obj->gtt_offset + obj->base.size)
2042 return true;
2043
2044 return false;
2045}
2046
2047static bool i915_head_inside_request(const u32 acthd_unmasked,
2048 const u32 request_start,
2049 const u32 request_end)
2050{
2051 const u32 acthd = acthd_unmasked & HEAD_ADDR;
2052
2053 if (request_start < request_end) {
2054 if (acthd >= request_start && acthd < request_end)
2055 return true;
2056 } else if (request_start > request_end) {
2057 if (acthd >= request_start || acthd < request_end)
2058 return true;
2059 }
2060
2061 return false;
2062}
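/*
 * Worked example (added for clarity): a request may wrap past the end of
 * the ring, in which case request_start > request_end.  With
 * request_start = 0xf000 and request_end = 0x0100, an ACTHD of 0x0040
 * satisfies "acthd < request_end" and is therefore treated as inside the
 * request, even though it is numerically below request_start.
 */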
2063
2064static bool i915_request_guilty(struct drm_i915_gem_request *request,
2065 const u32 acthd, bool *inside)
2066{
 2067	/* There is a possibility that the unmasked head address, while
 2068	 * pointing inside the ring, matches the batch_obj address range.
 2069	 * However, this is extremely unlikely.
2070 */
2071
2072 if (request->batch_obj) {
2073 if (i915_head_inside_object(acthd, request->batch_obj)) {
2074 *inside = true;
2075 return true;
2076 }
2077 }
2078
2079 if (i915_head_inside_request(acthd, request->head, request->tail)) {
2080 *inside = false;
2081 return true;
2082 }
2083
2084 return false;
2085}
2086
2087static void i915_set_reset_status(struct intel_ring_buffer *ring,
2088 struct drm_i915_gem_request *request,
2089 u32 acthd)
2090{
2091 struct i915_ctx_hang_stats *hs = NULL;
2092 bool inside, guilty;
2093
2094 /* Innocent until proven guilty */
2095 guilty = false;
2096
2097 if (ring->hangcheck.action != wait &&
2098 i915_request_guilty(request, acthd, &inside)) {
2099 DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n",
2100 ring->name,
2101 inside ? "inside" : "flushing",
2102 request->batch_obj ?
2103 request->batch_obj->gtt_offset : 0,
2104 request->ctx ? request->ctx->id : 0,
2105 acthd);
2106
2107 guilty = true;
2108 }
2109
2110 /* If contexts are disabled or this is the default context, use
 2111	 * file_priv->hang_stats
2112 */
2113 if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
2114 hs = &request->ctx->hang_stats;
2115 else if (request->file_priv)
2116 hs = &request->file_priv->hang_stats;
2117
2118 if (hs) {
2119 if (guilty)
2120 hs->batch_active++;
2121 else
2122 hs->batch_pending++;
2123 }
2124}
2125
2126static void i915_gem_free_request(struct drm_i915_gem_request *request)
2127{
2128 list_del(&request->list);
2129 i915_gem_request_remove_from_client(request);
2130
2131 if (request->ctx)
2132 i915_gem_context_unreference(request->ctx);
2133
2134 kfree(request);
2135}
2136
d2557f23
FT
2137static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2138 struct intel_ring_buffer *ring)
575ea5a0 2139{
5d0b1887
FT
2140 u32 completed_seqno;
2141 u32 acthd;
2142
2143 acthd = intel_ring_get_active_head(ring);
2144 completed_seqno = ring->get_seqno(ring, false);
2145
e11a51e3
FT
2146 while (!list_empty(&ring->request_list)) {
2147 struct drm_i915_gem_request *request;
575ea5a0 2148
e11a51e3 2149 request = list_first_entry(&ring->request_list,
d2557f23
FT
2150 struct drm_i915_gem_request,
2151 list);
e11a51e3 2152
5d0b1887
FT
2153 if (request->seqno > completed_seqno)
2154 i915_set_reset_status(ring, request, acthd);
2155
2156 i915_gem_free_request(request);
575ea5a0 2157 }
575ea5a0 2158
e11a51e3
FT
2159 while (!list_empty(&ring->active_list)) {
2160 struct drm_i915_gem_object *obj;
2161
2162 obj = list_first_entry(&ring->active_list,
d2557f23
FT
2163 struct drm_i915_gem_object,
2164 ring_list);
e11a51e3 2165
e11a51e3 2166 i915_gem_object_move_to_inactive(obj);
575ea5a0 2167 }
575ea5a0
FT
2168}
2169
8e26cdf6 2170void i915_gem_restore_fences(struct drm_device *dev)
575ea5a0 2171{
e11a51e3
FT
2172 struct drm_i915_private *dev_priv = dev->dev_private;
2173 int i;
575ea5a0 2174
e11a51e3
FT
2175 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2176 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
5d0b1887
FT
2177
2178 /*
2179 * Commit delayed tiling changes if we have an object still
2180 * attached to the fence, otherwise just clear the fence.
2181 */
2182 if (reg->obj) {
2183 i915_gem_object_update_fence(reg->obj, reg,
2184 reg->obj->tiling_mode);
2185 } else {
2186 i915_gem_write_fence(dev, i, NULL);
2187 }
e11a51e3
FT
2188 }
2189}
2190
2191void i915_gem_reset(struct drm_device *dev)
575ea5a0 2192{
e11a51e3 2193 struct drm_i915_private *dev_priv = dev->dev_private;
575ea5a0 2194 struct drm_i915_gem_object *obj;
f192107f 2195 struct intel_ring_buffer *ring;
e11a51e3 2196 int i;
575ea5a0 2197
f192107f
FT
2198 for_each_ring(ring, dev_priv, i)
2199 i915_gem_reset_ring_lists(dev_priv, ring);
575ea5a0 2200
e11a51e3
FT
2201 /* Move everything out of the GPU domains to ensure we do any
2202 * necessary invalidation upon reuse.
ef56dbd7 2203 */
f192107f
FT
2204 list_for_each_entry(obj,
2205 &dev_priv->mm.inactive_list,
2206 mm_list)
2207 {
e11a51e3 2208 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
575ea5a0 2209 }
575ea5a0 2210
8e26cdf6 2211 i915_gem_restore_fences(dev);
e11a51e3 2212}
575ea5a0 2213
e11a51e3
FT
2214/**
2215 * This function clears the request list as sequence numbers are passed.
2216 */
2217void
2218i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2219{
2220 uint32_t seqno;
2221
2222 if (list_empty(&ring->request_list))
2223 return;
2224
686a02f1
FT
2225 WARN_ON(i915_verify_lists(ring->dev));
2226
e11a51e3
FT
2227 seqno = ring->get_seqno(ring, true);
2228
2229 while (!list_empty(&ring->request_list)) {
2230 struct drm_i915_gem_request *request;
2231
2232 request = list_first_entry(&ring->request_list,
2233 struct drm_i915_gem_request,
2234 list);
2235
2236 if (!i915_seqno_passed(seqno, request->seqno))
2237 break;
2238
2239 /* We know the GPU must have read the request to have
2240 * sent us the seqno + interrupt, so use the position
 2241	 * of the tail of the request to update the last known position
2242 * of the GPU head.
2243 */
2244 ring->last_retired_head = request->tail;
2245
5d0b1887 2246 i915_gem_free_request(request);
575ea5a0 2247 }
575ea5a0 2248
e11a51e3
FT
2249 /* Move any buffers on the active list that are no longer referenced
2250 * by the ringbuffer to the flushing/inactive lists as appropriate.
2251 */
2252 while (!list_empty(&ring->active_list)) {
2253 struct drm_i915_gem_object *obj;
2254
2255 obj = list_first_entry(&ring->active_list,
2256 struct drm_i915_gem_object,
2257 ring_list);
2258
686a02f1 2259 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
e11a51e3
FT
2260 break;
2261
f192107f 2262 i915_gem_object_move_to_inactive(obj);
575ea5a0 2263 }
575ea5a0 2264
e11a51e3
FT
2265 if (unlikely(ring->trace_irq_seqno &&
2266 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
2267 ring->irq_put(ring);
2268 ring->trace_irq_seqno = 0;
575ea5a0 2269 }
e11a51e3 2270
575ea5a0
FT
2271}
2272
e11a51e3
FT
2273void
2274i915_gem_retire_requests(struct drm_device *dev)
575ea5a0 2275{
e11a51e3 2276 drm_i915_private_t *dev_priv = dev->dev_private;
f192107f 2277 struct intel_ring_buffer *ring;
e11a51e3 2278 int i;
575ea5a0 2279
f192107f
FT
2280 for_each_ring(ring, dev_priv, i)
2281 i915_gem_retire_requests_ring(ring);
575ea5a0
FT
2282}
2283
a2fdbec6
FT
2284static long
2285__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2286 bool purgeable_only)
2287{
2288 struct drm_i915_gem_object *obj, *next;
2289 long count = 0;
2290
2291 list_for_each_entry_safe(obj, next,
2292 &dev_priv->mm.unbound_list,
5d0b1887 2293 global_list) {
a2fdbec6
FT
2294#if 0
2295 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2296 i915_gem_object_put_pages(obj) == 0) {
2297 count += obj->base.size >> PAGE_SHIFT;
2298 if (count >= target)
2299 return count;
2300 }
2301#endif
2302 }
2303
2304 list_for_each_entry_safe(obj, next,
2305 &dev_priv->mm.inactive_list,
2306 mm_list) {
2307#if 0
2308 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2309 i915_gem_object_unbind(obj) == 0 &&
2310 i915_gem_object_put_pages(obj) == 0) {
2311 count += obj->base.size >> PAGE_SHIFT;
2312 if (count >= target)
2313 return count;
2314 }
2315#endif
2316 }
2317
2318 return count;
2319}
2320
2321static long
2322i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2323{
2324 return __i915_gem_shrink(dev_priv, target, true);
2325}
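/*
 * Note (descriptive): in this port the per-object reclaim bodies above are
 * compiled out with #if 0, so __i915_gem_shrink() and i915_gem_purge()
 * currently walk the unbound/inactive lists but never free anything and
 * always report 0 pages reclaimed.
 */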
2326
e11a51e3
FT
2327static void
2328i915_gem_retire_work_handler(struct work_struct *work)
575ea5a0 2329{
e11a51e3
FT
2330 drm_i915_private_t *dev_priv;
2331 struct drm_device *dev;
2332 struct intel_ring_buffer *ring;
2333 bool idle;
2334 int i;
575ea5a0 2335
e11a51e3
FT
2336 dev_priv = container_of(work, drm_i915_private_t,
2337 mm.retire_work.work);
2338 dev = dev_priv->dev;
575ea5a0 2339
e11a51e3 2340 /* Come back later if the device is busy... */
a2fdbec6 2341 if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) {
e11a51e3
FT
2342 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2343 round_jiffies_up_relative(hz));
2344 return;
575ea5a0
FT
2345 }
2346
e11a51e3 2347 i915_gem_retire_requests(dev);
575ea5a0 2348
e11a51e3
FT
2349 /* Send a periodic flush down the ring so we don't hold onto GEM
2350 * objects indefinitely.
2351 */
2352 idle = true;
2353 for_each_ring(ring, dev_priv, i) {
2354 if (ring->gpu_caches_dirty)
5d0b1887 2355 i915_add_request(ring, NULL);
e11a51e3
FT
2356
2357 idle &= list_empty(&ring->request_list);
575ea5a0
FT
2358 }
2359
e11a51e3
FT
2360 if (!dev_priv->mm.suspended && !idle)
2361 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2362 round_jiffies_up_relative(hz));
2363 if (idle)
2364 intel_mark_idle(dev);
575ea5a0 2365
a2fdbec6 2366 mutex_unlock(&dev->struct_mutex);
575ea5a0 2367}
f0b54121
FT
2368/**
2369 * Ensures that an object will eventually get non-busy by flushing any required
 2370	 * write domains, emitting any outstanding lazy request and retiring any
2371 * completed requests.
2372 */
2373static int
2374i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2375{
2376 int ret;
2377
2378 if (obj->active) {
f0b54121
FT
2379 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2380 if (ret)
2381 return ret;
2382
2383 i915_gem_retire_requests_ring(obj->ring);
2384 }
2385
2386 return 0;
2387}
575ea5a0 2388
fabb21f3
FT
2389/**
2390 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2391 * @DRM_IOCTL_ARGS: standard ioctl arguments
2392 *
2393 * Returns 0 if successful, else an error is returned with the remaining time in
2394 * the timeout parameter.
 2395	 * -ETIMEDOUT: object is still busy after timeout
2396 * -ERESTARTSYS: signal interrupted the wait
 2397	 * -ENOENT: object doesn't exist
2398 * Also possible, but rare:
2399 * -EAGAIN: GPU wedged
2400 * -ENOMEM: damn
2401 * -ENODEV: Internal IRQ fail
2402 * -E?: The add request failed
2403 *
2404 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2405 * non-zero timeout parameter the wait ioctl will wait for the given number of
2406 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2407 * without holding struct_mutex the object may become re-busied before this
 2408	 * function completes. A similar but shorter race condition exists
 2409	 * in the busy ioctl.
2410 */
2411int
2412i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2413{
a2fdbec6 2414 drm_i915_private_t *dev_priv = dev->dev_private;
fabb21f3
FT
2415 struct drm_i915_gem_wait *args = data;
2416 struct drm_i915_gem_object *obj;
2417 struct intel_ring_buffer *ring = NULL;
2418 struct timespec timeout_stack, *timeout = NULL;
a2fdbec6 2419 unsigned reset_counter;
fabb21f3
FT
2420 u32 seqno = 0;
2421 int ret = 0;
2422
2423 if (args->timeout_ns >= 0) {
2424 timeout_stack = ns_to_timespec(args->timeout_ns);
2425 timeout = &timeout_stack;
2426 }
2427
2428 ret = i915_mutex_lock_interruptible(dev);
2429 if (ret)
2430 return ret;
2431
2432 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2433 if (&obj->base == NULL) {
a2fdbec6 2434 mutex_unlock(&dev->struct_mutex);
fabb21f3
FT
2435 return -ENOENT;
2436 }
2437
2438 /* Need to make sure the object gets inactive eventually. */
2439 ret = i915_gem_object_flush_active(obj);
2440 if (ret)
2441 goto out;
2442
2443 if (obj->active) {
2444 seqno = obj->last_read_seqno;
2445 ring = obj->ring;
2446 }
2447
2448 if (seqno == 0)
2449 goto out;
2450
2451 /* Do this after OLR check to make sure we make forward progress polling
2452 * on this IOCTL with a 0 timeout (like busy ioctl)
2453 */
2454 if (!args->timeout_ns) {
2455 ret = -ETIMEDOUT;
2456 goto out;
2457 }
2458
2459 drm_gem_object_unreference(&obj->base);
a2fdbec6
FT
2460 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2461 mutex_unlock(&dev->struct_mutex);
fabb21f3 2462
a2fdbec6 2463 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
8e26cdf6 2464 if (timeout)
fabb21f3 2465 args->timeout_ns = timespec_to_ns(timeout);
fabb21f3
FT
2466 return ret;
2467
2468out:
2469 drm_gem_object_unreference(&obj->base);
a2fdbec6 2470 mutex_unlock(&dev->struct_mutex);
fabb21f3
FT
2471 return ret;
2472}
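/*
 * Illustrative userspace call (sketch only, not part of this file; assumes
 * libdrm's drmIoctl() and the drm_i915_gem_wait layout from i915_drm.h):
 *
 *	struct drm_i915_gem_wait wait = { 0 };
 *	wait.bo_handle  = handle;
 *	wait.timeout_ns = 0;	(0 polls once, like the busy ioctl;
 *				 a negative value waits indefinitely)
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * When a timeout was supplied, the remaining time is written back into
 * timeout_ns on return.
 */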
2473
3d4007e0
FT
2474/**
2475 * i915_gem_object_sync - sync an object to a ring.
2476 *
2477 * @obj: object which may be in use on another ring.
2478 * @to: ring we wish to use the object on. May be NULL.
2479 *
2480 * This code is meant to abstract object synchronization with the GPU.
2481 * Calling with NULL implies synchronizing the object with the CPU
2482 * rather than a particular GPU ring.
2483 *
2484 * Returns 0 if successful, else propagates up the lower layer error.
2485 */
2486int
2487i915_gem_object_sync(struct drm_i915_gem_object *obj,
2488 struct intel_ring_buffer *to)
2489{
2490 struct intel_ring_buffer *from = obj->ring;
2491 u32 seqno;
2492 int ret, idx;
2493
2494 if (from == NULL || to == from)
2495 return 0;
2496
2497 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
686a02f1 2498 return i915_gem_object_wait_rendering(obj, false);
3d4007e0
FT
2499
2500 idx = intel_ring_sync_index(from, to);
2501
686a02f1 2502 seqno = obj->last_read_seqno;
3d4007e0
FT
2503 if (seqno <= from->sync_seqno[idx])
2504 return 0;
2505
686a02f1
FT
2506 ret = i915_gem_check_olr(obj->ring, seqno);
2507 if (ret)
2508 return ret;
3d4007e0 2509
686a02f1
FT
2510 ret = to->sync_to(to, from, seqno);
2511 if (!ret)
d2557f23
FT
2512 /* We use last_read_seqno because sync_to()
2513 * might have just caused seqno wrap under
2514 * the radar.
2515 */
2516 from->sync_seqno[idx] = obj->last_read_seqno;
3d4007e0 2517
686a02f1 2518 return ret;
3d4007e0
FT
2519}
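/*
 * Descriptive note: from->sync_seqno[idx] caches the newest seqno that ring
 * "to" has already waited on from ring "from"; the early return above makes
 * repeated syncs against an object that has not been re-rendered since the
 * last sync a no-op, so semaphore commands are only emitted when needed.
 */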
2520
e11a51e3 2521static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
575ea5a0 2522{
e11a51e3 2523 u32 old_write_domain, old_read_domains;
575ea5a0 2524
e11a51e3
FT
2525 /* Force a pagefault for domain tracking on next user access */
2526 i915_gem_release_mmap(obj);
575ea5a0 2527
e11a51e3
FT
2528 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2529 return;
575ea5a0 2530
a2fdbec6
FT
2531 /* Wait for any direct GTT access to complete */
2532 cpu_mfence();
2533
e11a51e3
FT
2534 old_read_domains = obj->base.read_domains;
2535 old_write_domain = obj->base.write_domain;
575ea5a0 2536
e11a51e3
FT
2537 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2538 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
575ea5a0 2539
575ea5a0
FT
2540}
2541
f192107f
FT
2542/**
2543 * Unbinds an object from the GTT aperture.
2544 */
e11a51e3
FT
2545int
2546i915_gem_object_unbind(struct drm_i915_gem_object *obj)
575ea5a0 2547{
f192107f 2548 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
a2fdbec6 2549 int ret;
575ea5a0 2550
e11a51e3 2551 if (obj->gtt_space == NULL)
f192107f
FT
2552 return 0;
2553
d2557f23
FT
2554 if (obj->pin_count)
2555 return -EBUSY;
575ea5a0 2556
0b869d8a
FT
2557 BUG_ON(obj->pages == NULL);
2558
e11a51e3 2559 ret = i915_gem_object_finish_gpu(obj);
f192107f
FT
2560 if (ret)
2561 return ret;
2562 /* Continue on if we fail due to EIO, the GPU is hung so we
2563 * should be safe and we need to cleanup or else we might
2564 * cause memory corruption through use-after-free.
2565 */
575ea5a0 2566
e11a51e3 2567 i915_gem_object_finish_gtt(obj);
575ea5a0 2568
f192107f
FT
2569 /* Move the object to the CPU domain to ensure that
2570 * any possible CPU writes while it's not in the GTT
2571 * are flushed when we go to remap it.
2572 */
e11a51e3
FT
2573 if (ret == 0)
2574 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
797013cf 2575 if (ret == -ERESTARTSYS)
f192107f
FT
2576 return ret;
2577 if (ret) {
2578 /* In the event of a disaster, abandon all caches and
2579 * hope for the best.
2580 */
e11a51e3 2581 i915_gem_clflush_object(obj);
f192107f 2582 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e11a51e3 2583 }
575ea5a0 2584
f192107f 2585 /* release the fence reg _after_ flushing */
e11a51e3 2586 ret = i915_gem_object_put_fence(obj);
f192107f
FT
2587 if (ret)
2588 return ret;
575ea5a0 2589
f192107f
FT
2590 if (obj->has_global_gtt_mapping)
2591 i915_gem_gtt_unbind_object(obj);
e11a51e3
FT
2592 if (obj->has_aliasing_ppgtt_mapping) {
2593 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2594 obj->has_aliasing_ppgtt_mapping = 0;
2595 }
f192107f
FT
2596 i915_gem_gtt_finish_object(obj);
2597
e11a51e3 2598 i915_gem_object_put_pages_gtt(obj);
575ea5a0 2599
5d0b1887 2600 list_del_init(&obj->global_list);
e11a51e3 2601 list_del_init(&obj->mm_list);
f192107f 2602 /* Avoid an unnecessary call to unbind on rebind. */
e11a51e3 2603 obj->map_and_fenceable = true;
575ea5a0 2604
e11a51e3
FT
2605 drm_mm_put_block(obj->gtt_space);
2606 obj->gtt_space = NULL;
2607 obj->gtt_offset = 0;
575ea5a0 2608
e11a51e3
FT
2609 if (i915_gem_object_is_purgeable(obj))
2610 i915_gem_object_truncate(obj);
575ea5a0 2611
f192107f 2612 return ret;
575ea5a0
FT
2613}
2614
e11a51e3 2615int i915_gpu_idle(struct drm_device *dev)
575ea5a0 2616{
e11a51e3
FT
2617 drm_i915_private_t *dev_priv = dev->dev_private;
2618 struct intel_ring_buffer *ring;
2619 int ret, i;
575ea5a0 2620
e11a51e3
FT
2621 /* Flush everything onto the inactive list. */
2622 for_each_ring(ring, dev_priv, i) {
e555d299
FT
2623 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2624 if (ret)
2625 return ret;
2626
e11a51e3
FT
2627 ret = intel_ring_idle(ring);
2628 if (ret)
2629 return ret;
2630 }
575ea5a0 2631
e11a51e3
FT
2632 return 0;
2633}
575ea5a0 2634
e3359f38
FT
2635static void i965_write_fence_reg(struct drm_device *dev, int reg,
2636 struct drm_i915_gem_object *obj)
575ea5a0 2637{
7cbd1a46 2638 drm_i915_private_t *dev_priv = dev->dev_private;
a2fdbec6
FT
2639 int fence_reg;
2640 int fence_pitch_shift;
575ea5a0 2641
a2fdbec6
FT
2642 if (INTEL_INFO(dev)->gen >= 6) {
2643 fence_reg = FENCE_REG_SANDYBRIDGE_0;
2644 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
2645 } else {
2646 fence_reg = FENCE_REG_965_0;
2647 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
2648 }
2649
5d0b1887
FT
2650 fence_reg += reg * 8;
2651
2652 /* To w/a incoherency with non-atomic 64-bit register updates,
2653 * we split the 64-bit update into two 32-bit writes. In order
2654 * for a partial fence not to be evaluated between writes, we
2655 * precede the update with write to turn off the fence register,
2656 * and only enable the fence as the last step.
2657 *
2658 * For extra levels of paranoia, we make sure each step lands
2659 * before applying the next step.
2660 */
2661 I915_WRITE(fence_reg, 0);
2662 POSTING_READ(fence_reg);
2663
e3359f38
FT
2664 if (obj) {
2665 u32 size = obj->gtt_space->size;
5d0b1887 2666 uint64_t val;
575ea5a0 2667
e3359f38
FT
2668 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2669 0xfffff000) << 32;
2670 val |= obj->gtt_offset & 0xfffff000;
a2fdbec6 2671 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
e3359f38
FT
2672 if (obj->tiling_mode == I915_TILING_Y)
2673 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2674 val |= I965_FENCE_REG_VALID;
575ea5a0 2675
5d0b1887
FT
2676 I915_WRITE(fence_reg + 4, val >> 32);
2677 POSTING_READ(fence_reg + 4);
2678
2679 I915_WRITE(fence_reg + 0, val);
2680 POSTING_READ(fence_reg);
2681 } else {
2682 I915_WRITE(fence_reg + 4, 0);
2683 POSTING_READ(fence_reg + 4);
2684 }
e11a51e3 2685}
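/*
 * Worked example (added for clarity): for an X-tiled object at GTT offset
 * 0x100000 with size 0x10000 and a 512-byte stride, the code above builds
 *	upper 32 bits: (0x100000 + 0x10000 - 4096) & 0xfffff000 = 0x0010f000
 *	lower 32 bits: 0x00100000 | ((512/128 - 1) << pitch shift) | VALID
 * and writes the two halves separately, with the fence disabled in between,
 * to avoid the non-atomic 64-bit update hazard described in the comment
 * above.
 */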
575ea5a0 2686
e3359f38
FT
2687static void i915_write_fence_reg(struct drm_device *dev, int reg,
2688 struct drm_i915_gem_object *obj)
e11a51e3 2689{
e11a51e3 2690 drm_i915_private_t *dev_priv = dev->dev_private;
e3359f38 2691 u32 val;
575ea5a0 2692
e3359f38
FT
2693 if (obj) {
2694 u32 size = obj->gtt_space->size;
2695 int pitch_val;
2696 int tile_width;
575ea5a0 2697
e3359f38
FT
2698 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2699 (size & -size) != size ||
2700 (obj->gtt_offset & (size - 1)),
2701 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2702 obj->gtt_offset, obj->map_and_fenceable, size);
e11a51e3 2703
e3359f38
FT
2704 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2705 tile_width = 128;
2706 else
2707 tile_width = 512;
2708
2709 /* Note: pitch better be a power of two tile widths */
2710 pitch_val = obj->stride / tile_width;
2711 pitch_val = ffs(pitch_val) - 1;
2712
2713 val = obj->gtt_offset;
2714 if (obj->tiling_mode == I915_TILING_Y)
2715 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2716 val |= I915_FENCE_SIZE_BITS(size);
2717 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2718 val |= I830_FENCE_REG_VALID;
2719 } else
2720 val = 0;
2721
2722 if (reg < 8)
2723 reg = FENCE_REG_830_0 + reg * 4;
2724 else
2725 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2726
2727 I915_WRITE(reg, val);
2728 POSTING_READ(reg);
575ea5a0
FT
2729}
2730
e3359f38
FT
2731static void i830_write_fence_reg(struct drm_device *dev, int reg,
2732 struct drm_i915_gem_object *obj)
575ea5a0 2733{
e11a51e3 2734 drm_i915_private_t *dev_priv = dev->dev_private;
e11a51e3 2735 uint32_t val;
575ea5a0 2736
e3359f38
FT
2737 if (obj) {
2738 u32 size = obj->gtt_space->size;
2739 uint32_t pitch_val;
2740
2741 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2742 (size & -size) != size ||
2743 (obj->gtt_offset & (size - 1)),
2744 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2745 obj->gtt_offset, size);
2746
2747 pitch_val = obj->stride / 128;
2748 pitch_val = ffs(pitch_val) - 1;
2749
2750 val = obj->gtt_offset;
2751 if (obj->tiling_mode == I915_TILING_Y)
2752 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2753 val |= I830_FENCE_SIZE_BITS(size);
2754 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2755 val |= I830_FENCE_REG_VALID;
2756 } else
2757 val = 0;
2758
2759 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2760 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2761}
2762
a2fdbec6
FT
2763inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
2764{
2765 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
2766}
2767
e3359f38
FT
2768static void i915_gem_write_fence(struct drm_device *dev, int reg,
2769 struct drm_i915_gem_object *obj)
2770{
a2fdbec6
FT
2771 struct drm_i915_private *dev_priv = dev->dev_private;
2772
2773 /* Ensure that all CPU reads are completed before installing a fence
2774 * and all writes before removing the fence.
2775 */
2776 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
2777 cpu_mfence();
2778
5d0b1887
FT
2779 WARN(obj && (!obj->stride || !obj->tiling_mode),
2780 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
2781 obj->stride, obj->tiling_mode);
2782
e3359f38
FT
2783 switch (INTEL_INFO(dev)->gen) {
2784 case 7:
a2fdbec6 2785 case 6:
e3359f38
FT
2786 case 5:
2787 case 4: i965_write_fence_reg(dev, reg, obj); break;
2788 case 3: i915_write_fence_reg(dev, reg, obj); break;
2789 case 2: i830_write_fence_reg(dev, reg, obj); break;
a2fdbec6 2790 default: BUG();
e3359f38 2791 }
a2fdbec6
FT
2792
2793 /* And similarly be paranoid that no direct access to this region
2794 * is reordered to before the fence is installed.
2795 */
2796 if (i915_gem_object_needs_mb(obj))
2797 cpu_mfence();
e3359f38 2798}
575ea5a0 2799
e3359f38
FT
2800static inline int fence_number(struct drm_i915_private *dev_priv,
2801 struct drm_i915_fence_reg *fence)
2802{
2803 return fence - dev_priv->fence_regs;
2804}
575ea5a0 2805
e3359f38
FT
2806static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2807 struct drm_i915_fence_reg *fence,
2808 bool enable)
2809{
5d0b1887
FT
2810 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2811 int reg = fence_number(dev_priv, fence);
2812
2813 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
575ea5a0 2814
e3359f38 2815 if (enable) {
5d0b1887 2816 obj->fence_reg = reg;
e3359f38
FT
2817 fence->obj = obj;
2818 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2819 } else {
2820 obj->fence_reg = I915_FENCE_REG_NONE;
2821 fence->obj = NULL;
2822 list_del_init(&fence->lru_list);
2823 }
5d0b1887 2824 obj->fence_dirty = false;
e11a51e3 2825}
575ea5a0 2826
e11a51e3 2827static int
a2fdbec6 2828i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
575ea5a0 2829{
561529b1 2830 if (obj->last_fenced_seqno) {
b312333e 2831 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
561529b1
FT
2832 if (ret)
2833 return ret;
575ea5a0 2834
e11a51e3 2835 obj->last_fenced_seqno = 0;
e11a51e3 2836 }
575ea5a0 2837
b312333e 2838 obj->fenced_gpu_access = false;
e11a51e3 2839 return 0;
575ea5a0
FT
2840}
2841
e11a51e3
FT
2842int
2843i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
575ea5a0 2844{
e3359f38 2845 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
8e26cdf6 2846 struct drm_i915_fence_reg *fence;
e11a51e3 2847 int ret;
575ea5a0 2848
a2fdbec6 2849 ret = i915_gem_object_wait_fence(obj);
e11a51e3
FT
2850 if (ret)
2851 return ret;
575ea5a0 2852
e3359f38
FT
2853 if (obj->fence_reg == I915_FENCE_REG_NONE)
2854 return 0;
575ea5a0 2855
8e26cdf6
FT
2856 fence = &dev_priv->fence_regs[obj->fence_reg];
2857
e3359f38 2858 i915_gem_object_fence_lost(obj);
8e26cdf6 2859 i915_gem_object_update_fence(obj, fence, false);
575ea5a0 2860
e11a51e3 2861 return 0;
575ea5a0
FT
2862}
2863
e11a51e3 2864static struct drm_i915_fence_reg *
561529b1 2865i915_find_fence_reg(struct drm_device *dev)
575ea5a0 2866{
e11a51e3 2867 struct drm_i915_private *dev_priv = dev->dev_private;
561529b1 2868 struct drm_i915_fence_reg *reg, *avail;
e11a51e3 2869 int i;
575ea5a0 2870
e11a51e3
FT
2871 /* First try to find a free reg */
2872 avail = NULL;
2873 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2874 reg = &dev_priv->fence_regs[i];
2875 if (!reg->obj)
2876 return reg;
575ea5a0 2877
e11a51e3
FT
2878 if (!reg->pin_count)
2879 avail = reg;
2880 }
575ea5a0 2881
e11a51e3
FT
2882 if (avail == NULL)
2883 return NULL;
575ea5a0 2884
e11a51e3 2885 /* None available, try to steal one or wait for a user to finish */
e11a51e3
FT
2886 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2887 if (reg->pin_count)
2888 continue;
575ea5a0 2889
561529b1 2890 return reg;
e11a51e3 2891 }
575ea5a0 2892
561529b1 2893 return NULL;
575ea5a0
FT
2894}
2895
561529b1
FT
2896/**
2897 * i915_gem_object_get_fence - set up fencing for an object
2898 * @obj: object to map through a fence reg
2899 *
2900 * When mapping objects through the GTT, userspace wants to be able to write
2901 * to them without having to worry about swizzling if the object is tiled.
2902 * This function walks the fence regs looking for a free one for @obj,
2903 * stealing one if it can't find any.
2904 *
2905 * It then sets up the reg based on the object's properties: address, pitch
2906 * and tiling format.
2907 *
2908 * For an untiled surface, this removes any existing fence.
2909 */
e11a51e3 2910int
561529b1 2911i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
575ea5a0 2912{
e11a51e3
FT
2913 struct drm_device *dev = obj->base.dev;
2914 struct drm_i915_private *dev_priv = dev->dev_private;
e3359f38 2915 bool enable = obj->tiling_mode != I915_TILING_NONE;
e11a51e3
FT
2916 struct drm_i915_fence_reg *reg;
2917 int ret;
575ea5a0 2918
e3359f38
FT
2919 /* Have we updated the tiling parameters upon the object and so
2920 * will need to serialise the write to the associated fence register?
2921 */
f192107f 2922 if (obj->fence_dirty) {
a2fdbec6 2923 ret = i915_gem_object_wait_fence(obj);
e3359f38
FT
2924 if (ret)
2925 return ret;
2926 }
575ea5a0 2927
561529b1 2928 /* Just update our place in the LRU if our fence is getting reused. */
e11a51e3
FT
2929 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2930 reg = &dev_priv->fence_regs[obj->fence_reg];
f192107f 2931 if (!obj->fence_dirty) {
e3359f38
FT
2932 list_move_tail(&reg->lru_list,
2933 &dev_priv->mm.fence_list);
2934 return 0;
2935 }
2936 } else if (enable) {
2937 reg = i915_find_fence_reg(dev);
2938 if (reg == NULL)
2939 return -EDEADLK;
2940
2941 if (reg->obj) {
2942 struct drm_i915_gem_object *old = reg->obj;
575ea5a0 2943
a2fdbec6 2944 ret = i915_gem_object_wait_fence(old);
e11a51e3
FT
2945 if (ret)
2946 return ret;
e11a51e3 2947
e3359f38 2948 i915_gem_object_fence_lost(old);
e11a51e3 2949 }
e3359f38 2950 } else
e11a51e3 2951 return 0;
e11a51e3 2952
e3359f38 2953 i915_gem_object_update_fence(obj, reg, enable);
e11a51e3 2954
e3359f38 2955 return 0;
575ea5a0
FT
2956}
2957
d1c259ee
FT
2958static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2959 struct drm_mm_node *gtt_space,
2960 unsigned long cache_level)
2961{
2962 struct drm_mm_node *other;
2963
2964 /* On non-LLC machines we have to be careful when putting differing
2965 * types of snoopable memory together to avoid the prefetcher
a2fdbec6 2966 * crossing memory domains and dying.
d1c259ee
FT
2967 */
2968 if (HAS_LLC(dev))
2969 return true;
2970
2971 if (gtt_space == NULL)
2972 return true;
2973
2974 if (list_empty(&gtt_space->node_list))
2975 return true;
2976
2977 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2978 if (other->allocated && !other->hole_follows && other->color != cache_level)
2979 return false;
2980
2981 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2982 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2983 return false;
2984
2985 return true;
2986}
2987
2988static void i915_gem_verify_gtt(struct drm_device *dev)
2989{
2990#if WATCH_GTT
2991 struct drm_i915_private *dev_priv = dev->dev_private;
2992 struct drm_i915_gem_object *obj;
2993 int err = 0;
2994
5d0b1887 2995 list_for_each_entry(obj, &dev_priv->mm.global_list, global_list) {
d1c259ee
FT
2996 if (obj->gtt_space == NULL) {
2997 printk(KERN_ERR "object found on GTT list with no space reserved\n");
2998 err++;
2999 continue;
3000 }
3001
3002 if (obj->cache_level != obj->gtt_space->color) {
3003 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3004 obj->gtt_space->start,
3005 obj->gtt_space->start + obj->gtt_space->size,
3006 obj->cache_level,
3007 obj->gtt_space->color);
3008 err++;
3009 continue;
3010 }
3011
3012 if (!i915_gem_valid_gtt_space(dev,
3013 obj->gtt_space,
3014 obj->cache_level)) {
3015 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3016 obj->gtt_space->start,
3017 obj->gtt_space->start + obj->gtt_space->size,
3018 obj->cache_level);
3019 err++;
3020 continue;
3021 }
3022 }
3023
3024 WARN_ON(err);
3025#endif
3026}
3027
d2557f23
FT
3028/**
3029 * Finds free space in the GTT aperture and binds the object there.
3030 */
575ea5a0
FT
3031static int
3032i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
d2557f23 3033 unsigned alignment,
b00bc81c
FT
3034 bool map_and_fenceable,
3035 bool nonblocking)
575ea5a0 3036{
d2557f23
FT
3037 struct drm_device *dev = obj->base.dev;
3038 drm_i915_private_t *dev_priv = dev->dev_private;
5d0b1887
FT
3039 struct drm_mm_node *node;
3040 u32 size, fence_size, fence_alignment, unfenced_alignment;
575ea5a0 3041 bool mappable, fenceable;
5d0b1887
FT
3042 size_t gtt_max = map_and_fenceable ?
3043 dev_priv->gtt.mappable_end : dev_priv->gtt.total;
575ea5a0
FT
3044 int ret;
3045
a2fdbec6
FT
3046 fence_size = i915_gem_get_gtt_size(dev,
3047 obj->base.size,
3048 obj->tiling_mode);
3049 fence_alignment = i915_gem_get_gtt_alignment(dev,
3050 obj->base.size,
3051 obj->tiling_mode, true);
3052 unfenced_alignment =
3053 i915_gem_get_gtt_alignment(dev,
3054 obj->base.size,
3055 obj->tiling_mode, false);
575ea5a0 3056
575ea5a0
FT
3057 if (alignment == 0)
3058 alignment = map_and_fenceable ? fence_alignment :
a2fdbec6
FT
3059 unfenced_alignment;
3060 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
575ea5a0 3061 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
d2557f23 3062 return -EINVAL;
575ea5a0
FT
3063 }
3064
3065 size = map_and_fenceable ? fence_size : obj->base.size;
3066
3067 /* If the object is bigger than the entire aperture, reject it early
3068 * before evicting everything in a vain attempt to find space.
3069 */
5d0b1887
FT
3070 if (obj->base.size > gtt_max) {
3071 DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
3072 obj->base.size,
3073 map_and_fenceable ? "mappable" : "total",
3074 gtt_max);
d2557f23 3075 return -E2BIG;
575ea5a0
FT
3076 }
3077
3078 search_free:
3079 if (map_and_fenceable)
5d0b1887 3080 node = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
d1c259ee 3081 size, alignment, obj->cache_level,
a2fdbec6 3082 0, dev_priv->gtt.mappable_end,
d1c259ee 3083 false);
575ea5a0 3084 else
5d0b1887 3085 node = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
d1c259ee
FT
3086 size, alignment, obj->cache_level,
3087 false);
5d0b1887 3088 if (node != NULL) {
575ea5a0 3089 if (map_and_fenceable)
d1c259ee 3090 obj->gtt_space =
5d0b1887 3091 drm_mm_get_block_range_generic(node,
d1c259ee 3092 size, alignment, obj->cache_level,
a2fdbec6 3093 0, dev_priv->gtt.mappable_end,
d1c259ee 3094 false);
575ea5a0 3095 else
d1c259ee 3096 obj->gtt_space =
5d0b1887 3097 drm_mm_get_block_generic(node,
d1c259ee
FT
3098 size, alignment, obj->cache_level,
3099 false);
575ea5a0
FT
3100 }
3101 if (obj->gtt_space == NULL) {
3102 ret = i915_gem_evict_something(dev, size, alignment,
9f16360b
FT
3103 obj->cache_level,
3104 map_and_fenceable,
3105 nonblocking);
d1c259ee
FT
3106 if (ret)
3107 return ret;
3108
575ea5a0
FT
3109 goto search_free;
3110 }
f6201ebf
MD
3111
3112 /*
3113 * NOTE: i915_gem_object_get_pages_gtt() cannot
3114 * return ENOMEM, since we used VM_ALLOC_RETRY.
3115 */
dfa24183 3116 ret = i915_gem_object_get_pages_gtt(obj);
575ea5a0
FT
3117 if (ret != 0) {
3118 drm_mm_put_block(obj->gtt_space);
3119 obj->gtt_space = NULL;
d2557f23 3120 return ret;
575ea5a0
FT
3121 }
3122
7cbd1a46 3123 i915_gem_gtt_bind_object(obj, obj->cache_level);
575ea5a0
FT
3124 if (ret != 0) {
3125 i915_gem_object_put_pages_gtt(obj);
3126 drm_mm_put_block(obj->gtt_space);
3127 obj->gtt_space = NULL;
686a02f1 3128 if (i915_gem_evict_everything(dev))
575ea5a0
FT
3129 return (ret);
3130 goto search_free;
3131 }
3132
5d0b1887 3133 list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
575ea5a0
FT
3134 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3135
575ea5a0
FT
3136 obj->gtt_offset = obj->gtt_space->start;
3137
3138 fenceable =
3139 obj->gtt_space->size == fence_size &&
3140 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
3141
3142 mappable =
a2fdbec6
FT
3143 obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;
3144
575ea5a0
FT
3145 obj->map_and_fenceable = mappable && fenceable;
3146
5d0b1887 3147 trace_i915_gem_object_bind(obj, map_and_fenceable);
d1c259ee 3148 i915_gem_verify_gtt(dev);
d2557f23 3149 return 0;
575ea5a0
FT
3150}
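/*
 * Descriptive note: "fenceable" above means the allocated node is large
 * enough and sufficiently aligned to be covered by a fence register, while
 * "mappable" means it lies entirely below gtt.mappable_end, i.e. inside the
 * CPU-visible aperture; obj->map_and_fenceable is the conjunction of the two
 * and is consulted by callers that need a CPU-mappable, fenceable binding.
 */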
3151
e11a51e3
FT
3152void
3153i915_gem_clflush_object(struct drm_i915_gem_object *obj)
3154{
3155
3156 /* If we don't have a page list set up, then we're not pinned
3157 * to GPU, and we can ignore the cache flush because it'll happen
3158 * again at bind time.
3159 */
3160 if (obj->pages == NULL)
3161 return;
3162
a2fdbec6
FT
3163 /*
3164 * Stolen memory is always coherent with the GPU as it is explicitly
3165 * marked as wc by the system, or the system is cache-coherent.
3166 */
3167 if (obj->stolen)
3168 return;
3169
e11a51e3
FT
3170 /* If the GPU is snooping the contents of the CPU cache,
3171 * we do not need to manually clear the CPU cache lines. However,
3172 * the caches are only snooped when the render cache is
3173 * flushed/invalidated. As we always have to emit invalidations
3174 * and flushes when moving into and out of the RENDER domain, correct
3175 * snooping behaviour occurs naturally as the result of our domain
3176 * tracking.
3177 */
3178 if (obj->cache_level != I915_CACHE_NONE)
3179 return;
3180
3181 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
3182}
3183
3184/** Flushes the GTT write domain for the object if it's dirty. */
575ea5a0 3185static void
e11a51e3 3186i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
575ea5a0 3187{
e11a51e3 3188 uint32_t old_write_domain;
575ea5a0 3189
e11a51e3
FT
3190 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3191 return;
575ea5a0 3192
e11a51e3
FT
3193 /* No actual flushing is required for the GTT write domain. Writes
3194 * to it immediately go to main memory as far as we know, so there's
3195 * no chipset flush. It also doesn't land in render cache.
3196 *
3197 * However, we do have to enforce the order so that all writes through
3198 * the GTT land before any writes to the device, such as updates to
3199 * the GATT itself.
3200 */
3201 cpu_sfence();
575ea5a0 3202
e11a51e3
FT
3203 old_write_domain = obj->base.write_domain;
3204 obj->base.write_domain = 0;
3205}
3206
3207/** Flushes the CPU write domain for the object if it's dirty. */
3208static void
3209i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3210{
3211 uint32_t old_write_domain;
3212
3213 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
575ea5a0
FT
3214 return;
3215
e11a51e3 3216 i915_gem_clflush_object(obj);
0b869d8a 3217 i915_gem_chipset_flush(obj->base.dev);
575ea5a0 3218 old_write_domain = obj->base.write_domain;
e11a51e3
FT
3219 obj->base.write_domain = 0;
3220}
575ea5a0 3221
e11a51e3
FT
3222/**
3223 * Moves a single object to the GTT read, and possibly write domain.
3224 *
3225 * This function returns when the move is complete, including waiting on
3226 * flushes to occur.
3227 */
575ea5a0 3228int
e11a51e3 3229i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
575ea5a0 3230{
686a02f1 3231 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
e11a51e3 3232 uint32_t old_write_domain, old_read_domains;
575ea5a0
FT
3233 int ret;
3234
686a02f1 3235 /* Not valid to be called on unbound objects. */
575ea5a0 3236 if (obj->gtt_space == NULL)
686a02f1 3237 return -EINVAL;
575ea5a0 3238
e11a51e3
FT
3239 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3240 return 0;
575ea5a0 3241
686a02f1
FT
3242 ret = i915_gem_object_wait_rendering(obj, !write);
3243 if (ret)
3244 return ret;
575ea5a0 3245
e11a51e3 3246 i915_gem_object_flush_cpu_write_domain(obj);
575ea5a0 3247
a2fdbec6
FT
3248 /* Serialise direct access to this object with the barriers for
3249 * coherent writes from the GPU, by effectively invalidating the
3250 * GTT domain upon first access.
3251 */
3252 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3253 cpu_mfence();
3254
e11a51e3
FT
3255 old_write_domain = obj->base.write_domain;
3256 old_read_domains = obj->base.read_domains;
575ea5a0 3257
686a02f1
FT
3258 /* It should now be out of any other write domains, and we can update
3259 * the domain values for our changes.
3260 */
3261 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
e11a51e3
FT
3262 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3263 if (write) {
3264 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3265 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3266 obj->dirty = 1;
3267 }
575ea5a0 3268
686a02f1
FT
3269 /* And bump the LRU for this access */
3270 if (i915_gem_object_is_inactive(obj))
3271 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3272
3273 return 0;
575ea5a0
FT
3274}
3275
e11a51e3
FT
3276int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3277 enum i915_cache_level cache_level)
b030f26b 3278{
e11a51e3 3279 struct drm_device *dev = obj->base.dev;
b030f26b 3280 drm_i915_private_t *dev_priv = dev->dev_private;
e11a51e3 3281 int ret;
b030f26b 3282
e11a51e3
FT
3283 if (obj->cache_level == cache_level)
3284 return 0;
3285
3286 if (obj->pin_count) {
3287 DRM_DEBUG("can not change the cache level of pinned objects\n");
3288 return -EBUSY;
b030f26b
FT
3289 }
3290
d1c259ee
FT
3291 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
3292 ret = i915_gem_object_unbind(obj);
3293 if (ret)
3294 return ret;
3295 }
3296
e11a51e3
FT
3297 if (obj->gtt_space) {
3298 ret = i915_gem_object_finish_gpu(obj);
d2557f23
FT
3299 if (ret)
3300 return ret;
b030f26b 3301
e11a51e3 3302 i915_gem_object_finish_gtt(obj);
575ea5a0 3303
e11a51e3
FT
3304 /* Before SandyBridge, you could not use tiling or fence
3305 * registers with snooped memory, so relinquish any fences
3306 * currently pointing to our region in the aperture.
3307 */
d1c259ee 3308 if (INTEL_INFO(dev)->gen < 6) {
e11a51e3
FT
3309 ret = i915_gem_object_put_fence(obj);
3310 if (ret)
3311 return ret;
3312 }
575ea5a0 3313
e11a51e3
FT
3314 if (obj->has_global_gtt_mapping)