drm/i915: Update to Linux 4.6
[dragonfly.git] / sys / dev / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include <linux/shmem_fs.h>
36 #include <linux/slab.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39
40 #define RQ_BUG_ON(expr)
41
42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
44 static void
45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
46 static void
47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
48
49 static bool cpu_cache_is_coherent(struct drm_device *dev,
50                                   enum i915_cache_level level)
51 {
52         return HAS_LLC(dev) || level != I915_CACHE_NONE;
53 }
54
55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
56 {
57         if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
58                 return true;
59
60         return obj->pin_display;
61 }
62
63 /* some bookkeeping */
/*
 * Account a newly created GEM object of @size bytes in the per-device
 * statistics. A dedicated spinlock keeps the object count and the byte
 * total consistent with each other.
 */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
                                  size_t size)
{
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count++;
        dev_priv->mm.object_memory += size;
        spin_unlock(&dev_priv->mm.object_stat_lock);
}
72
/*
 * Counterpart of i915_gem_info_add_obj(): remove a destroyed GEM object
 * of @size bytes from the per-device statistics under the same lock.
 */
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
                                     size_t size)
{
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count--;
        dev_priv->mm.object_memory -= size;
        spin_unlock(&dev_priv->mm.object_stat_lock);
}
81
/*
 * Wait for any in-progress GPU reset to complete before an ioctl is
 * allowed to take the struct_mutex.
 *
 * Returns 0 immediately if no reset is pending (or the GPU is terminally
 * wedged — callers will then fail on their own), -EIO if the reset does
 * not finish within 10 seconds, or the error from an interrupted wait.
 */
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
        int ret;

/* Condition is re-evaluated by wait_event below, hence a macro. */
#define EXIT_COND (!i915_reset_in_progress(error) || \
                   i915_terminally_wedged(error))
        if (EXIT_COND)
                return 0;

        /*
         * Only wait 10 seconds for the gpu reset to complete to avoid hanging
         * userspace. If it takes that long something really bad is going on and
         * we should simply try to bail out and fail as gracefully as possible.
         */
        ret = wait_event_interruptible_timeout(error->reset_queue,
                                               EXIT_COND,
                                               10*HZ);
        if (ret == 0) {
                /* Timed out: 0 from wait_event_interruptible_timeout. */
                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
                return -EIO;
        } else if (ret < 0) {
                /* Interrupted by a signal. */
                return ret;
        }
#undef EXIT_COND

        return 0;
}
110
/*
 * Take dev->struct_mutex on behalf of an ioctl, first waiting for any
 * pending GPU reset so we do not deadlock against the reset handler.
 * Both the reset wait and the mutex acquisition are interruptible;
 * returns 0 with the mutex held, or a negative error without it.
 */
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;

        ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
        if (ret)
                return ret;

        ret = mutex_lock_interruptible(&dev->struct_mutex);
        if (ret)
                return ret;

        WARN_ON(i915_verify_lists(dev));
        return 0;
}
127
/*
 * GET_APERTURE ioctl: report the total size of the global GTT and an
 * estimate of the space still available, computed as total minus the
 * size of every currently pinned vma on the active and inactive lists.
 */
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_get_aperture *args = data;
        struct i915_gtt *ggtt = &dev_priv->gtt;
        struct i915_vma *vma;
        size_t pinned;

        pinned = 0;
        /* struct_mutex protects the vma lists while we walk them. */
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
                if (vma->pin_count)
                        pinned += vma->node.size;
        list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
                if (vma->pin_count)
                        pinned += vma->node.size;
        mutex_unlock(&dev->struct_mutex);

        args->aper_size = dev_priv->gtt.base.total;
        args->aper_available_size = args->aper_size - pinned;

        return 0;
}
153
#if 0
/*
 * Backing-store operations for "phys" objects (objects backed by a
 * contiguous DMA allocation, obj->phys_handle). This whole region is
 * compiled out in the DragonFly port — i915_gem_object_attach_phys()
 * below likewise skips installing i915_gem_phys_ops. NOTE(review):
 * presumably awaiting shmem/address_space support in the port; confirm
 * before enabling.
 */

/*
 * Copy the object's shmem pages into the contiguous phys allocation and
 * build a single-entry sg_table pointing at the DMA address.
 */
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
        char *vaddr = obj->phys_handle->vaddr;
        struct sg_table *st;
        struct scatterlist *sg;
        int i;

        /* Bit-17 swizzling cannot be handled by a flat copy. */
        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
                return -EINVAL;

        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                struct page *page;
                char *src;

                page = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(page))
                        return PTR_ERR(page);

                src = kmap_atomic(page);
                memcpy(vaddr, src, PAGE_SIZE);
                /* Flush so the uncached GPU view sees the copy. */
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                kunmap_atomic(src);

                put_page(page);
                vaddr += PAGE_SIZE;
        }

        i915_gem_chipset_flush(obj->base.dev);

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
                return -ENOMEM;

        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
                kfree(st);
                return -ENOMEM;
        }

        /* One sg entry spans the whole contiguous allocation. */
        sg = st->sgl;
        sg->offset = 0;
        sg->length = obj->base.size;

        sg_dma_address(sg) = obj->phys_handle->busaddr;
        sg_dma_len(sg) = obj->base.size;

        obj->pages = st;
        return 0;
}

/*
 * Write dirty data back from the phys allocation to the shmem pages and
 * free the single-entry sg_table. The phys allocation itself is freed
 * later by i915_gem_object_release_phys().
 */
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
        int ret;

        BUG_ON(obj->madv == __I915_MADV_PURGED);

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret) {
                /* In the event of a disaster, abandon all caches and
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }

        /* DONTNEED means userspace has given up the contents. */
        if (obj->madv == I915_MADV_DONTNEED)
                obj->dirty = 0;

        if (obj->dirty) {
                struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
                char *vaddr = obj->phys_handle->vaddr;
                int i;

                for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                        struct page *page;
                        char *dst;

                        page = shmem_read_mapping_page(mapping, i);
                        if (IS_ERR(page))
                                continue;

                        dst = kmap_atomic(page);
                        /* Invalidate stale cachelines before reading back. */
                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        memcpy(dst, vaddr, PAGE_SIZE);
                        kunmap_atomic(dst);

                        set_page_dirty(page);
                        if (obj->madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        put_page(page);
                        vaddr += PAGE_SIZE;
                }
                obj->dirty = 0;
        }

        sg_free_table(obj->pages);
        kfree(obj->pages);
}

/* Final release: free the contiguous DMA allocation. */
static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
        drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .get_pages = i915_gem_object_get_pages_phys,
        .put_pages = i915_gem_object_put_pages_phys,
        .release = i915_gem_object_release_phys,
};
#endif
268
/*
 * Unbind every vma of @obj and release its backing pages, holding a
 * temporary reference so the object cannot vanish mid-teardown.
 *
 * NOTE(review): an unbind failure only breaks out of the loop — the
 * error itself is dropped and we still attempt put_pages; only the
 * put_pages result is returned to the caller.
 */
static int
drop_pages(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma, *next;
        int ret;

        drm_gem_object_reference(&obj->base);
        list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
                if (i915_vma_unbind(vma))
                        break;

        ret = i915_gem_object_put_pages(obj);
        drm_gem_object_unreference(&obj->base);

        return ret;
}
285
/*
 * Attach a contiguous DMA ("phys") backing store of the object's size,
 * aligned to @align bytes, replacing the current backing pages.
 *
 * If a phys handle already exists it is reused, provided it satisfies
 * the requested alignment (the (align - 1) mask test assumes @align is a
 * power of two — TODO confirm callers guarantee that). Purged/DONTNEED
 * objects are rejected with -EFAULT.
 */
int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
{
        drm_dma_handle_t *phys;
        int ret;

        if (obj->phys_handle) {
                if ((unsigned long)obj->phys_handle->vaddr & (align -1))
                        return -EBUSY;

                return 0;
        }

        if (obj->madv != I915_MADV_WILLNEED)
                return -EFAULT;

#if 0
        if (obj->base.filp == NULL)
                return -EINVAL;
#endif

        /* Drop the current (shmem) backing store before switching over. */
        ret = drop_pages(obj);
        if (ret)
                return ret;

        /* create a new object */
        phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
        if (!phys)
                return -ENOMEM;

        obj->phys_handle = phys;
#if 0
        /* Disabled in this port: phys ops region above is compiled out. */
        obj->ops = &i915_gem_phys_ops;
#endif

        return i915_gem_object_get_pages(obj);
}
324
/*
 * pwrite fast path for phys objects: copy user data straight into the
 * contiguous kernel mapping at @args->offset.
 *
 * Tries an atomic non-caching copy first; if that faults, drops
 * struct_mutex (the phys mapping is fixed for the object's lifetime, so
 * this is safe) and retries with a faulting copy_from_user. On copy
 * failure the clflush/chipset-flush is skipped via the early goto, but
 * the frontbuffer is still flushed on every exit path.
 */
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
                     struct drm_file *file_priv)
{
        struct drm_device *dev = obj->base.dev;
        void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
        char __user *user_data = to_user_ptr(args->data_ptr);
        int ret = 0;

        /* We manually control the domain here and pretend that it
         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
         */
        ret = i915_gem_object_wait_rendering(obj, false);
        if (ret)
                return ret;

        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
                unsigned long unwritten;

                /* The physical object once assigned is fixed for the lifetime
                 * of the obj, so we can safely drop the lock and continue
                 * to access vaddr.
                 */
                mutex_unlock(&dev->struct_mutex);
                unwritten = copy_from_user(vaddr, user_data, args->size);
                mutex_lock(&dev->struct_mutex);
                if (unwritten) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        drm_clflush_virt_range(vaddr, args->size);
        i915_gem_chipset_flush(dev);

out:
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
}
366
/*
 * Allocate a zeroed struct drm_i915_gem_object using the DragonFly
 * kmalloc API (M_WAITOK | M_ZERO in the M_DRM malloc zone).
 */
void *i915_gem_object_alloc(struct drm_device *dev)
{
        return kmalloc(sizeof(struct drm_i915_gem_object),
            M_DRM, M_WAITOK | M_ZERO);
}
372
/* Free an object previously allocated by i915_gem_object_alloc(). */
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
        kfree(obj);
}
377
/*
 * Common worker for the CREATE and DUMB_CREATE ioctls: allocate a GEM
 * object of @size bytes (rounded up to a page; zero size is rejected)
 * and return a new userspace handle for it in *@handle_p.
 */
static int
i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
                uint64_t size,
                uint32_t *handle_p)
{
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        size = roundup(size, PAGE_SIZE);
        if (size == 0)
                return -EINVAL;

        /* Allocate the new object */
        obj = i915_gem_alloc_object(dev, size);
        if (obj == NULL)
                return -ENOMEM;

        ret = drm_gem_handle_create(file, &obj->base, &handle);
        /* drop reference from allocate - handle holds it now */
        drm_gem_object_unreference_unlocked(&obj->base);
        if (ret)
                return ret;

        *handle_p = handle;
        return 0;
}
406
/*
 * DUMB_CREATE entry point: derive pitch (bytes-per-row rounded up to a
 * 64-byte boundary) and total size from width/height/bpp, then create
 * the backing object via i915_gem_create().
 */
int
i915_gem_dumb_create(struct drm_file *file,
                     struct drm_device *dev,
                     struct drm_mode_create_dumb *args)
{
        /* have to work out size/pitch and return them */
        args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
        args->size = args->pitch * args->height;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}
418
/**
 * Creates a new mm object and returns a handle to it.
 *
 * Thin ioctl wrapper: @data is a struct drm_i915_gem_create carrying the
 * requested size in and the new handle out.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_gem_create *args = data;

        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}
431
432 static inline int
433 __copy_to_user_swizzled(char __user *cpu_vaddr,
434                         const char *gpu_vaddr, int gpu_offset,
435                         int length)
436 {
437         int ret, cpu_offset = 0;
438
439         while (length > 0) {
440                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
441                 int this_length = min(cacheline_end - gpu_offset, length);
442                 int swizzled_gpu_offset = gpu_offset ^ 64;
443
444                 ret = __copy_to_user(cpu_vaddr + cpu_offset,
445                                      gpu_vaddr + swizzled_gpu_offset,
446                                      this_length);
447                 if (ret)
448                         return ret + length;
449
450                 cpu_offset += this_length;
451                 gpu_offset += this_length;
452                 length -= this_length;
453         }
454
455         return 0;
456 }
457
458 static inline int
459 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
460                           const char __user *cpu_vaddr,
461                           int length)
462 {
463         int ret, cpu_offset = 0;
464
465         while (length > 0) {
466                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
467                 int this_length = min(cacheline_end - gpu_offset, length);
468                 int swizzled_gpu_offset = gpu_offset ^ 64;
469
470                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
471                                        cpu_vaddr + cpu_offset,
472                                        this_length);
473                 if (ret)
474                         return ret + length;
475
476                 cpu_offset += this_length;
477                 gpu_offset += this_length;
478                 length -= this_length;
479         }
480
481         return 0;
482 }
483
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 *
 * On success the pages are pinned; the caller must balance with
 * i915_gem_object_unpin_pages(). NOTE(review): presumably requires
 * struct_mutex held, as the pread path calls it locked — confirm.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                                    int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;

#if 0
        if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
                return -EINVAL;
#endif

        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
                /* If we're not in the cpu read domain, set ourself into the gtt
                 * read domain and manually flush cachelines (if required). This
                 * optimizes for the case when the gpu will dirty the data
                 * anyway again before the next pread happens. */
                *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                        obj->cache_level);
                /* Wait only for pending GPU writes (readonly wait). */
                ret = i915_gem_object_wait_rendering(obj, true);
                if (ret)
                        return ret;
        }

        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ret;

        i915_gem_object_pin_pages(obj);

        return ret;
}
521
522 /* Per-page copy function for the shmem pread fastpath.
523  * Flushes invalid cachelines before reading the target if
524  * needs_clflush is set. */
525 static int
526 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
527                  char __user *user_data,
528                  bool page_do_bit17_swizzling, bool needs_clflush)
529 {
530         char *vaddr;
531         int ret;
532
533         if (unlikely(page_do_bit17_swizzling))
534                 return -EINVAL;
535
536         vaddr = kmap_atomic(page);
537         if (needs_clflush)
538                 drm_clflush_virt_range(vaddr + shmem_page_offset,
539                                        page_length);
540         ret = __copy_to_user_inatomic(user_data,
541                                       vaddr + shmem_page_offset,
542                                       page_length);
543         kunmap_atomic(vaddr);
544
545         return ret ? -EFAULT : 0;
546 }
547
548 static void
549 shmem_clflush_swizzled_range(char *addr, unsigned long length,
550                              bool swizzled)
551 {
552         if (unlikely(swizzled)) {
553                 unsigned long start = (unsigned long) addr;
554                 unsigned long end = (unsigned long) addr + length;
555
556                 /* For swizzling simply ensure that we always flush both
557                  * channels. Lame, but simple and it works. Swizzled
558                  * pwrite/pread is far from a hotpath - current userspace
559                  * doesn't use it at all. */
560                 start = round_down(start, 128);
561                 end = round_up(end, 128);
562
563                 drm_clflush_virt_range((void *)start, end - start);
564         } else {
565                 drm_clflush_virt_range(addr, length);
566         }
567
568 }
569
570 /* Only difference to the fast-path function is that this can handle bit17
571  * and uses non-atomic copy and kmap functions. */
572 static int
573 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
574                  char __user *user_data,
575                  bool page_do_bit17_swizzling, bool needs_clflush)
576 {
577         char *vaddr;
578         int ret;
579
580         vaddr = kmap(page);
581         if (needs_clflush)
582                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
583                                              page_length,
584                                              page_do_bit17_swizzling);
585
586         if (page_do_bit17_swizzling)
587                 ret = __copy_to_user_swizzled(user_data,
588                                               vaddr, shmem_page_offset,
589                                               page_length);
590         else
591                 ret = __copy_to_user(user_data,
592                                      vaddr + shmem_page_offset,
593                                      page_length);
594         kunmap(page);
595
596         return ret ? - EFAULT : 0;
597 }
598
/*
 * Copy @args->size bytes out of a shmem-backed object into userspace,
 * page by page over the object's sg list.
 *
 * For each page the atomic fast path is tried first; if it cannot be
 * used (swizzled page or fault), struct_mutex is dropped, the user
 * buffer is prefaulted once, and the sleeping slow path is used before
 * the lock is retaken. Pages are pinned for the duration via
 * i915_gem_obj_prepare_shmem_read() and unpinned on exit.
 */
static int
i915_gem_shmem_pread(struct drm_device *dev,
                     struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pread *args,
                     struct drm_file *file)
{
        char __user *user_data;
        ssize_t remain;
        loff_t offset;
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int prefaulted = 0;
        int needs_clflush = 0;
        struct sg_page_iter sg_iter;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;

        offset = args->offset;

        /* Start iterating at the page containing args->offset. */
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
                struct vm_page *page = sg_page_iter_page(&sg_iter);

                if (remain <= 0)
                        break;

                /* Operation in this page
                 *
                 * shmem_page_offset = offset within page in shmem file
                 * page_length = bytes to copy for this page
                 */
                shmem_page_offset = offset_in_page(offset);
                page_length = remain;
                if ((shmem_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - shmem_page_offset;

                /* Physical address bit 17 selects the swizzled half. */
                page_do_bit17_swizzling = obj_do_bit17_swizzling &&
                        (page_to_phys(page) & (1 << 17)) != 0;

                ret = shmem_pread_fast(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);
                if (ret == 0)
                        goto next_page;

                /* Fast path failed: fall back to the sleeping copy. */
                mutex_unlock(&dev->struct_mutex);

                if (likely(!i915.prefault_disable) && !prefaulted) {
                        ret = fault_in_multipages_writeable(user_data, remain);
                        /* Userspace is tricking us, but we've already clobbered
                         * its pages with the prefault and promised to write the
                         * data up to the first fault. Hence ignore any errors
                         * and just continue. */
                        (void)ret;
                        prefaulted = 1;
                }

                ret = shmem_pread_slow(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);

                mutex_lock(&dev->struct_mutex);

                if (ret)
                        goto out;

next_page:
                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

out:
        i915_gem_object_unpin_pages(obj);

        return ret;
}
683
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 *
 * Validates that the (offset, size) window lies entirely within the
 * object before delegating to the shmem pread worker. Zero-size reads
 * succeed trivially without taking any locks.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *file)
{
        struct drm_i915_gem_pread *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        if (args->size == 0)
                return 0;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        /* NOTE(review): relies on base being the first member so that a
         * failed lookup (NULL) survives to_intel_bo() — confirm this
         * idiom is intentional (it matches the upstream code of this
         * era). */
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Bounds check source.  */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
                goto out;
        }

        /* prime objects have no backing filp to GEM pread/pwrite
         * pages from.
         */

        trace_i915_gem_object_pread(obj, args->offset, args->size);

        ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}
731
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

/*
 * Copy @length user bytes into the GTT aperture through an atomic
 * write-combined mapping of the page at @page_base. Returns the number
 * of bytes NOT copied (0 on success), as reported by the non-faulting
 * copy routine.
 */
static inline int
fast_user_write(struct io_mapping *mapping,
                loff_t page_base, int page_offset,
                char __user *user_data,
                int length)
{
        void __iomem *vaddr_atomic;
        void *vaddr;
        unsigned long unwritten;

        vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
        /* We can use the cpu mem copy function because this is X86. */
        vaddr = (char __force*)vaddr_atomic + page_offset;
        unwritten = __copy_from_user_inatomic_nocache(vaddr,
                                                      user_data, length);
        io_mapping_unmap_atomic(vaddr_atomic);
        return unwritten;
}
754
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 *
 * Requires a mappable GGTT pin; any fault during the non-blocking copy
 * aborts with -EFAULT so the caller can retry via the shmem slow path.
 * The frontbuffer is invalidated before and flushed after the writes.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
                         struct drm_i915_gem_object *obj,
                         struct drm_i915_gem_pwrite *args,
                         struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        ssize_t remain;
        loff_t offset, page_base;
        char __user *user_data;
        int page_offset, page_length, ret;

        ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
        if (ret)
                goto out;

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;

        ret = i915_gem_object_put_fence(obj);
        if (ret)
                goto out_unpin;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

        intel_fb_obj_invalidate(obj, ORIGIN_GTT);

        while (remain > 0) {
                /* Operation in this page
                 *
                 * page_base = page offset within aperture
                 * page_offset = offset within page
                 * page_length = bytes to copy for this page
                 */
                page_base = offset & ~PAGE_MASK;
                page_offset = offset_in_page(offset);
                page_length = remain;
                if ((page_offset + remain) > PAGE_SIZE)
                        page_length = PAGE_SIZE - page_offset;

                /* If we get a fault while copying data, then (presumably) our
                 * source page isn't available.  Return the error and we'll
                 * retry in the slow path.
                 */
                if (fast_user_write(dev_priv->gtt.mappable, page_base,
                                    page_offset, user_data, page_length)) {
                        ret = -EFAULT;
                        goto out_flush;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

out_flush:
        intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
        i915_gem_object_ggtt_unpin(obj);
out:
        return ret;
}
825
826 /* Per-page copy function for the shmem pwrite fastpath.
827  * Flushes invalid cachelines before writing to the target if
828  * needs_clflush_before is set and flushes out any written cachelines after
829  * writing if needs_clflush is set. */
830 static int
831 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
832                   char __user *user_data,
833                   bool page_do_bit17_swizzling,
834                   bool needs_clflush_before,
835                   bool needs_clflush_after)
836 {
837         char *vaddr;
838         int ret;
839
840         if (unlikely(page_do_bit17_swizzling))
841                 return -EINVAL;
842
843         vaddr = kmap_atomic(page);
844         if (needs_clflush_before)
845                 drm_clflush_virt_range(vaddr + shmem_page_offset,
846                                        page_length);
847         ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
848                                         user_data, page_length);
849         if (needs_clflush_after)
850                 drm_clflush_virt_range(vaddr + shmem_page_offset,
851                                        page_length);
852         kunmap_atomic(vaddr);
853
854         return ret ? -EFAULT : 0;
855 }
856
857 /* Only difference to the fast-path function is that this can handle bit17
858  * and uses non-atomic copy and kmap functions. */
859 static int
860 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
861                   char __user *user_data,
862                   bool page_do_bit17_swizzling,
863                   bool needs_clflush_before,
864                   bool needs_clflush_after)
865 {
866         char *vaddr;
867         int ret;
868
869         vaddr = kmap(page);
870         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
871                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
872                                              page_length,
873                                              page_do_bit17_swizzling);
874         if (page_do_bit17_swizzling)
875                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
876                                                 user_data,
877                                                 page_length);
878         else
879                 ret = __copy_from_user(vaddr + shmem_page_offset,
880                                        user_data,
881                                        page_length);
882         if (needs_clflush_after)
883                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
884                                              page_length,
885                                              page_do_bit17_swizzling);
886         kunmap(page);
887
888         return ret ? -EFAULT : 0;
889 }
890
891 static int
892 i915_gem_shmem_pwrite(struct drm_device *dev,
893                       struct drm_i915_gem_object *obj,
894                       struct drm_i915_gem_pwrite *args,
895                       struct drm_file *file)
896 {
897         ssize_t remain;
898         loff_t offset;
899         char __user *user_data;
900         int shmem_page_offset, page_length, ret = 0;
901         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
902         int hit_slowpath = 0;
903         int needs_clflush_after = 0;
904         int needs_clflush_before = 0;
905         struct sg_page_iter sg_iter;
906
907         user_data = to_user_ptr(args->data_ptr);
908         remain = args->size;
909
910         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
911
912         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
913                 /* If we're not in the cpu write domain, set ourself into the gtt
914                  * write domain and manually flush cachelines (if required). This
915                  * optimizes for the case when the gpu will use the data
916                  * right away and we therefore have to clflush anyway. */
917                 needs_clflush_after = cpu_write_needs_clflush(obj);
918                 ret = i915_gem_object_wait_rendering(obj, false);
919                 if (ret)
920                         return ret;
921         }
922         /* Same trick applies to invalidate partially written cachelines read
923          * before writing. */
924         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
925                 needs_clflush_before =
926                         !cpu_cache_is_coherent(dev, obj->cache_level);
927
928         ret = i915_gem_object_get_pages(obj);
929         if (ret)
930                 return ret;
931
932         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
933
934         i915_gem_object_pin_pages(obj);
935
936         offset = args->offset;
937         obj->dirty = 1;
938
939         VM_OBJECT_LOCK(obj->base.vm_obj);
940         vm_object_pip_add(obj->base.vm_obj, 1);
941
942         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
943                          offset >> PAGE_SHIFT) {
944                 struct vm_page *page = sg_page_iter_page(&sg_iter);
945                 int partial_cacheline_write;
946
947                 if (remain <= 0)
948                         break;
949
950                 /* Operation in this page
951                  *
952                  * shmem_page_offset = offset within page in shmem file
953                  * page_length = bytes to copy for this page
954                  */
955                 shmem_page_offset = offset_in_page(offset);
956
957                 page_length = remain;
958                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
959                         page_length = PAGE_SIZE - shmem_page_offset;
960
961                 /* If we don't overwrite a cacheline completely we need to be
962                  * careful to have up-to-date data by first clflushing. Don't
963                  * overcomplicate things and flush the entire patch. */
964                 partial_cacheline_write = needs_clflush_before &&
965                         ((shmem_page_offset | page_length)
966                                 & (cpu_clflush_line_size - 1));
967
968                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
969                         (page_to_phys(page) & (1 << 17)) != 0;
970
971                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
972                                         user_data, page_do_bit17_swizzling,
973                                         partial_cacheline_write,
974                                         needs_clflush_after);
975                 if (ret == 0)
976                         goto next_page;
977
978                 hit_slowpath = 1;
979                 mutex_unlock(&dev->struct_mutex);
980                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
981                                         user_data, page_do_bit17_swizzling,
982                                         partial_cacheline_write,
983                                         needs_clflush_after);
984
985                 mutex_lock(&dev->struct_mutex);
986
987                 if (ret)
988                         goto out;
989
990 next_page:
991                 remain -= page_length;
992                 user_data += page_length;
993                 offset += page_length;
994         }
995         vm_object_pip_wakeup(obj->base.vm_obj);
996         VM_OBJECT_UNLOCK(obj->base.vm_obj);
997
998 out:
999         i915_gem_object_unpin_pages(obj);
1000
1001         if (hit_slowpath) {
1002                 /*
1003                  * Fixup: Flush cpu caches in case we didn't flush the dirty
1004                  * cachelines in-line while writing and the object moved
1005                  * out of the cpu write domain while we've dropped the lock.
1006                  */
1007                 if (!needs_clflush_after &&
1008                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1009                         if (i915_gem_clflush_object(obj, obj->pin_display))
1010                                 needs_clflush_after = true;
1011                 }
1012         }
1013
1014         if (needs_clflush_after)
1015                 i915_gem_chipset_flush(dev);
1016         else
1017                 obj->cache_dirty = true;
1018
1019         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1020         return ret;
1021 }
1022
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 *
 * Picks the fastest viable path: GTT pwrite for untiled non-CPU-domain
 * objects, phys-object pwrite for objects with a phys handle, otherwise
 * the shmem fallback.  Holds a runtime-pm reference and struct_mutex for
 * the duration.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_pwrite *args = data;
        struct drm_i915_gem_object *obj;
        int ret;

        if (args->size == 0)
                return 0;

        /* Pre-fault the user buffer so the copy loops below are less
         * likely to take the mutex-dropping slow path. */
        if (likely(!i915.prefault_disable)) {
                ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
                                                   args->size);
                if (ret)
                        return -EFAULT;
        }

        intel_runtime_pm_get(dev_priv);

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto put_rpm;

        /* NOTE(review): &obj->base == NULL relies on 'base' being the
         * first member so a failed lookup yields a NULL address here —
         * idiom used throughout this file. */
        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Bounds check destination. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
                goto out;
        }

        /* prime objects have no backing filp to GEM pread/pwrite
         * pages from.
         */

        trace_i915_gem_object_pwrite(obj, args->offset, args->size);

        ret = -EFAULT;
        /* We can only do the GTT pwrite on untiled buffers, as otherwise
         * it would end up going through the fenced access, and we'll get
         * different detiling behavior between reading and writing.
         * pread/pwrite currently are reading and writing from the CPU
         * perspective, requiring manual detiling by the client.
         */
        if (obj->tiling_mode == I915_TILING_NONE &&
            obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
            cpu_write_needs_clflush(obj)) {
                ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
                 * textures). Fallback to the shmem path in that case. */
        }

        /* GTT path not taken or failed recoverably: fall back. */
        if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
                else
                        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
        }

out:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
put_rpm:
        intel_runtime_pm_put(dev_priv);

        return ret;
}
1104
1105 int
1106 i915_gem_check_wedge(struct i915_gpu_error *error,
1107                      bool interruptible)
1108 {
1109         if (i915_reset_in_progress(error)) {
1110                 /* Non-interruptible callers can't handle -EAGAIN, hence return
1111                  * -EIO unconditionally for these. */
1112                 if (!interruptible)
1113                         return -EIO;
1114
1115                 /* Recovery complete, but the reset failed ... */
1116                 if (i915_terminally_wedged(error))
1117                         return -EIO;
1118
1119                 /*
1120                  * Check if GPU Reset is in progress - we need intel_ring_begin
1121                  * to work properly to reinit the hw state while the gpu is
1122                  * still marked as reset-in-progress. Handle this with a flag.
1123                  */
1124                 if (!error->reload_in_reset)
1125                         return -EAGAIN;
1126         }
1127
1128         return 0;
1129 }
1130
/* Timer callback used to emulate a missed ring interrupt: wake a single
 * waiter sleeping on the wait queue whose address is packed into 'data'. */
static void fake_irq(unsigned long data)
{
        void *wq = (void *)data;

        wakeup_one(wq);
}
1135
1136 static bool missed_irq(struct drm_i915_private *dev_priv,
1137                        struct intel_engine_cs *ring)
1138 {
1139         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1140 }
1141
/* NOTE: optimistic busy-spin carried over from the Linux driver but
 * compiled out in this DragonFly port (its call site in
 * __i915_wait_request is also under #if 0). */
#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
        unsigned long timeout;
        unsigned cpu;

        /* When waiting for high frequency requests, e.g. during synchronous
         * rendering split between the CPU and GPU, the finite amount of time
         * required to set up the irq and wait upon it limits the response
         * rate. By busywaiting on the request completion for a short while we
         * can service the high frequency waits as quick as possible. However,
         * if it is a slow request, we want to sleep as quickly as possible.
         * The tradeoff between waiting and sleeping is roughly the time it
         * takes to sleep on a request, on the order of a microsecond.
         */

        if (req->ring->irq_refcount)
                return -EBUSY;

        /* Only spin if we know the GPU is processing this request */
        if (!i915_gem_request_started(req, true))
                return -EAGAIN;

        timeout = local_clock_us(&cpu) + 5;
        while (!need_resched()) {
                if (i915_gem_request_completed(req, true))
                        return 0;

                if (signal_pending_state(state, current))
                        break;

                if (busywait_stop(timeout, cpu))
                        break;

                cpu_relax_lowlatency();
        }

        if (i915_gem_request_completed(req, false))
                return 0;

        return -EAGAIN;
}
#endif
1185
/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: RPS client to frequency-boost while waiting (may be NULL)
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
                        unsigned reset_counter,
                        bool interruptible,
                        s64 *timeout,
                        struct intel_rps_client *rps)
{
        struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const bool irq_test_in_progress =
                ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
        unsigned long timeout_expire;
        s64 before = 0; /* Only to silence a compiler warning. */
        int ret, sl_timeout = 1;

        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

        /* A retired request has been unlinked from the engine lists and
         * is by definition complete. */
        if (list_empty(&req->list))
                return 0;

        if (i915_gem_request_completed(req, true))
                return 0;

        timeout_expire = 0;
        if (timeout) {
                if (WARN_ON(*timeout < 0))
                        return -EINVAL;

                if (*timeout == 0)
                        return -ETIME;

                timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

                /*
                 * Record current time in case interrupted by signal, or wedged.
                 */
                before = ktime_get_raw_ns();
        }

        if (INTEL_INFO(dev_priv)->gen >= 6)
                gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

        trace_i915_gem_request_wait_begin(req);

        /* Optimistic spin for the next jiffie before touching IRQs */
#if 0
        ret = __i915_spin_request(req);
        if (ret == 0)
                goto out;
#endif

        if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
                ret = -ENODEV;
                goto out;
        }

        lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
        for (;;) {
                struct timer_list timer;

                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
                if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
                        /* ... but upgrade the -EAGAIN to an -EIO if the gpu
                         * is truly gone. */
                        ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
                        if (ret == 0)
                                ret = -EAGAIN;
                        break;
                }

                if (i915_gem_request_completed(req, false)) {
                        ret = 0;
                        break;
                }

                if (interruptible && signal_pending(curthread->td_lwp)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                if (timeout && time_after_eq(jiffies, timeout_expire)) {
                        ret = -ETIME;
                        break;
                }

                /* Arm a one-shot timer so the sleep below is bounded either
                 * by the caller's deadline or, on rings known to miss
                 * interrupts, by one jiffy so we re-poll the seqno. */
                timer.function = NULL;
                if (timeout || missed_irq(dev_priv, ring)) {
                        unsigned long expire;

                        setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
                        expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
                        sl_timeout = expire - jiffies;
                        if (sl_timeout < 1)
                                sl_timeout = 1;
                        mod_timer(&timer, expire);
                }

#if 0
                io_schedule();
#endif

                if (timer.function) {
                        del_singleshot_timer_sync(&timer);
                        destroy_timer_on_stack(&timer);
                }

                /* DragonFly: sleep on the ring's wait queue; lksleep()
                 * releases and reacquires the queue lock around the sleep. */
                lksleep(&ring->irq_queue, &ring->irq_queue.lock,
                        interruptible ? PCATCH : 0, "lwe", sl_timeout);
        }
        lockmgr(&ring->irq_queue.lock, LK_RELEASE);
        if (!irq_test_in_progress)
                ring->irq_put(ring);

out:
        trace_i915_gem_request_wait_end(req);

        if (timeout) {
                s64 tres = *timeout - (ktime_get_raw_ns() - before);

                *timeout = tres < 0 ? 0 : tres;

                /*
                 * Apparently ktime isn't accurate enough and occasionally has a
                 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
                 * things up to make the test happy. We allow up to 1 jiffy.
                 *
                 * This is a regression from the timespec->ktime conversion.
                 */
                if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
                        *timeout = 0;
        }

        return ret;
}
1338
1339 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1340                                    struct drm_file *file)
1341 {
1342         struct drm_i915_private *dev_private;
1343         struct drm_i915_file_private *file_priv;
1344
1345         WARN_ON(!req || !file || req->file_priv);
1346
1347         if (!req || !file)
1348                 return -EINVAL;
1349
1350         if (req->file_priv)
1351                 return -EINVAL;
1352
1353         dev_private = req->ring->dev->dev_private;
1354         file_priv = file->driver_priv;
1355
1356         spin_lock(&file_priv->mm.lock);
1357         req->file_priv = file_priv;
1358         list_add_tail(&req->client_list, &file_priv->mm.request_list);
1359         spin_unlock(&file_priv->mm.lock);
1360
1361         req->pid = curproc->p_pid;
1362
1363         return 0;
1364 }
1365
1366 static inline void
1367 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1368 {
1369         struct drm_i915_file_private *file_priv = request->file_priv;
1370
1371         if (!file_priv)
1372                 return;
1373
1374         spin_lock(&file_priv->mm.lock);
1375         list_del(&request->client_list);
1376         request->file_priv = NULL;
1377         spin_unlock(&file_priv->mm.lock);
1378
1379 #if 0
1380         put_pid(request->pid);
1381         request->pid = NULL;
1382 #endif
1383 }
1384
/* Remove a completed request from all tracking lists and drop the
 * reference those lists held on it. */
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
        trace_i915_gem_request_retire(request);

        /* We know the GPU must have read the request to have
         * sent us the seqno + interrupt, so use the position
         * of tail of the request to update the last known position
         * of the GPU head.
         *
         * Note this requires that we are always called in request
         * completion order.
         */
        request->ringbuf->last_retired_head = request->postfix;

        list_del_init(&request->list);
        i915_gem_request_remove_from_client(request);

        /* Drop the list's reference; may free the request. */
        i915_gem_request_unreference(request);
}
1404
/* Retire, in submission order, every request on 'req's engine up to and
 * including 'req' itself.  No-op if 'req' was already retired (i.e. it
 * has been unlinked from the engine's request list). */
static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
        struct intel_engine_cs *engine = req->ring;
        struct drm_i915_gem_request *tmp;

        lockdep_assert_held(&engine->dev->struct_mutex);

        if (list_empty(&req->list))
                return;

        /* Requests retire strictly in completion order, so walk from the
         * head of the engine list until we have consumed 'req'. */
        do {
                tmp = list_first_entry(&engine->request_list,
                                       typeof(*tmp), list);

                i915_gem_request_retire(tmp);
        } while (tmp != req);

        WARN_ON(i915_verify_lists(engine->dev));
}
1425
/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 *
 * Must be called with struct_mutex held.  On success all requests up to
 * and including @req have been retired.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
        struct drm_device *dev;
        struct drm_i915_private *dev_priv;
        bool interruptible;
        int ret;

        BUG_ON(req == NULL);

        dev = req->ring->dev;
        dev_priv = dev->dev_private;
        interruptible = dev_priv->mm.interruptible;

        BUG_ON(!mutex_is_locked(&dev->struct_mutex));

        /* Bail out early if the GPU is wedged or a reset is pending. */
        ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
        if (ret)
                return ret;

        ret = __i915_wait_request(req,
                                  atomic_read(&dev_priv->gpu_error.reset_counter),
                                  interruptible, NULL, NULL);
        if (ret)
                return ret;

        __i915_gem_request_retire__upto(req);
        return 0;
}
1459
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 *
 * @obj: object to wait on
 * @readonly: if true, only wait for the last outstanding GPU write;
 *            outstanding reads on other rings may remain active
 *
 * Must be called with struct_mutex held.  Returns 0 or the error
 * propagated from i915_wait_request().
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
                               bool readonly)
{
        int ret, i;

        if (!obj->active)
                return 0;

        if (readonly) {
                if (obj->last_write_req != NULL) {
                        ret = i915_wait_request(obj->last_write_req);
                        if (ret)
                                return ret;

                        /* The write request also counts as a read on its
                         * own ring; retire whichever tracking slot matches. */
                        i = obj->last_write_req->ring->id;
                        if (obj->last_read_req[i] == obj->last_write_req)
                                i915_gem_object_retire__read(obj, i);
                        else
                                i915_gem_object_retire__write(obj);
                }
        } else {
                for (i = 0; i < I915_NUM_RINGS; i++) {
                        if (obj->last_read_req[i] == NULL)
                                continue;

                        ret = i915_wait_request(obj->last_read_req[i]);
                        if (ret)
                                return ret;

                        i915_gem_object_retire__read(obj, i);
                }
                /* Every read retired -> the object must now be idle. */
                RQ_BUG_ON(obj->active);
        }

        return 0;
}
1501
1502 static void
1503 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1504                                struct drm_i915_gem_request *req)
1505 {
1506         int ring = req->ring->id;
1507
1508         if (obj->last_read_req[ring] == req)
1509                 i915_gem_object_retire__read(obj, ring);
1510         else if (obj->last_write_req == req)
1511                 i915_gem_object_retire__write(obj);
1512
1513         __i915_gem_request_retire__upto(req);
1514 }
1515
/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 *
 * Snapshots the outstanding requests (and the reset counter) under
 * struct_mutex, drops the mutex while waiting on each request, then
 * retires the completed ones after retaking the mutex.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
                                            struct intel_rps_client *rps,
                                            bool readonly)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_request *requests[I915_NUM_RINGS];
        unsigned reset_counter;
        int ret, i, n = 0;

        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
        BUG_ON(!dev_priv->mm.interruptible);

        if (!obj->active)
                return 0;

        ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
        if (ret)
                return ret;

        /* Sample the reset counter before dropping the mutex so
         * __i915_wait_request can detect a reset that happens meanwhile. */
        reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

        if (readonly) {
                struct drm_i915_gem_request *req;

                req = obj->last_write_req;
                if (req == NULL)
                        return 0;

                requests[n++] = i915_gem_request_reference(req);
        } else {
                for (i = 0; i < I915_NUM_RINGS; i++) {
                        struct drm_i915_gem_request *req;

                        req = obj->last_read_req[i];
                        if (req == NULL)
                                continue;

                        requests[n++] = i915_gem_request_reference(req);
                }
        }

        mutex_unlock(&dev->struct_mutex);
        for (i = 0; ret == 0 && i < n; i++)
                ret = __i915_wait_request(requests[i], reset_counter, true,
                                          NULL, rps);
        mutex_lock(&dev->struct_mutex);

        for (i = 0; i < n; i++) {
                if (ret == 0)
                        i915_gem_object_retire_request(obj, requests[i]);
                /* Drop the reference taken above, on every path. */
                i915_gem_request_unreference(requests[i]);
        }

        return ret;
}
1576
1577 static struct intel_rps_client *to_rps_client(struct drm_file *file)
1578 {
1579         struct drm_i915_file_private *fpriv = file->driver_priv;
1580         return &fpriv->rps;
1581 }
1582
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 *
 * Validates that only CPU-visible domains are requested, then moves the
 * object into the GTT or CPU domain as asked.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        uint32_t read_domains = args->read_domains;
        uint32_t write_domain = args->write_domain;
        int ret;

        /* Only handle setting domains to types used by the CPU. */
        if (write_domain & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        if (read_domains & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /* Having something in the write domain implies it's in the read
         * domain, and only that read domain.  Enforce that in the request.
         */
        if (write_domain != 0 && read_domains != write_domain)
                return -EINVAL;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        ret = i915_gem_object_wait_rendering__nonblocking(obj,
                                                          to_rps_client(file),
                                                          !write_domain);
        if (ret)
                goto unref;

        if (read_domains & I915_GEM_DOMAIN_GTT)
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        else
                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

        /* Upcoming CPU/GTT writes may dirty a displayed framebuffer;
         * notify the frontbuffer tracking code. */
        if (write_domain != 0)
                intel_fb_obj_invalidate(obj,
                                        write_domain == I915_GEM_DOMAIN_GTT ?
                                        ORIGIN_GTT : ORIGIN_CPU);

unref:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}
1646
/**
 * Called when user space has done writes to this buffer
 *
 * For displayable (pin_display) objects the CPU write domain must be
 * flushed so the display engine, which reads uncached, sees the writes.
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file)
{
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Pinned buffers may be scanout, so flush the cache */
        if (obj->pin_display)
                i915_gem_object_flush_cpu_write_domain(obj);

        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}
1677
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look a this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 *
 * DragonFly implementation: maps the GEM object's backing VM object into
 * the calling process's vm_map via vm_map_find().
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_mmap *args = data;
        struct drm_gem_object *obj;
        unsigned long addr;

        struct proc *p = curproc;
        vm_map_t map = &p->p_vmspace->vm_map;
        vm_size_t size;
        int error = 0, rv;

        if (args->flags & ~(I915_MMAP_WC))
                return -EINVAL;

        obj = drm_gem_object_lookup(dev, file, args->handle);
        if (obj == NULL)
                return -ENOENT;

        /* NOTE(review): a zero-size request returns success without
         * setting args->addr_ptr — confirm userspace tolerates this. */
        if (args->size == 0)
                goto out;

        /* Enforce the per-process virtual memory resource limit. */
        size = round_page(args->size);
        if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
                error = -ENOMEM;
                goto out;
        }

        /* prime objects have no backing filp to GEM mmap
         * pages from.
         */

        /*
         * Call hint to ensure that NULL is not returned as a valid address
         * and to reduce vm_map traversals. XXX causes instability, use a
         * fixed low address as the start point instead to avoid the NULL
         * return issue.
         */

        addr = PAGE_SIZE;

        /*
         * Use 256KB alignment.  It is unclear why this matters for a
         * virtual address but it appears to fix a number of application/X
         * crashes and kms console switching is much faster.
         */
        /* Take an extra reference on the VM object; vm_map_find consumes
         * it on success, and we deallocate it ourselves on failure. */
        vm_object_hold(obj->vm_obj);
        vm_object_reference_locked(obj->vm_obj);
        vm_object_drop(obj->vm_obj);

        rv = vm_map_find(map, obj->vm_obj, NULL,
                         args->offset, &addr, args->size,
                         256 * 1024, /* align */
                         TRUE, /* fitit */
                         VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
                         VM_PROT_READ | VM_PROT_WRITE, /* prot */
                         VM_PROT_READ | VM_PROT_WRITE, /* max */
                         MAP_SHARED /* cow */);
        if (rv != KERN_SUCCESS) {
                vm_object_deallocate(obj->vm_obj);
                error = -vm_mmap_to_errno(rv);
        } else {
                args->addr_ptr = (uint64_t)addr;
        }
out:
        drm_gem_object_unreference(obj);
        return (error);
}
1764
1765 /**
1766  * i915_gem_fault - fault a page into the GTT
1767  *
1768  * vm_obj is locked on entry and expected to be locked on return.
1769  *
1770  * The vm_pager has placemarked the object with an anonymous memory page
1771  * which we must replace atomically to avoid races against concurrent faults
1772  * on the same page.  XXX we currently are unable to do this atomically.
1773  *
1774  * If we are to return an error we should not touch the anonymous page,
1775  * the caller will deallocate it.
1776  *
1777  * XXX Most GEM calls appear to be interruptable, but we can't hard loop
1778  * in that case.  Release all resources and wait 1 tick before retrying.
1779  * This is a huge problem which needs to be fixed by getting rid of most
1780  * of the interruptability.  The linux code does not retry but does appear
1781  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1782  * to be able to retry.
1783  *
1784  * --
1785  * @vma: VMA in question
1786  * @vmf: fault info
1787  *
1788  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1789  * from userspace.  The fault handler takes care of binding the object to
1790  * the GTT (if needed), allocating and programming a fence register (again,
1791  * only if needed based on whether the old reg is still valid or the object
1792  * is tiled) and inserting a new PTE into the faulting process.
1793  *
1794  * Note that the faulting process may involve evicting existing objects
1795  * from the GTT and/or fence registers to make room.  So performance may
1796  * suffer if the GTT working set is large or there are few fence registers
1797  * left.
1798  *
1799  * vm_obj is locked on entry and expected to be locked on return.  The VM
1800  * pager has placed an anonymous memory page at (obj,offset) which we have
1801  * to replace.
1802  */
1803 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1804 {
1805         struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1806         struct drm_device *dev = obj->base.dev;
1807         struct drm_i915_private *dev_priv = dev->dev_private;
1808         struct i915_ggtt_view view = i915_ggtt_view_normal;
1809         unsigned long page_offset;
1810         vm_page_t m, oldm = NULL;
1811         int ret = 0;
1812         bool write = !!(prot & VM_PROT_WRITE);
1813
1814         intel_runtime_pm_get(dev_priv);
1815
1816         /* We don't use vmf->pgoff since that has the fake offset */
1817         page_offset = (unsigned long)offset;
1818
1819 retry:
1820         ret = i915_mutex_lock_interruptible(dev);
1821         if (ret)
1822                 goto out;
1823
1824         trace_i915_gem_object_fault(obj, page_offset, true, write);
1825
1826         /* Try to flush the object off the GPU first without holding the lock.
1827          * Upon reacquiring the lock, we will perform our sanity checks and then
1828          * repeat the flush holding the lock in the normal manner to catch cases
1829          * where we are gazumped.
1830          */
1831         ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1832         if (ret)
1833                 goto unlock;
1834
1835         /* Access to snoopable pages through the GTT is incoherent. */
1836         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1837                 ret = -EFAULT;
1838                 goto unlock;
1839         }
1840
1841         /* Use a partial view if the object is bigger than the aperture. */
1842         if (obj->base.size >= dev_priv->gtt.mappable_end &&
1843             obj->tiling_mode == I915_TILING_NONE) {
1844 #if 0
1845                 static const unsigned int chunk_size = 256; // 1 MiB
1846
1847                 memset(&view, 0, sizeof(view));
1848                 view.type = I915_GGTT_VIEW_PARTIAL;
1849                 view.params.partial.offset = rounddown(page_offset, chunk_size);
1850                 view.params.partial.size =
1851                         min_t(unsigned int,
1852                               chunk_size,
1853                               (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1854                               view.params.partial.offset);
1855 #endif
1856         }
1857
1858         /* Now pin it into the GTT if needed */
1859         ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
1860         if (ret)
1861                 goto unlock;
1862
1863         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1864         if (ret)
1865                 goto unpin;
1866
1867         ret = i915_gem_object_get_fence(obj);
1868         if (ret)
1869                 goto unpin;
1870
1871         /*
1872          * START FREEBSD MAGIC
1873          *
1874          * Add a pip count to avoid destruction and certain other
1875          * complex operations (such as collapses?) while unlocked.
1876          */
1877         vm_object_pip_add(vm_obj, 1);
1878
1879         /*
1880          * XXX We must currently remove the placeholder page now to avoid
1881          * a deadlock against a concurrent i915_gem_release_mmap().
1882          * Otherwise concurrent operation will block on the busy page
1883          * while holding locks which we need to obtain.
1884          */
1885         if (*mres != NULL) {
1886                 oldm = *mres;
1887                 if ((oldm->flags & PG_BUSY) == 0)
1888                         kprintf("i915_gem_fault: Page was not busy\n");
1889                 else
1890                         vm_page_remove(oldm);
1891                 *mres = NULL;
1892         } else {
1893                 oldm = NULL;
1894         }
1895
1896         ret = 0;
1897         m = NULL;
1898
1899         /*
1900          * Since the object lock was dropped, another thread might have
1901          * faulted on the same GTT address and instantiated the mapping.
1902          * Recheck.
1903          */
1904         m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1905         if (m != NULL) {
1906                 /*
1907                  * Try to busy the page, retry on failure (non-zero ret).
1908                  */
1909                 if (vm_page_busy_try(m, false)) {
1910                         kprintf("i915_gem_fault: PG_BUSY\n");
1911                         ret = -EINTR;
1912                         goto unlock;
1913                 }
1914                 goto have_page;
1915         }
1916         /*
1917          * END FREEBSD MAGIC
1918          */
1919
1920         obj->fault_mappable = true;
1921
1922         /* Finally, remap it using the new GTT offset */
1923         m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
1924                         i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
1925         if (m == NULL) {
1926                 ret = -EFAULT;
1927                 goto unpin;
1928         }
1929         KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
1930         KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
1931
1932         /*
1933          * Try to busy the page.  Fails on non-zero return.
1934          */
1935         if (vm_page_busy_try(m, false)) {
1936                 kprintf("i915_gem_fault: PG_BUSY(2)\n");
1937                 ret = -EINTR;
1938                 goto unpin;
1939         }
1940         m->valid = VM_PAGE_BITS_ALL;
1941
1942 #if 0
1943         if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1944                 /* Overriding existing pages in partial view does not cause
1945                  * us any trouble as TLBs are still valid because the fault
1946                  * is due to userspace losing part of the mapping or never
1947                  * having accessed it before (at this partials' range).
1948                  */
1949                 unsigned long base = vma->vm_start +
1950                                      (view.params.partial.offset << PAGE_SHIFT);
1951                 unsigned int i;
1952
1953                 for (i = 0; i < view.params.partial.size; i++) {
1954                         ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
1955                         if (ret)
1956                                 break;
1957                 }
1958
1959                 obj->fault_mappable = true;
1960         } else {
1961                 if (!obj->fault_mappable) {
1962                         unsigned long size = min_t(unsigned long,
1963                                                    vma->vm_end - vma->vm_start,
1964                                                    obj->base.size);
1965                         int i;
1966
1967                         for (i = 0; i < size >> PAGE_SHIFT; i++) {
1968                                 ret = vm_insert_pfn(vma,
1969                                                     (unsigned long)vma->vm_start + i * PAGE_SIZE,
1970                                                     pfn + i);
1971                                 if (ret)
1972                                         break;
1973                         }
1974
1975                         obj->fault_mappable = true;
1976                 } else
1977                         ret = vm_insert_pfn(vma,
1978                                             (unsigned long)vmf->virtual_address,
1979                                             pfn + page_offset);
1980 #endif
1981                         vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1982 #if 0
1983         }
1984 #endif
1985
1986 have_page:
1987         *mres = m;
1988
1989         i915_gem_object_ggtt_unpin_view(obj, &view);
1990         mutex_unlock(&dev->struct_mutex);
1991         ret = VM_PAGER_OK;
1992         goto done;
1993
1994         /*
1995          * ALTERNATIVE ERROR RETURN.
1996          *
1997          * OBJECT EXPECTED TO BE LOCKED.
1998          */
1999 unpin:
2000         i915_gem_object_ggtt_unpin_view(obj, &view);
2001 unlock:
2002         mutex_unlock(&dev->struct_mutex);
2003 out:
2004         switch (ret) {
2005         case -EIO:
2006                 /*
2007                  * We eat errors when the gpu is terminally wedged to avoid
2008                  * userspace unduly crashing (gl has no provisions for mmaps to
2009                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
2010                  * and so needs to be reported.
2011                  */
2012                 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2013 //                      ret = VM_FAULT_SIGBUS;
2014                         break;
2015                 }
2016         case -EAGAIN:
2017                 /*
2018                  * EAGAIN means the gpu is hung and we'll wait for the error
2019                  * handler to reset everything when re-faulting in
2020                  * i915_mutex_lock_interruptible.
2021                  */
2022         case -ERESTARTSYS:
2023         case -EINTR:
2024                 VM_OBJECT_UNLOCK(vm_obj);
2025                 int dummy;
2026                 tsleep(&dummy, 0, "delay", 1); /* XXX */
2027                 VM_OBJECT_LOCK(vm_obj);
2028                 goto retry;
2029         default:
2030                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2031                 ret = VM_PAGER_ERROR;
2032                 break;
2033         }
2034
2035 done:
2036         if (oldm != NULL)
2037                 vm_page_free(oldm);
2038         vm_object_pip_wakeup(vm_obj);
2039
2040         intel_runtime_pm_put(dev_priv);
2041         return ret;
2042 }
2043
2044 /**
2045  * i915_gem_release_mmap - remove physical page mappings
2046  * @obj: obj in question
2047  *
2048  * Preserve the reservation of the mmapping with the DRM core code, but
2049  * relinquish ownership of the pages back to the system.
2050  *
2051  * It is vital that we remove the page mapping if we have mapped a tiled
2052  * object through the GTT and then lose the fence register due to
2053  * resource pressure. Similarly if the object has been moved out of the
2054  * aperture, than pages mapped into userspace must be revoked. Removing the
2055  * mapping will then trigger a page fault on the next user access, allowing
2056  * fixup by i915_gem_fault().
2057  */
2058 void
2059 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2060 {
2061         vm_object_t devobj;
2062         vm_page_t m;
2063         int i, page_count;
2064
2065         if (!obj->fault_mappable)
2066                 return;
2067
2068         devobj = cdev_pager_lookup(obj);
2069         if (devobj != NULL) {
2070                 page_count = OFF_TO_IDX(obj->base.size);
2071
2072                 VM_OBJECT_LOCK(devobj);
2073                 for (i = 0; i < page_count; i++) {
2074                         m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2075                         if (m == NULL)
2076                                 continue;
2077                         cdev_pager_free_page(devobj, m);
2078                 }
2079                 VM_OBJECT_UNLOCK(devobj);
2080                 vm_object_deallocate(devobj);
2081         }
2082
2083         obj->fault_mappable = false;
2084 }
2085
2086 void
2087 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2088 {
2089         struct drm_i915_gem_object *obj;
2090
2091         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2092                 i915_gem_release_mmap(obj);
2093 }
2094
2095 uint32_t
2096 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2097 {
2098         uint32_t gtt_size;
2099
2100         if (INTEL_INFO(dev)->gen >= 4 ||
2101             tiling_mode == I915_TILING_NONE)
2102                 return size;
2103
2104         /* Previous chips need a power-of-two fence region when tiling */
2105         if (INTEL_INFO(dev)->gen == 3)
2106                 gtt_size = 1024*1024;
2107         else
2108                 gtt_size = 512*1024;
2109
2110         while (gtt_size < size)
2111                 gtt_size <<= 1;
2112
2113         return gtt_size;
2114 }
2115
2116 /**
2117  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2118  * @obj: object to check
2119  *
2120  * Return the required GTT alignment for an object, taking into account
2121  * potential fence register mapping.
2122  */
2123 uint32_t
2124 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2125                            int tiling_mode, bool fenced)
2126 {
2127         /*
2128          * Minimum alignment is 4k (GTT page size), but might be greater
2129          * if a fence register is needed for the object.
2130          */
2131         if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2132             tiling_mode == I915_TILING_NONE)
2133                 return 4096;
2134
2135         /*
2136          * Previous chips need to be aligned to the size of the smallest
2137          * fence register that can contain the object.
2138          */
2139         return i915_gem_get_gtt_size(dev, size, tiling_mode);
2140 }
2141
/*
 * Reserve a fake mmap offset for the object, shrinking other objects to
 * reclaim node space if the offset manager's range is exhausted.
 * Returns 0 on success or a negative errno.
 */
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

#if 0
	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;
#endif

	/* The shrinker runs below; forbid it from stealing struct_mutex
	 * while we are inside this allocation dance. */
	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_shrink(dev_priv,
			obj->base.size >> PAGE_SHIFT,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_PURGEABLE);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Last resort: purge everything we can and try once more. */
	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;

	return ret;
}
2181
/* Release the fake mmap offset reserved for this object, if any. */
static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	drm_gem_free_mmap_offset(&obj->base);
}
2186
2187 int
2188 i915_gem_mmap_gtt(struct drm_file *file,
2189                   struct drm_device *dev,
2190                   uint32_t handle,
2191                   uint64_t *offset)
2192 {
2193         struct drm_i915_gem_object *obj;
2194         int ret;
2195
2196         ret = i915_mutex_lock_interruptible(dev);
2197         if (ret)
2198                 return ret;
2199
2200         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2201         if (&obj->base == NULL) {
2202                 ret = -ENOENT;
2203                 goto unlock;
2204         }
2205
2206         if (obj->madv != I915_MADV_WILLNEED) {
2207                 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2208                 ret = -EFAULT;
2209                 goto out;
2210         }
2211
2212         ret = i915_gem_object_create_mmap_offset(obj);
2213         if (ret)
2214                 goto out;
2215
2216         *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2217             DRM_GEM_MAPPING_KEY;
2218
2219 out:
2220         drm_gem_object_unreference(&obj->base);
2221 unlock:
2222         mutex_unlock(&dev->struct_mutex);
2223         return ret;
2224 }
2225
2226 /**
2227  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2228  * @dev: DRM device
2229  * @data: GTT mapping ioctl data
2230  * @file: GEM object info
2231  *
2232  * Simply returns the fake offset to userspace so it can mmap it.
2233  * The mmap call will end up in drm_gem_mmap(), which will set things
2234  * up so we can get faults in the handler above.
2235  *
2236  * The fault handler will take care of binding the object into the GTT
2237  * (since it may have been evicted to make room for something), allocating
2238  * a fence register, and mapping the appropriate aperture address into
2239  * userspace.
2240  */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	/* Thin ioctl wrapper: hand the fake mmap offset back to userspace. */
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}
2249
2250 /* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	vm_object_t vm_obj;

	/* Drop every backing page of the VM object (0,0 = whole range). */
	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_page_remove(vm_obj, 0, 0, false);
	VM_OBJECT_UNLOCK(vm_obj);

	/* Mark purged: get_pages/mmap paths check madv and refuse the
	 * object from here on. */
	obj->madv = __I915_MADV_PURGED;
}
2263
2264 /* Try to discard unwanted pages */
static void
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
#if 0
	struct address_space *mapping;
#endif

	/* DONTNEED objects are truncated immediately; truncation leaves
	 * them purged, so both cases are done. */
	switch (obj->madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fallthrough */
	case __I915_MADV_PURGED:
		return;
	}

#if 0
	if (obj->base.filp == NULL)
		return;

	mapping = file_inode(obj->base.filp)->i_mapping,
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
#endif
}
2287
/*
 * Release the backing pages gathered by i915_gem_object_get_pages_gtt():
 * flush GPU/GTT state, write back tiling swizzle data, then unwire each
 * page and free the scatterlist.
 */
static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	struct sg_page_iter sg_iter;
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	/* Pull the object into the CPU domain so pending GTT writes are
	 * flushed before the pages are released. */
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj, true);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	i915_gem_gtt_finish_object(obj);

	/* Preserve bit-17 swizzle state across the unbind on affected HW. */
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	/* DONTNEED data need not be written back. */
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	/* Drop the wiring taken at get_pages time (the second argument to
	 * vm_page_unwire presumably requests activation — see vm_page(9)). */
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
		struct vm_page *page = sg_page_iter_page(&sg_iter);

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		vm_page_busy_wait(page, FALSE, "i915gem");
		vm_page_unwire(page, 1);
		vm_page_wakeup(page);
	}
	obj->dirty = 0;

	sg_free_table(obj->pages);
	kfree(obj->pages);
}
2332
/*
 * Release the object's page array via its ops vector.  Fails with
 * -EBUSY while the pages are pinned; a no-op if no pages are attached.
 */
int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	if (obj->pages_pin_count)
		return -EBUSY;

	BUG_ON(i915_gem_obj_bound_any(obj));

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
	list_del(&obj->global_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	/* Discard backing storage if the object was marked DONTNEED. */
	i915_gem_object_invalidate(obj);

	return 0;
}
2358
2359 static int
2360 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2361 {
2362         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2363         int page_count, i;
2364         vm_object_t vm_obj;
2365         struct sg_table *st;
2366         struct scatterlist *sg;
2367         struct sg_page_iter sg_iter;
2368         struct vm_page *page;
2369         unsigned long last_pfn = 0;     /* suppress gcc warning */
2370         int ret;
2371
2372         /* Assert that the object is not currently in any GPU domain. As it
2373          * wasn't in the GTT, there shouldn't be any way it could have been in
2374          * a GPU cache
2375          */
2376         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2377         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2378
2379         st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
2380         if (st == NULL)
2381                 return -ENOMEM;
2382
2383         page_count = obj->base.size / PAGE_SIZE;
2384         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2385                 kfree(st);
2386                 return -ENOMEM;
2387         }
2388
2389         /* Get the list of pages out of our struct file.  They'll be pinned
2390          * at this point until we release them.
2391          *
2392          * Fail silently without starting the shrinker
2393          */
2394         vm_obj = obj->base.vm_obj;
2395         VM_OBJECT_LOCK(vm_obj);
2396         sg = st->sgl;
2397         st->nents = 0;
2398         for (i = 0; i < page_count; i++) {
2399                 page = shmem_read_mapping_page(vm_obj, i);
2400                 if (IS_ERR(page)) {
2401                         i915_gem_shrink(dev_priv,
2402                                         page_count,
2403                                         I915_SHRINK_BOUND |
2404                                         I915_SHRINK_UNBOUND |
2405                                         I915_SHRINK_PURGEABLE);
2406                         page = shmem_read_mapping_page(vm_obj, i);
2407                 }
2408                 if (IS_ERR(page)) {
2409                         /* We've tried hard to allocate the memory by reaping
2410                          * our own buffer, now let the real VM do its job and
2411                          * go down in flames if truly OOM.
2412                          */
2413                         i915_gem_shrink_all(dev_priv);
2414                         page = shmem_read_mapping_page(vm_obj, i);
2415                         if (IS_ERR(page)) {
2416                                 ret = PTR_ERR(page);
2417                                 goto err_pages;
2418                         }
2419                 }
2420 #ifdef CONFIG_SWIOTLB
2421                 if (swiotlb_nr_tbl()) {
2422                         st->nents++;
2423                         sg_set_page(sg, page, PAGE_SIZE, 0);
2424                         sg = sg_next(sg);
2425                         continue;
2426                 }
2427 #endif
2428                 if (!i || page_to_pfn(page) != last_pfn + 1) {
2429                         if (i)
2430                                 sg = sg_next(sg);
2431                         st->nents++;
2432                         sg_set_page(sg, page, PAGE_SIZE, 0);
2433                 } else {
2434                         sg->length += PAGE_SIZE;
2435                 }
2436                 last_pfn = page_to_pfn(page);
2437
2438                 /* Check that the i965g/gm workaround works. */
2439         }
2440 #ifdef CONFIG_SWIOTLB
2441         if (!swiotlb_nr_tbl())
2442 #endif
2443                 sg_mark_end(sg);
2444         obj->pages = st;
2445         VM_OBJECT_UNLOCK(vm_obj);
2446
2447         ret = i915_gem_gtt_prepare_object(obj);
2448         if (ret)
2449                 goto err_pages;
2450
2451         if (i915_gem_object_needs_bit17_swizzle(obj))
2452                 i915_gem_object_do_bit_17_swizzle(obj);
2453
2454         if (obj->tiling_mode != I915_TILING_NONE &&
2455             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2456                 i915_gem_object_pin_pages(obj);
2457
2458         return 0;
2459
2460 err_pages:
2461         sg_mark_end(sg);
2462         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2463                 page = sg_page_iter_page(&sg_iter);
2464                 vm_page_busy_wait(page, FALSE, "i915gem");
2465                 vm_page_unwire(page, 0);
2466                 vm_page_wakeup(page);
2467         }
2468         VM_OBJECT_UNLOCK(vm_obj);
2469         sg_free_table(st);
2470         kfree(st);
2471
2472         /* shmemfs first checks if there is enough memory to allocate the page
2473          * and reports ENOSPC should there be insufficient, along with the usual
2474          * ENOMEM for a genuine allocation failure.
2475          *
2476          * We use ENOSPC in our driver to mean that we have run out of aperture
2477          * space and so want to translate the error from shmemfs back to our
2478          * usual understanding of ENOMEM.
2479          */
2480         if (ret == -ENOSPC)
2481                 ret = -ENOMEM;
2482
2483         return ret;
2484 }
2485
2486 /* Ensure that the associated pages are gathered from the backing storage
2487  * and pinned into our object. i915_gem_object_get_pages() may be called
2488  * multiple times before they are released by a single call to
2489  * i915_gem_object_put_pages() - once the pages are no longer referenced
2490  * either as a result of memory pressure (reaping pages under the shrinker)
2491  * or as the object is itself released.
2492  */
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	const struct drm_i915_gem_object_ops *ops = obj->ops;
	int ret;

	/* Already populated: get_pages is idempotent until put_pages. */
	if (obj->pages)
		return 0;

	/* Purged/purgeable objects may never be repopulated. */
	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
		return -EFAULT;
	}

	BUG_ON(obj->pages_pin_count);

	ret = ops->get_pages(obj);
	if (ret)
		return ret;

	/* Freshly populated objects start out on the unbound list. */
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);

	/* Reset the cached sg-walk cursor used for page lookups. */
	obj->get_page.sg = obj->pages->sgl;
	obj->get_page.last = 0;

	return 0;
}
2521
/*
 * Mark @vma's object active on the ring servicing @req: record the last
 * read request per ring and move both the object and the VMA onto their
 * respective active lists.
 */
void i915_vma_move_to_active(struct i915_vma *vma,
			     struct drm_i915_gem_request *req)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct intel_engine_cs *ring;

	ring = i915_gem_request_get_ring(req);

	/* Add a reference if we're newly entering the active list. */
	if (obj->active == 0)
		drm_gem_object_reference(&obj->base);
	/* obj->active is a bitmask of rings with outstanding reads. */
	obj->active |= intel_ring_flag(ring);

	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);

	list_move_tail(&vma->vm_link, &vma->vm->active_list);
}
2540
/* The GPU finished writing: drop the last-write request and flush
 * frontbuffer tracking (ORIGIN_CS = write came from the command stream). */
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
{
	RQ_BUG_ON(obj->last_write_req == NULL);
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));

	i915_gem_request_assign(&obj->last_write_req, NULL);
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
}
2550
/*
 * Retire the last read on @ring.  Clears the ring's bit in obj->active;
 * when the object becomes wholly idle, move it to the inactive lists and
 * drop the active-list reference taken in i915_vma_move_to_active().
 */
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
{
	struct i915_vma *vma;

	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
	RQ_BUG_ON(!(obj->active & (1 << ring)));

	list_del_init(&obj->ring_list[ring]);
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);

	/* A write completes no later than the last read on its ring. */
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
		i915_gem_object_retire__write(obj);

	obj->active &= ~(1 << ring);
	if (obj->active)
		return;

	/* Bump our place on the bound list to keep it roughly in LRU order
	 * so that we don't steal from recently used but inactive objects
	 * (unless we are forced to ofc!)
	 */
	list_move_tail(&obj->global_list,
		       &to_i915(obj->base.dev)->mm.bound_list);

	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (!list_empty(&vma->vm_link))
			list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915_gem_request_assign(&obj->last_fenced_req, NULL);
	drm_gem_object_unreference(&obj->base);
}
2584
/* (Re)initialise the seqno on every engine: first idle the rings and
 * retire all outstanding requests, then program @seqno into the hardware
 * state and forget previously-seen inter-ring semaphore values.
 * Returns 0 on success or the error from idling a ring.
 */
static int
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i, j;

	/* Carefully retire all requests without writing to the rings */
	for_each_ring(ring, dev_priv, i) {
		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev);

	/* Finally reset hw state */
	for_each_ring(ring, dev_priv, i) {
		intel_ring_init_seqno(ring, seqno);

		/* Stale semaphore values would confuse cross-ring syncs */
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
			ring->semaphore.sync_seqno[j] = 0;
	}

	return 0;
}
2610
2611 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2612 {
2613         struct drm_i915_private *dev_priv = dev->dev_private;
2614         int ret;
2615
2616         if (seqno == 0)
2617                 return -EINVAL;
2618
2619         /* HWS page needs to be set less than what we
2620          * will inject to ring
2621          */
2622         ret = i915_gem_init_seqno(dev, seqno - 1);
2623         if (ret)
2624                 return ret;
2625
2626         /* Carefully set the last_seqno value so that wrap
2627          * detection still works
2628          */
2629         dev_priv->next_seqno = seqno;
2630         dev_priv->last_seqno = seqno - 1;
2631         if (dev_priv->last_seqno == 0)
2632                 dev_priv->last_seqno--;
2633
2634         return 0;
2635 }
2636
2637 int
2638 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2639 {
2640         struct drm_i915_private *dev_priv = dev->dev_private;
2641
2642         /* reserve 0 for non-seqno */
2643         if (dev_priv->next_seqno == 0) {
2644                 int ret = i915_gem_init_seqno(dev, 0);
2645                 if (ret)
2646                         return ret;
2647
2648                 dev_priv->next_seqno = 1;
2649         }
2650
2651         *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2652         return 0;
2653 }
2654
2655 /*
 * NB: This function is not allowed to fail. Doing so would mean that the
2657  * request is not being tracked for completion but the work itself is
2658  * going to happen on the hardware. This would be a Bad Thing(tm).
2659  */
/*
 * Commit a fully-built request to the hardware: emit any outstanding
 * cache flushes, write the request/breadcrumb into the ring, and queue
 * it on the engine's request list.  Also kicks hangcheck and arms the
 * retire worker.  Ring space for everything emitted here was reserved
 * at request-allocation time, which is why this cannot fail.
 */
void __i915_add_request(struct drm_i915_gem_request *request,
			struct drm_i915_gem_object *obj,
			bool flush_caches)
{
	struct intel_engine_cs *ring;
	struct drm_i915_private *dev_priv;
	struct intel_ringbuffer *ringbuf;
	u32 request_start;
	int ret;

	if (WARN_ON(request == NULL))
		return;

	ring = request->ring;
	dev_priv = ring->dev->dev_private;
	ringbuf = request->ringbuf;

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	intel_ring_reserved_space_use(ringbuf);

	request_start = intel_ring_get_tail(ringbuf);
	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		if (i915.enable_execlists)
			ret = logical_ring_flush_all_caches(request);
		else
			ret = intel_ring_flush_all_caches(request);
		/* Not allowed to fail! */
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
	}

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request->postfix = intel_ring_get_tail(ringbuf);

	/* Emit the seqno-update/breadcrumb commands into the ring */
	if (i915.enable_execlists)
		ret = ring->emit_request(request);
	else {
		ret = ring->add_request(request);

		request->tail = intel_ring_get_tail(ringbuf);
	}

	/* Not allowed to fail! */
	WARN(ret, "emit|add_request failed: %d!\n", ret);

	request->head = request_start;

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	request->batch_obj = obj;

	request->emitted_jiffies = jiffies;
	request->previous_seqno = ring->last_submitted_seqno;
	ring->last_submitted_seqno = request->seqno;
	list_add_tail(&request->list, &ring->request_list);

	trace_i915_gem_request_add(request);

	i915_queue_hangcheck(ring->dev);

	/* Arm the retire worker and mark the device busy (e.g. for RPS) */
	queue_delayed_work(dev_priv->wq,
			   &dev_priv->mm.retire_work,
			   round_jiffies_up_relative(HZ));
	intel_mark_busy(dev_priv->dev);

	/* Sanity check that the reserved size was large enough. */
	intel_ring_reserved_space_end(ringbuf);
}
2746
2747 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2748                                    const struct intel_context *ctx)
2749 {
2750         unsigned long elapsed;
2751
2752         elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2753
2754         if (ctx->hang_stats.banned)
2755                 return true;
2756
2757         if (ctx->hang_stats.ban_period_seconds &&
2758             elapsed <= ctx->hang_stats.ban_period_seconds) {
2759                 if (!i915_gem_context_is_default(ctx)) {
2760                         DRM_DEBUG("context hanging too fast, banning!\n");
2761                         return true;
2762                 } else if (i915_stop_ring_allow_ban(dev_priv)) {
2763                         if (i915_stop_ring_allow_warn(dev_priv))
2764                                 DRM_ERROR("gpu hanging too fast, banning!\n");
2765                         return true;
2766                 }
2767         }
2768
2769         return false;
2770 }
2771
2772 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2773                                   struct intel_context *ctx,
2774                                   const bool guilty)
2775 {
2776         struct i915_ctx_hang_stats *hs;
2777
2778         if (WARN_ON(!ctx))
2779                 return;
2780
2781         hs = &ctx->hang_stats;
2782
2783         if (guilty) {
2784                 hs->banned = i915_context_is_banned(dev_priv, ctx);
2785                 hs->batch_active++;
2786                 hs->guilty_ts = get_seconds();
2787         } else {
2788                 hs->batch_pending++;
2789         }
2790 }
2791
/* Final kref release callback for a request.  Detaches it from its file
 * client, drops the execlists context pin (the kernel context stays
 * pinned for the driver's lifetime and is skipped), releases the context
 * reference, and frees the request itself.
 */
void i915_gem_request_free(struct kref *req_ref)
{
	struct drm_i915_gem_request *req = container_of(req_ref,
						 typeof(*req), ref);
	struct intel_context *ctx = req->ctx;

	if (req->file_priv)
		i915_gem_request_remove_from_client(req);

	if (ctx) {
		if (i915.enable_execlists && ctx != req->i915->kernel_context)
			intel_lr_context_unpin(ctx, req->ring);

		i915_gem_context_unreference(ctx);
	}

	kfree(req);
}
2810
/*
 * Allocate and initialise a request for @ring in @ctx, including
 * reserving enough ring space that the eventual i915_add_request()
 * cannot fail.  On success *@req_out holds the new request (with one
 * reference and a context reference); on failure a negative error code
 * is returned and nothing is left allocated.
 */
static inline int
__i915_gem_request_alloc(struct intel_engine_cs *ring,
			 struct intel_context *ctx,
			 struct drm_i915_gem_request **req_out)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
	struct drm_i915_gem_request *req;
	int ret;

	if (!req_out)
		return -EINVAL;

	*req_out = NULL;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return -ENOMEM;

	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
	if (ret)
		goto err;

	kref_init(&req->ref);
	req->i915 = dev_priv;
	req->ring = ring;
	req->ctx  = ctx;
	i915_gem_context_reference(req->ctx);

	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret) {
		/* Balance the context reference taken just above */
		i915_gem_context_unreference(req->ctx);
		goto err;
	}

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_add_request() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	if (i915.enable_execlists)
		ret = intel_logical_ring_reserve_space(req);
	else
		ret = intel_ring_reserve_space(req);
	if (ret) {
		/*
		 * At this point, the request is fully allocated even if not
		 * fully prepared. Thus it can be cleaned up using the proper
		 * free code.
		 */
		i915_gem_request_cancel(req);
		return ret;
	}

	*req_out = req;
	return 0;

err:
	kfree(req);
	return ret;
}
2876
2877 /**
2878  * i915_gem_request_alloc - allocate a request structure
2879  *
2880  * @engine: engine that we wish to issue the request on.
2881  * @ctx: context that the request will be associated with.
2882  *       This can be NULL if the request is not directly related to
2883  *       any specific user context, in which case this function will
2884  *       choose an appropriate context to use.
2885  *
2886  * Returns a pointer to the allocated request if successful,
2887  * or an error code if not.
2888  */
2889 struct drm_i915_gem_request *
2890 i915_gem_request_alloc(struct intel_engine_cs *engine,
2891                        struct intel_context *ctx)
2892 {
2893         struct drm_i915_gem_request *req;
2894         int err;
2895
2896         if (ctx == NULL)
2897                 ctx = to_i915(engine->dev)->kernel_context;
2898         err = __i915_gem_request_alloc(engine, ctx, &req);
2899         return err ? ERR_PTR(err) : req;
2900 }
2901
/* Abandon a request that was allocated but never submitted: give back
 * its reserved ring space, then drop the allocation reference (which
 * frees it via i915_gem_request_free() if this was the last reference).
 */
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
{
	intel_ring_reserved_space_cancel(req->ringbuf);

	i915_gem_request_unreference(req);
}
2908
2909 struct drm_i915_gem_request *
2910 i915_gem_find_active_request(struct intel_engine_cs *ring)
2911 {
2912         struct drm_i915_gem_request *request;
2913
2914         list_for_each_entry(request, &ring->request_list, list) {
2915                 if (i915_gem_request_completed(request, false))
2916                         continue;
2917
2918                 return request;
2919         }
2920
2921         return NULL;
2922 }
2923
/* After a hang on @ring, assign blame: the context owning the oldest
 * incomplete request is judged guilty if the hangcheck score says the
 * ring was genuinely hung; the contexts of every queued request behind
 * it are only marked as having had a batch pending.
 */
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
				       struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *request;
	bool ring_hung;

	request = i915_gem_find_active_request(ring);

	if (request == NULL)
		return;

	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;

	i915_set_reset_status(dev_priv, request->ctx, ring_hung);

	/* The requests after the guilty one never got to run: innocent */
	list_for_each_entry_continue(request, &ring->request_list, list)
		i915_set_reset_status(dev_priv, request->ctx, false);
}
2942
/* Throw away all in-flight work on @ring after a GPU reset: retire every
 * active object, flush the execlists queue, free all pending requests,
 * and reset the ringbuffers' bookkeeping.  The ordering here (objects
 * before requests) mirrors the normal retire path and matters.
 */
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
					struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *buffer;

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list[ring->id]);

		i915_gem_object_retire__read(obj, ring->id);
	}

	/*
	 * Clear the execlists queue up before freeing the requests, as those
	 * are the ones that keep the context and ringbuffer backing objects
	 * pinned in place.
	 */

	if (i915.enable_execlists) {
		/* execlist_queue is also touched from the submission irq */
		spin_lock_irq(&ring->execlist_lock);

		/* list_splice_tail_init checks for empty lists */
		list_splice_tail_init(&ring->execlist_queue,
				      &ring->execlist_retired_req_list);

		spin_unlock_irq(&ring->execlist_lock);
		intel_execlists_retire_requests(ring);
	}

	/*
	 * We must free the requests after all the corresponding objects have
	 * been moved off active lists. Which is the same order as the normal
	 * retire_requests function does. This is important if object hold
	 * implicit references on things like e.g. ppgtt address spaces through
	 * the request.
	 */
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		i915_gem_request_retire(request);
	}

	/* Having flushed all requests from all queues, we know that all
	 * ringbuffers must now be empty. However, since we do not reclaim
	 * all space when retiring the request (to prevent HEADs colliding
	 * with rapid ringbuffer wraparound) the amount of available space
	 * upon reset is less than when we start. Do one more pass over
	 * all the ringbuffers to reset last_retired_head.
	 */
	list_for_each_entry(buffer, &ring->buffers, link) {
		buffer->last_retired_head = buffer->tail;
		intel_ring_update_space(buffer);
	}
}
3004
/* Handle a full GPU reset: first assign blame for the hang on every
 * engine, then discard all in-flight work, and finally restore the
 * software/hardware bookkeeping (contexts, fence registers).
 */
void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	/*
	 * Before we free the objects from the requests, we need to inspect
	 * them for finding the guilty party. As the requests only borrow
	 * their reference to the objects, the inspection must be done first.
	 */
	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_status(dev_priv, ring);

	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_cleanup(dev_priv, ring);

	i915_gem_context_reset(dev);

	i915_gem_restore_fences(dev);

	WARN_ON(i915_verify_lists(dev));
}
3028
3029 /**
3030  * This function clears the request list as sequence numbers are passed.
3031  */
void
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
{
	WARN_ON(i915_verify_lists(ring->dev));

	/* Retire requests first as we use it above for the early return.
	 * If we retire requests last, we may use a later seqno and so clear
	 * the requests lists without clearing the active list, leading to
	 * confusion.
	 */
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		/* Requests complete in submission order: stop at the first
		 * one the GPU has not yet finished.
		 */
		if (!i915_gem_request_completed(request, true))
			break;

		i915_gem_request_retire(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
	 * before we free the context associated with the requests.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				      struct drm_i915_gem_object,
				      ring_list[ring->id]);

		/* Its last read request is still linked on the request list,
		 * i.e. it was not retired by the loop above: stop here.
		 */
		if (!list_empty(&obj->last_read_req[ring->id]->list))
			break;

		i915_gem_object_retire__read(obj, ring->id);
	}

	if (unlikely(ring->trace_irq_req &&
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
		/* Release the irq reference held for request tracing */
		ring->irq_put(ring);
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
	}

	WARN_ON(i915_verify_lists(ring->dev));
}
3080
/* Retire completed requests on all engines.  Returns true when every
 * engine is now idle, in which case the idle worker is (re)scheduled to
 * run shortly.
 */
bool
i915_gem_retire_requests(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	bool idle = true;
	int i;

	for_each_ring(ring, dev_priv, i) {
		i915_gem_retire_requests_ring(ring);
		idle &= list_empty(&ring->request_list);
		if (i915.enable_execlists) {
			/* execlist_queue is protected by the irq-safe lock */
			spin_lock_irq(&ring->execlist_lock);
			idle &= list_empty(&ring->execlist_queue);
			spin_unlock_irq(&ring->execlist_lock);

			intel_execlists_retire_requests(ring);
		}
	}

	if (idle)
		mod_delayed_work(dev_priv->wq,
				   &dev_priv->mm.idle_work,
				   msecs_to_jiffies(100));

	return idle;
}
3108
3109 static void
3110 i915_gem_retire_work_handler(struct work_struct *work)
3111 {
3112         struct drm_i915_private *dev_priv =
3113                 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3114         struct drm_device *dev = dev_priv->dev;
3115         bool idle;
3116
3117         /* Come back later if the device is busy... */
3118         idle = false;
3119         if (mutex_trylock(&dev->struct_mutex)) {
3120                 idle = i915_gem_retire_requests(dev);
3121                 mutex_unlock(&dev->struct_mutex);
3122         }
3123         if (!idle)
3124                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3125                                    round_jiffies_up_relative(HZ));
3126 }
3127
3128 static void
3129 i915_gem_idle_work_handler(struct work_struct *work)
3130 {
3131         struct drm_i915_private *dev_priv =
3132                 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3133         struct drm_device *dev = dev_priv->dev;
3134         struct intel_engine_cs *ring;
3135         int i;
3136
3137         for_each_ring(ring, dev_priv, i)
3138                 if (!list_empty(&ring->request_list))
3139                         return;
3140
3141         /* we probably should sync with hangcheck here, using cancel_work_sync.
3142          * Also locking seems to be fubar here, ring->request_list is protected
3143          * by dev->struct_mutex. */
3144
3145         intel_mark_idle(dev);
3146
3147         if (mutex_trylock(&dev->struct_mutex)) {
3148                 struct intel_engine_cs *ring;
3149                 int i;
3150
3151                 for_each_ring(ring, dev_priv, i)
3152                         i915_gem_batch_pool_fini(&ring->batch_pool);
3153
3154                 mutex_unlock(&dev->struct_mutex);
3155         }
3156 }
3157
3158 /**
3159  * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring any
 * completed requests.
3162  */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int i;

	if (!obj->active)
		return 0;

	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct drm_i915_gem_request *req;

		req = obj->last_read_req[i];
		if (req == NULL)
			continue;

		/* Request already unlinked from the ring's request list:
		 * just drop the object's own tracking of it.
		 */
		if (list_empty(&req->list))
			goto retire;

		if (i915_gem_request_completed(req, true)) {
			/* Retire this request and everything before it */
			__i915_gem_request_retire__upto(req);
retire:
			i915_gem_object_retire__read(obj, i);
		}
	}

	/* Always succeeds; int return kept for callers expecting an errno */
	return 0;
}
3190
3191 /**
3192  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3193  * @DRM_IOCTL_ARGS: standard ioctl arguments
3194  *
3195  * Returns 0 if successful, else an error is returned with the remaining time in
3196  * the timeout parameter.
3197  *  -ETIME: object is still busy after timeout
3198  *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
3200  * Also possible, but rare:
3201  *  -EAGAIN: GPU wedged
3202  *  -ENOMEM: damn
3203  *  -ENODEV: Internal IRQ fail
3204  *  -E?: The add request failed
3205  *
3206  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3207  * non-zero timeout parameter the wait ioctl will wait for the given number of
3208  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3209  * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
3211  * ioctl
3212  */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
	unsigned reset_counter;
	int i, n = 0;
	int ret;

	/* No flags are defined for this ioctl */
	if (args->flags != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually. */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto out;

	if (!obj->active)
		goto out;

	/* Do this after OLR check to make sure we make forward progress polling
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
	 */
	if (args->timeout_ns == 0) {
		ret = -ETIME;
		goto out;
	}

	/* Drop the object reference; we instead take references on each
	 * outstanding read request below before dropping the lock.
	 */
	drm_gem_object_unreference(&obj->base);
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	for (i = 0; i < I915_NUM_RINGS; i++) {
		if (obj->last_read_req[i] == NULL)
			continue;

		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
	}

	mutex_unlock(&dev->struct_mutex);

	/* Wait on each request without the lock; stop waiting (but still
	 * unreference the rest) as soon as one wait fails.
	 */
	for (i = 0; i < n; i++) {
		if (ret == 0)
			ret = __i915_wait_request(req[i], reset_counter, true,
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
						  to_rps_client(file));
		i915_gem_request_unreference__unlocked(req[i]);
	}
	return ret;

out:
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
3279
/*
 * Order execution on @to after @from_req.  Without semaphores this
 * waits on the CPU for @from_req to complete; with semaphores it emits
 * a GPU-side wait into *@to_req (allocating that request on demand).
 * Returns 0 on success or a negative error code.
 */
static int
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
		       struct intel_engine_cs *to,
		       struct drm_i915_gem_request *from_req,
		       struct drm_i915_gem_request **to_req)
{
	struct intel_engine_cs *from;
	int ret;

	from = i915_gem_request_get_ring(from_req);
	/* Same ring: execution is naturally ordered */
	if (to == from)
		return 0;

	if (i915_gem_request_completed(from_req, true))
		return 0;

	if (!i915_semaphore_is_enabled(obj->base.dev)) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
		ret = __i915_wait_request(from_req,
					  atomic_read(&i915->gpu_error.reset_counter),
					  i915->mm.interruptible,
					  NULL,
					  &i915->rps.semaphores);
		if (ret)
			return ret;

		i915_gem_object_retire_request(obj, from_req);
	} else {
		int idx = intel_ring_sync_index(from, to);
		u32 seqno = i915_gem_request_get_seqno(from_req);

		/* Semaphore path needs a request to emit the wait into */
		WARN_ON(!to_req);

		/* Already waited on an equal or later seqno from @from */
		if (seqno <= from->semaphore.sync_seqno[idx])
			return 0;

		if (*to_req == NULL) {
			struct drm_i915_gem_request *req;

			req = i915_gem_request_alloc(to, NULL);
			if (IS_ERR(req))
				return PTR_ERR(req);

			*to_req = req;
		}

		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
		ret = to->semaphore.sync_to(*to_req, from, seqno);
		if (ret)
			return ret;

		/* We use last_read_req because sync_to()
		 * might have just caused seqno wrap under
		 * the radar.
		 */
		from->semaphore.sync_seqno[idx] =
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
	}

	return 0;
}
3341
3342 /**
3343  * i915_gem_object_sync - sync an object to a ring.
3344  *
3345  * @obj: object which may be in use on another ring.
3346  * @to: ring we wish to use the object on. May be NULL.
3347  * @to_req: request we wish to use the object for. See below.
3348  *          This will be allocated and returned if a request is
3349  *          required but not passed in.
3350  *
3351  * This code is meant to abstract object synchronization with the GPU.
3352  * Calling with NULL implies synchronizing the object with the CPU
3353  * rather than a particular GPU ring. Conceptually we serialise writes
3354  * between engines inside the GPU. We only allow one engine to write
3355  * into a buffer at any time, but multiple readers. To ensure each has
3356  * a coherent view of memory, we must:
3357  *
3358  * - If there is an outstanding write request to the object, the new
3359  *   request must wait for it to complete (either CPU or in hw, requests
3360  *   on the same ring will be naturally ordered).
3361  *
3362  * - If we are a write request (pending_write_domain is set), the new
3363  *   request must wait for outstanding read requests to complete.
3364  *
3365  * For CPU synchronisation (NULL to) no request is required. For syncing with
3366  * rings to_req must be non-NULL. However, a request does not have to be
3367  * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3368  * request will be allocated automatically and returned through *to_req. Note
3369  * that it is not guaranteed that commands will be emitted (because the system
3370  * might already be idle). Hence there is no need to create a request that
3371  * might never have any work submitted. Note further that if a request is
3372  * returned in *to_req, it is the responsibility of the caller to submit
3373  * that request (after potentially adding more work to it).
3374  *
3375  * Returns 0 if successful, else propagates up the lower layer error.
3376  */
3377 int
3378 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3379                      struct intel_engine_cs *to,
3380                      struct drm_i915_gem_request **to_req)
3381 {
3382         const bool readonly = obj->base.pending_write_domain == 0;
3383         struct drm_i915_gem_request *req[I915_NUM_RINGS];
3384         int ret, i, n;
3385
3386         if (!obj->active)
3387                 return 0;
3388
3389         if (to == NULL)
3390                 return i915_gem_object_wait_rendering(obj, readonly);
3391
3392         n = 0;
3393         if (readonly) {
3394                 if (obj->last_write_req)
3395                         req[n++] = obj->last_write_req;
3396         } else {
3397                 for (i = 0; i < I915_NUM_RINGS; i++)
3398                         if (obj->last_read_req[i])
3399                                 req[n++] = obj->last_read_req[i];
3400         }
3401         for (i = 0; i < n; i++) {
3402                 ret = __i915_gem_object_sync(obj, to, req[i], to_req);
3403                 if (ret)
3404                         return ret;
3405         }
3406
3407         return 0;
3408 }
3409
/* Drop the object's GTT domain prior to unbinding: revoke any CPU mmaps so
 * that the next user access pagefaults (re-establishing domain tracking),
 * then clear the GTT read/write domains after ordering outstanding direct
 * GTT access with a full memory barrier.
 */
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	/* Object was never in the GTT domain: nothing further to do. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	/* Wait for any direct GTT access to complete */
	mb();

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
}
3433
/* Core unbind implementation shared by i915_vma_unbind() and
 * __i915_vma_unbind_no_wait().  Tears down the vma's binding, releases its
 * drm_mm node and drops the object's pages pin.  When @wait is true,
 * outstanding rendering on the object is waited upon before unbinding.
 *
 * Returns 0 on success (including the no-op cases), -EBUSY if the vma is
 * still pinned, or an error propagated from the rendering wait / fence
 * release.
 */
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	/* Already decoupled from the object: nothing to do. */
	if (list_empty(&vma->obj_link))
		return 0;

	/* No GTT node was ever allocated; just free the vma itself. */
	if (!drm_mm_node_allocated(&vma->node)) {
		i915_gem_vma_destroy(vma);
		return 0;
	}

	if (vma->pin_count)
		return -EBUSY;

	BUG_ON(obj->pages == NULL);

	if (wait) {
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}

	/* A normal GGTT view may carry CPU mmaps and a fence register;
	 * both must be relinquished before the PTEs are torn down.
	 */
	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
		i915_gem_object_finish_gtt(obj);

		/* release the fence reg _after_ flushing */
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;
	}

	trace_i915_vma_unbind(vma);

	vma->vm->unbind_vma(vma);
	vma->bound = 0;

	list_del_init(&vma->vm_link);
	if (vma->is_ggtt) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
			obj->map_and_fenceable = false;
		} else if (vma->ggtt_view.pages) {
			/* Non-normal views own their scatterlist; free it. */
			sg_free_table(vma->ggtt_view.pages);
			kfree(vma->ggtt_view.pages);
		}
		vma->ggtt_view.pages = NULL;
	}

	drm_mm_remove_node(&vma->node);
	i915_gem_vma_destroy(vma);

	/* Since the unbound list is global, only move to that list if
	 * no more VMAs exist. */
	if (list_empty(&obj->vma_list))
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);

	/* And finally now the object is completely decoupled from this vma,
	 * we can drop its hold on the backing storage and allow it to be
	 * reaped by the shrinker.
	 */
	i915_gem_object_unpin_pages(obj);

	return 0;
}
3500
/* Unbind @vma, waiting for outstanding rendering on the object first. */
int i915_vma_unbind(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, true);
}
3505
/* Unbind @vma without waiting for rendering; callers must ensure the
 * object is idle (or that skipping the wait is otherwise safe).
 */
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, false);
}
3510
/* Wait for the GPU to go completely idle, flushing outstanding work onto
 * the inactive lists.  In legacy (non-execlist) mode a context switch is
 * first emitted on each ring via a freshly allocated request (NULL context
 * — presumably selecting the default context; confirm against
 * i915_gem_request_alloc) before idling that ring.
 *
 * Returns 0 on success or a negative error code.
 */
int i915_gpu_idle(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i;

	/* Flush everything onto the inactive list. */
	for_each_ring(ring, dev_priv, i) {
		if (!i915.enable_execlists) {
			struct drm_i915_gem_request *req;

			req = i915_gem_request_alloc(ring, NULL);
			if (IS_ERR(req))
				return PTR_ERR(req);

			ret = i915_switch_context(req);
			if (ret) {
				/* The request was never submitted, so it
				 * must be cancelled rather than added.
				 */
				i915_gem_request_cancel(req);
				return ret;
			}

			i915_add_request_no_flush(req);
		}

		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}

	WARN_ON(i915_verify_lists(dev));
	return 0;
}
3543
3544 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3545                                      unsigned long cache_level)
3546 {
3547         struct drm_mm_node *gtt_space = &vma->node;
3548         struct drm_mm_node *other;
3549
3550         /*
3551          * On some machines we have to be careful when putting differing types
3552          * of snoopable memory together to avoid the prefetcher crossing memory
3553          * domains and dying. During vm initialisation, we decide whether or not
3554          * these constraints apply and set the drm_mm.color_adjust
3555          * appropriately.
3556          */
3557         if (vma->vm->mm.color_adjust == NULL)
3558                 return true;
3559
3560         if (!drm_mm_node_allocated(gtt_space))
3561                 return true;
3562
3563         if (list_empty(&gtt_space->node_list))
3564                 return true;
3565
3566         other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3567         if (other->allocated && !other->hole_follows && other->color != cache_level)
3568                 return false;
3569
3570         other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3571         if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3572                 return false;
3573
3574         return true;
3575 }
3576
3577 /**
3578  * Finds free space in the GTT aperture and binds the object or a view of it
3579  * there.
3580  */
3581 static struct i915_vma *
3582 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3583                            struct i915_address_space *vm,
3584                            const struct i915_ggtt_view *ggtt_view,
3585                            unsigned alignment,
3586                            uint64_t flags)
3587 {
3588         struct drm_device *dev = obj->base.dev;
3589         struct drm_i915_private *dev_priv = dev->dev_private;
3590         u32 fence_alignment, unfenced_alignment;
3591         u32 search_flag, alloc_flag;
3592         u64 start, end;
3593         u64 size, fence_size;
3594         struct i915_vma *vma;
3595         int ret;
3596
3597         if (i915_is_ggtt(vm)) {
3598                 u32 view_size;
3599
3600                 if (WARN_ON(!ggtt_view))
3601                         return ERR_PTR(-EINVAL);
3602
3603                 view_size = i915_ggtt_view_size(obj, ggtt_view);
3604
3605                 fence_size = i915_gem_get_gtt_size(dev,
3606                                                    view_size,
3607                                                    obj->tiling_mode);
3608                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3609                                                              view_size,
3610                                                              obj->tiling_mode,
3611                                                              true);
3612                 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3613                                                                 view_size,
3614                                                                 obj->tiling_mode,
3615                                                                 false);
3616                 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3617         } else {
3618                 fence_size = i915_gem_get_gtt_size(dev,
3619                                                    obj->base.size,
3620                                                    obj->tiling_mode);
3621                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3622                                                              obj->base.size,
3623                                                              obj->tiling_mode,
3624                                                              true);
3625                 unfenced_alignment =
3626                         i915_gem_get_gtt_alignment(dev,
3627                                                    obj->base.size,
3628                                                    obj->tiling_mode,
3629                                                    false);
3630                 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3631         }
3632
3633         start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3634         end = vm->total;
3635         if (flags & PIN_MAPPABLE)
3636                 end = min_t(u64, end, dev_priv->gtt.mappable_end);
3637         if (flags & PIN_ZONE_4G)
3638                 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3639
3640         if (alignment == 0)
3641                 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3642                                                 unfenced_alignment;
3643         if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3644                 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3645                           ggtt_view ? ggtt_view->type : 0,
3646                           alignment);
3647                 return ERR_PTR(-EINVAL);
3648         }
3649
3650         /* If binding the object/GGTT view requires more space than the entire
3651          * aperture has, reject it early before evicting everything in a vain
3652          * attempt to find space.
3653          */
3654         if (size > end) {
3655                 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%lu > %s aperture=%lu\n",
3656                           ggtt_view ? ggtt_view->type : 0,
3657                           size,
3658                           flags & PIN_MAPPABLE ? "mappable" : "total",
3659                           end);
3660                 return ERR_PTR(-E2BIG);
3661         }
3662
3663         ret = i915_gem_object_get_pages(obj);
3664         if (ret)
3665                 return ERR_PTR(ret);
3666
3667         i915_gem_object_pin_pages(obj);
3668
3669         vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3670                           i915_gem_obj_lookup_or_create_vma(obj, vm);
3671
3672         if (IS_ERR(vma))
3673                 goto err_unpin;
3674
3675         if (flags & PIN_OFFSET_FIXED) {
3676                 uint64_t offset = flags & PIN_OFFSET_MASK;
3677
3678                 if (offset & (alignment - 1) || offset + size > end) {
3679                         ret = -EINVAL;
3680                         goto err_free_vma;
3681                 }
3682                 vma->node.start = offset;
3683                 vma->node.size = size;
3684                 vma->node.color = obj->cache_level;
3685                 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3686                 if (ret) {
3687                         ret = i915_gem_evict_for_vma(vma);
3688                         if (ret == 0)
3689                                 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3690                 }
3691                 if (ret)
3692                         goto err_free_vma;
3693         } else {
3694                 if (flags & PIN_HIGH) {
3695                         search_flag = DRM_MM_SEARCH_BELOW;
3696                         alloc_flag = DRM_MM_CREATE_TOP;
3697                 } else {
3698                         search_flag = DRM_MM_SEARCH_DEFAULT;
3699                         alloc_flag = DRM_MM_CREATE_DEFAULT;
3700                 }
3701
3702 search_free:
3703                 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3704                                                           size, alignment,
3705                                                           obj->cache_level,
3706                                                           start, end,
3707                                                           search_flag,
3708                                                           alloc_flag);
3709                 if (ret) {
3710                         ret = i915_gem_evict_something(dev, vm, size, alignment,
3711                                                        obj->cache_level,
3712                                                        start, end,
3713                                                        flags);
3714                         if (ret == 0)
3715                                 goto search_free;
3716
3717                         goto err_free_vma;
3718                 }
3719         }
3720         if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
3721                 ret = -EINVAL;
3722                 goto err_remove_node;
3723         }
3724
3725         trace_i915_vma_bind(vma, flags);
3726         ret = i915_vma_bind(vma, obj->cache_level, flags);
3727         if (ret)
3728                 goto err_remove_node;
3729
3730         list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3731         list_add_tail(&vma->vm_link, &vm->inactive_list);
3732
3733         return vma;
3734
3735 err_remove_node:
3736         drm_mm_remove_node(&vma->node);
3737 err_free_vma:
3738         i915_gem_vma_destroy(vma);
3739         vma = ERR_PTR(ret);
3740 err_unpin:
3741         i915_gem_object_unpin_pages(obj);
3742         return vma;
3743 }
3744
3745 bool
3746 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3747                         bool force)
3748 {
3749         /* If we don't have a page list set up, then we're not pinned
3750          * to GPU, and we can ignore the cache flush because it'll happen
3751          * again at bind time.
3752          */
3753         if (obj->pages == NULL)
3754                 return false;
3755
3756         /*
3757          * Stolen memory is always coherent with the GPU as it is explicitly
3758          * marked as wc by the system, or the system is cache-coherent.
3759          */
3760         if (obj->stolen || obj->phys_handle)
3761                 return false;
3762
3763         /* If the GPU is snooping the contents of the CPU cache,
3764          * we do not need to manually clear the CPU cache lines.  However,
3765          * the caches are only snooped when the render cache is
3766          * flushed/invalidated.  As we always have to emit invalidations
3767          * and flushes when moving into and out of the RENDER domain, correct
3768          * snooping behaviour occurs naturally as the result of our domain
3769          * tracking.
3770          */
3771         if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3772                 obj->cache_dirty = true;
3773                 return false;
3774         }
3775
3776         trace_i915_gem_object_clflush(obj);
3777         drm_clflush_sg(obj->pages);
3778         obj->cache_dirty = false;
3779
3780         return true;
3781 }
3782
/** Flushes the GTT write domain for the object if it's dirty.
 *
 * Only a write-memory-barrier is needed (no chipset flush); the write
 * domain is then cleared and frontbuffer tracking notified.  No-op if the
 * object's write domain is not the GTT.
 */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain.  Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush.  It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 */
	wmb();

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	intel_fb_obj_flush(obj, false, ORIGIN_GTT);

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}
3811
/** Flushes the CPU write domain for the object if it's dirty.
 *
 * Clflushes the object's pages and, when cachelines were actually written
 * back, issues a chipset flush; the write domain is then cleared and
 * frontbuffer tracking notified.  No-op if the object's write domain is
 * not the CPU.
 */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	/* Force the clflush when the object is pinned for display (scanout
	 * must not observe stale cachelines).
	 */
	if (i915_gem_clflush_object(obj, obj->pin_display))
		i915_gem_chipset_flush(obj->base.dev);

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}
3833
3834 /**
3835  * Moves a single object to the GTT read, and possibly write domain.
3836  *
3837  * This function returns when the move is complete, including waiting on
3838  * flushes to occur.
3839  */
3840 int
3841 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3842 {
3843         uint32_t old_write_domain, old_read_domains;
3844         struct i915_vma *vma;
3845         int ret;
3846
3847         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3848                 return 0;
3849
3850         ret = i915_gem_object_wait_rendering(obj, !write);
3851         if (ret)
3852                 return ret;
3853
3854         /* Flush and acquire obj->pages so that we are coherent through
3855          * direct access in memory with previous cached writes through
3856          * shmemfs and that our cache domain tracking remains valid.
3857          * For example, if the obj->filp was moved to swap without us
3858          * being notified and releasing the pages, we would mistakenly
3859          * continue to assume that the obj remained out of the CPU cached
3860          * domain.
3861          */
3862         ret = i915_gem_object_get_pages(obj);
3863         if (ret)
3864                 return ret;
3865
3866         i915_gem_object_flush_cpu_write_domain(obj);
3867
3868         /* Serialise direct access to this object with the barriers for
3869          * coherent writes from the GPU, by effectively invalidating the
3870          * GTT domain upon first access.
3871          */
3872         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3873                 mb();
3874
3875         old_write_domain = obj->base.write_domain;
3876         old_read_domains = obj->base.read_domains;
3877
3878         /* It should now be out of any other write domains, and we can update
3879          * the domain values for our changes.
3880          */
3881         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3882         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3883         if (write) {
3884                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3885                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3886                 obj->dirty = 1;
3887         }
3888
3889         trace_i915_gem_object_change_domain(obj,
3890                                             old_read_domains,
3891                                             old_write_domain);
3892
3893         /* And bump the LRU for this access */
3894         vma = i915_gem_obj_to_ggtt(obj);
3895         if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3896                 list_move_tail(&vma->vm_link,
3897                                &to_i915(obj->base.dev)->gtt.base.inactive_list);
3898
3899         return 0;
3900 }
3901
3902 /**
3903  * Changes the cache-level of an object across all VMA.
3904  *
3905  * After this function returns, the object will be in the new cache-level
3906  * across all GTT and the contents of the backing storage will be coherent,
3907  * with respect to the new cache-level. In order to keep the backing storage
3908  * coherent for all users, we only allow a single cache level to be set
3909  * globally on the object and prevent it from being changed whilst the
3910  * hardware is reading from the object. That is if the object is currently
3911  * on the scanout it will be set to uncached (or equivalent display
3912  * cache coherency) and all non-MOCS GPU access will also be uncached so
3913  * that all direct access to the scanout remains coherent.
3914  */
3915 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3916                                     enum i915_cache_level cache_level)
3917 {
3918         struct drm_device *dev = obj->base.dev;
3919         struct i915_vma *vma, *next;
3920         bool bound = false;
3921         int ret = 0;
3922
3923         if (obj->cache_level == cache_level)
3924                 goto out;
3925
3926         /* Inspect the list of currently bound VMA and unbind any that would
3927          * be invalid given the new cache-level. This is principally to
3928          * catch the issue of the CS prefetch crossing page boundaries and
3929          * reading an invalid PTE on older architectures.
3930          */
3931         list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
3932                 if (!drm_mm_node_allocated(&vma->node))
3933                         continue;
3934
3935                 if (vma->pin_count) {
3936                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3937                         return -EBUSY;
3938                 }
3939
3940                 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
3941                         ret = i915_vma_unbind(vma);
3942                         if (ret)
3943                                 return ret;
3944                 } else
3945                         bound = true;
3946         }
3947
3948         /* We can reuse the existing drm_mm nodes but need to change the
3949          * cache-level on the PTE. We could simply unbind them all and
3950          * rebind with the correct cache-level on next use. However since
3951          * we already have a valid slot, dma mapping, pages etc, we may as
3952          * rewrite the PTE in the belief that doing so tramples upon less
3953          * state and so involves less work.
3954          */
3955         if (bound) {
3956                 /* Before we change the PTE, the GPU must not be accessing it.
3957                  * If we wait upon the object, we know that all the bound
3958                  * VMA are no longer active.
3959                  */
3960                 ret = i915_gem_object_wait_rendering(obj, false);
3961                 if (ret)
3962                         return ret;
3963
3964                 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3965                         /* Access to snoopable pages through the GTT is
3966                          * incoherent and on some machines causes a hard
3967                          * lockup. Relinquish the CPU mmaping to force
3968                          * userspace to refault in the pages and we can
3969                          * then double check if the GTT mapping is still
3970                          * valid for that pointer access.
3971                          */
3972                         i915_gem_release_mmap(obj);
3973
3974                         /* As we no longer need a fence for GTT access,
3975                          * we can relinquish it now (and so prevent having
3976                          * to steal a fence from someone else on the next
3977                          * fence request). Note GPU activity would have
3978                          * dropped the fence as all snoopable access is
3979                          * supposed to be linear.
3980                          */
3981                         ret = i915_gem_object_put_fence(obj);
3982                         if (ret)
3983                                 return ret;
3984                 } else {
3985                         /* We either have incoherent backing store and
3986                          * so no GTT access or the architecture is fully
3987                          * coherent. In such cases, existing GTT mmaps
3988                          * ignore the cache bit in the PTE and we can
3989                          * rewrite it without confusing the GPU or having
3990                          * to force userspace to fault back in its mmaps.
3991                          */
3992                 }
3993
3994                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3995                         if (!drm_mm_node_allocated(&vma->node))
3996                                 continue;
3997
3998                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3999                         if (ret)
4000                                 return ret;
4001                 }
4002         }
4003
4004         list_for_each_entry(vma, &obj->vma_list, obj_link)
4005                 vma->node.color = cache_level;
4006         obj->cache_level = cache_level;
4007
4008 out:
4009         /* Flush the dirty CPU caches to the backing storage so that the
4010          * object is now coherent at its new cache level (with respect
4011          * to the access domain).
4012          */
4013         if (obj->cache_dirty &&
4014             obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
4015             cpu_write_needs_clflush(obj)) {
4016                 if (i915_gem_clflush_object(obj, true))
4017                         i915_gem_chipset_flush(obj->base.dev);
4018         }
4019
4020         return 0;
4021 }
4022
/* GET_CACHING ioctl: report the object's cache level back to userspace as
 * an I915_CACHING_* value in args->caching.  Returns -ENOENT for an
 * unknown handle, 0 on success.
 */
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	/* Lookup failure yields a NULL GEM object; comparing &obj->base
	 * against NULL undoes the to_intel_bo() translation (long-standing
	 * i915 idiom — do not "simplify" to obj == NULL, which would be
	 * wrong if base were not the first member).
	 */
	if (&obj->base == NULL)
		return -ENOENT;

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}

	drm_gem_object_unreference_unlocked(&obj->base);
	return 0;
}
4051
4052 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4053                                struct drm_file *file)
4054 {
4055         struct drm_i915_private *dev_priv = dev->dev_private;
4056         struct drm_i915_gem_caching *args = data;
4057         struct drm_i915_gem_object *obj;
4058         enum i915_cache_level level;
4059         int ret;
4060
4061         switch (args->caching) {
4062         case I915_CACHING_NONE:
4063                 level = I915_CACHE_NONE;
4064                 break;
4065         case I915_CACHING_CACHED:
4066                 /*
4067                  * Due to a HW issue on BXT A stepping, GPU stores via a
4068                  * snooped mapping may leave stale data in a corresponding CPU
4069                  * cacheline, whereas normally such cachelines would get