1bc7154ebd78f3b5255d74358f703061f761729c
[dragonfly.git] / sys / dev / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include <linux/shmem_fs.h>
36 #include <linux/slab.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39
40 #define RQ_BUG_ON(expr)
41
42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
44 static void
45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
46 static void
47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
48
49 static bool cpu_cache_is_coherent(struct drm_device *dev,
50                                   enum i915_cache_level level)
51 {
52         return HAS_LLC(dev) || level != I915_CACHE_NONE;
53 }
54
55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
56 {
57         if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
58                 return true;
59
60         return obj->pin_display;
61 }
62
63 /* some bookkeeping */
64 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
65                                   size_t size)
66 {
67         spin_lock(&dev_priv->mm.object_stat_lock);
68         dev_priv->mm.object_count++;
69         dev_priv->mm.object_memory += size;
70         spin_unlock(&dev_priv->mm.object_stat_lock);
71 }
72
73 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
74                                      size_t size)
75 {
76         spin_lock(&dev_priv->mm.object_stat_lock);
77         dev_priv->mm.object_count--;
78         dev_priv->mm.object_memory -= size;
79         spin_unlock(&dev_priv->mm.object_stat_lock);
80 }
81
/* Block until any in-progress GPU reset completes.
 *
 * Returns 0 if no reset is pending (or the GPU is terminally wedged, in
 * which case waiting would be pointless), -EIO if the reset does not
 * finish within 10 seconds, or a negative error if interrupted.
 */
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
        int ret;

/* Re-evaluated on every wakeup by wait_event_interruptible_timeout(),
 * hence a macro rather than a local variable. */
#define EXIT_COND (!i915_reset_in_progress(error) || \
                   i915_terminally_wedged(error))
        if (EXIT_COND)
                return 0;

        /*
         * Only wait 10 seconds for the gpu reset to complete to avoid hanging
         * userspace. If it takes that long something really bad is going on and
         * we should simply try to bail out and fail as gracefully as possible.
         */
        ret = wait_event_interruptible_timeout(error->reset_queue,
                                               EXIT_COND,
                                               10*HZ);
        if (ret == 0) {
                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
                return -EIO;
        } else if (ret < 0) {
                return ret;
        }
#undef EXIT_COND

        return 0;
}
110
111 int i915_mutex_lock_interruptible(struct drm_device *dev)
112 {
113         struct drm_i915_private *dev_priv = dev->dev_private;
114         int ret;
115
116         ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
117         if (ret)
118                 return ret;
119
120         ret = mutex_lock_interruptible(&dev->struct_mutex);
121         if (ret)
122                 return ret;
123
124         WARN_ON(i915_verify_lists(dev));
125         return 0;
126 }
127
128 int
129 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
130                             struct drm_file *file)
131 {
132         struct drm_i915_private *dev_priv = dev->dev_private;
133         struct drm_i915_gem_get_aperture *args = data;
134         struct i915_gtt *ggtt = &dev_priv->gtt;
135         struct i915_vma *vma;
136         size_t pinned;
137
138         pinned = 0;
139         mutex_lock(&dev->struct_mutex);
140         list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
141                 if (vma->pin_count)
142                         pinned += vma->node.size;
143         list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
144                 if (vma->pin_count)
145                         pinned += vma->node.size;
146         mutex_unlock(&dev->struct_mutex);
147
148         args->aper_size = dev_priv->gtt.base.total;
149         args->aper_available_size = args->aper_size - pinned;
150
151         return 0;
152 }
153
154 #if 0
/* (Disabled on DragonFly, see enclosing #if 0.)
 * Copy the object's shmem pages into its contiguous phys buffer and
 * publish a single-entry sg table describing that buffer as obj->pages.
 * Returns 0 on success or a negative errno. */
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
        char *vaddr = obj->phys_handle->vaddr;
        struct sg_table *st;
        struct scatterlist *sg;
        int i;

        /* Swizzled objects cannot be linearly copied. */
        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
                return -EINVAL;

        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                struct page *page;
                char *src;

                page = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(page))
                        return PTR_ERR(page);

                src = kmap_atomic(page);
                memcpy(vaddr, src, PAGE_SIZE);
                /* Push the copy out of the CPU caches for the GPU. */
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                kunmap_atomic(src);

                page_cache_release(page);
                vaddr += PAGE_SIZE;
        }

        i915_gem_chipset_flush(obj->base.dev);

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
                return -ENOMEM;

        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
                kfree(st);
                return -ENOMEM;
        }

        /* One contiguous entry covering the whole phys allocation. */
        sg = st->sgl;
        sg->offset = 0;
        sg->length = obj->base.size;

        sg_dma_address(sg) = obj->phys_handle->busaddr;
        sg_dma_len(sg) = obj->base.size;

        obj->pages = st;
        return 0;
}
205
/* (Disabled on DragonFly, see enclosing #if 0.)
 * Write any dirty contents of the phys buffer back to the object's
 * shmem pages, then free the sg table created by get_pages_phys. */
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
        int ret;

        BUG_ON(obj->madv == __I915_MADV_PURGED);

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret) {
                /* In the event of a disaster, abandon all caches and
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }

        /* DONTNEED objects may be discarded without writeback. */
        if (obj->madv == I915_MADV_DONTNEED)
                obj->dirty = 0;

        if (obj->dirty) {
                struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
                char *vaddr = obj->phys_handle->vaddr;
                int i;

                for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                        struct page *page;
                        char *dst;

                        page = shmem_read_mapping_page(mapping, i);
                        if (IS_ERR(page))
                                continue;

                        dst = kmap_atomic(page);
                        /* Invalidate stale cachelines before reading back. */
                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        memcpy(dst, vaddr, PAGE_SIZE);
                        kunmap_atomic(dst);

                        set_page_dirty(page);
                        if (obj->madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        page_cache_release(page);
                        vaddr += PAGE_SIZE;
                }
                obj->dirty = 0;
        }

        sg_free_table(obj->pages);
        kfree(obj->pages);
}
255
/* (Disabled on DragonFly, see enclosing #if 0.)
 * Release the contiguous DMA allocation backing a phys object. */
static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
        drm_pci_free(obj->base.dev, obj->phys_handle);
}
261
/* Object ops vtable for physically contiguous (phys) GEM objects. */
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .get_pages = i915_gem_object_get_pages_phys,
        .put_pages = i915_gem_object_put_pages_phys,
        .release = i915_gem_object_release_phys,
};
267 #endif
268
269 static int
270 drop_pages(struct drm_i915_gem_object *obj)
271 {
272         struct i915_vma *vma, *next;
273         int ret;
274
275         drm_gem_object_reference(&obj->base);
276         list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
277                 if (i915_vma_unbind(vma))
278                         break;
279
280         ret = i915_gem_object_put_pages(obj);
281         drm_gem_object_unreference(&obj->base);
282
283         return ret;
284 }
285
/* Attach a physically contiguous backing store to the object.
 *
 * The (align - 1) mask below implies align is assumed to be a power of
 * two — TODO confirm against callers.  Returns 0 on success, -EBUSY if
 * an existing phys backing does not meet the alignment, -EFAULT if the
 * object was marked DONTNEED, or an allocation/paging error.
 */
int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
{
        drm_dma_handle_t *phys;
        int ret;

        /* Already attached: only re-usable if suitably aligned. */
        if (obj->phys_handle) {
                if ((unsigned long)obj->phys_handle->vaddr & (align -1))
                        return -EBUSY;

                return 0;
        }

        if (obj->madv != I915_MADV_WILLNEED)
                return -EFAULT;

#if 0
        if (obj->base.filp == NULL)
                return -EINVAL;
#endif

        /* Drop the shmem-backed pages before switching backing store. */
        ret = drop_pages(obj);
        if (ret)
                return ret;

        /* create a new object */
        phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
        if (!phys)
                return -ENOMEM;

        obj->phys_handle = phys;
#if 0
        obj->ops = &i915_gem_phys_ops;
#endif

        return i915_gem_object_get_pages(obj);
}
324
/* pwrite into a phys object: try an atomic nocache copy first; on fault
 * drop struct_mutex and retry with a faulting copy_from_user.  Flushes
 * the written range so the GPU observes it. */
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
                     struct drm_file *file_priv)
{
        struct drm_device *dev = obj->base.dev;
        void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
        char __user *user_data = to_user_ptr(args->data_ptr);
        int ret = 0;

        /* We manually control the domain here and pretend that it
         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
         */
        ret = i915_gem_object_wait_rendering(obj, false);
        if (ret)
                return ret;

        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
                unsigned long unwritten;

                /* The physical object once assigned is fixed for the lifetime
                 * of the obj, so we can safely drop the lock and continue
                 * to access vaddr.
                 */
                mutex_unlock(&dev->struct_mutex);
                unwritten = copy_from_user(vaddr, user_data, args->size);
                mutex_lock(&dev->struct_mutex);
                if (unwritten) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        drm_clflush_virt_range(vaddr, args->size);
        i915_gem_chipset_flush(dev);

out:
        /* Flush the frontbuffer even on failure: some bytes may have landed. */
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
}
366
/* Allocate a zeroed drm_i915_gem_object.  DragonFly-specific: uses the
 * kernel malloc with M_DRM type and a blocking, zeroing allocation. */
void *i915_gem_object_alloc(struct drm_device *dev)
{
        return kmalloc(sizeof(struct drm_i915_gem_object),
            M_DRM, M_WAITOK | M_ZERO);
}
372
/* Free an object allocated with i915_gem_object_alloc(). */
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
        kfree(obj);
}
377
378 static int
379 i915_gem_create(struct drm_file *file,
380                 struct drm_device *dev,
381                 uint64_t size,
382                 uint32_t *handle_p)
383 {
384         struct drm_i915_gem_object *obj;
385         int ret;
386         u32 handle;
387
388         size = roundup(size, PAGE_SIZE);
389         if (size == 0)
390                 return -EINVAL;
391
392         /* Allocate the new object */
393         obj = i915_gem_alloc_object(dev, size);
394         if (obj == NULL)
395                 return -ENOMEM;
396
397         ret = drm_gem_handle_create(file, &obj->base, &handle);
398         /* drop reference from allocate - handle holds it now */
399         drm_gem_object_unreference_unlocked(&obj->base);
400         if (ret)
401                 return ret;
402
403         *handle_p = handle;
404         return 0;
405 }
406
407 int
408 i915_gem_dumb_create(struct drm_file *file,
409                      struct drm_device *dev,
410                      struct drm_mode_create_dumb *args)
411 {
412         /* have to work out size/pitch and return them */
413         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
414         args->size = args->pitch * args->height;
415         return i915_gem_create(file, dev,
416                                args->size, &args->handle);
417 }
418
419 /**
420  * Creates a new mm object and returns a handle to it.
421  */
422 int
423 i915_gem_create_ioctl(struct drm_device *dev, void *data,
424                       struct drm_file *file)
425 {
426         struct drm_i915_gem_create *args = data;
427
428         return i915_gem_create(file, dev,
429                                args->size, &args->handle);
430 }
431
/*
 * Copy from a bit17-swizzled GPU buffer to userspace.  Each 64-byte
 * cacheline is read from the alternate channel (gpu_offset ^ 64) so the
 * user receives the data in linear order.  Returns 0 on success or a
 * non-zero value if some bytes could not be copied.
 */
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
                        int length)
{
        int ret, cpu_offset = 0;

        while (length > 0) {
                /* Process one cacheline at a time, never crossing one. */
                int cacheline_end = ALIGN(gpu_offset + 1, 64);
                int this_length = min(cacheline_end - gpu_offset, length);
                int swizzled_gpu_offset = gpu_offset ^ 64;

                ret = __copy_to_user(cpu_vaddr + cpu_offset,
                                     gpu_vaddr + swizzled_gpu_offset,
                                     this_length);
                if (ret)
                        return ret + length;

                cpu_offset += this_length;
                gpu_offset += this_length;
                length -= this_length;
        }

        return 0;
}
457
/*
 * Copy from userspace into a bit17-swizzled GPU buffer; mirror of
 * __copy_to_user_swizzled.  Each 64-byte cacheline is written to the
 * alternate channel (gpu_offset ^ 64).  Returns 0 on success or a
 * non-zero value if some bytes could not be copied.
 */
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
                          const char __user *cpu_vaddr,
                          int length)
{
        int ret, cpu_offset = 0;

        while (length > 0) {
                /* Process one cacheline at a time, never crossing one. */
                int cacheline_end = ALIGN(gpu_offset + 1, 64);
                int this_length = min(cacheline_end - gpu_offset, length);
                int swizzled_gpu_offset = gpu_offset ^ 64;

                ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
                                       cpu_vaddr + cpu_offset,
                                       this_length);
                if (ret)
                        return ret + length;

                cpu_offset += this_length;
                gpu_offset += this_length;
                length -= this_length;
        }

        return 0;
}
483
484 /*
485  * Pins the specified object's pages and synchronizes the object with
486  * GPU accesses. Sets needs_clflush to non-zero if the caller should
487  * flush the object from the CPU cache.
488  */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                                    int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;

#if 0
        if (!obj->base.filp)
                return -EINVAL;
#endif

        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
                /* If we're not in the cpu read domain, set ourself into the gtt
                 * read domain and manually flush cachelines (if required). This
                 * optimizes for the case when the gpu will dirty the data
                 * anyway again before the next pread happens. */
                *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                        obj->cache_level);
                /* Wait only for pending GPU writes; reads may keep running. */
                ret = i915_gem_object_wait_rendering(obj, true);
                if (ret)
                        return ret;
        }

        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ret;

        /* Caller must balance with i915_gem_object_unpin_pages(). */
        i915_gem_object_pin_pages(obj);

        return ret;
}
521
522 /* Per-page copy function for the shmem pread fastpath.
523  * Flushes invalid cachelines before reading the target if
524  * needs_clflush is set. */
525 static int
526 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
527                  char __user *user_data,
528                  bool page_do_bit17_swizzling, bool needs_clflush)
529 {
530         char *vaddr;
531         int ret;
532
533         if (unlikely(page_do_bit17_swizzling))
534                 return -EINVAL;
535
536         vaddr = kmap_atomic(page);
537         if (needs_clflush)
538                 drm_clflush_virt_range(vaddr + shmem_page_offset,
539                                        page_length);
540         ret = __copy_to_user_inatomic(user_data,
541                                       vaddr + shmem_page_offset,
542                                       page_length);
543         kunmap_atomic(vaddr);
544
545         return ret ? -EFAULT : 0;
546 }
547
548 static void
549 shmem_clflush_swizzled_range(char *addr, unsigned long length,
550                              bool swizzled)
551 {
552         if (unlikely(swizzled)) {
553                 unsigned long start = (unsigned long) addr;
554                 unsigned long end = (unsigned long) addr + length;
555
556                 /* For swizzling simply ensure that we always flush both
557                  * channels. Lame, but simple and it works. Swizzled
558                  * pwrite/pread is far from a hotpath - current userspace
559                  * doesn't use it at all. */
560                 start = round_down(start, 128);
561                 end = round_up(end, 128);
562
563                 drm_clflush_virt_range((void *)start, end - start);
564         } else {
565                 drm_clflush_virt_range(addr, length);
566         }
567
568 }
569
570 /* Only difference to the fast-path function is that this can handle bit17
571  * and uses non-atomic copy and kmap functions. */
572 static int
573 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
574                  char __user *user_data,
575                  bool page_do_bit17_swizzling, bool needs_clflush)
576 {
577         char *vaddr;
578         int ret;
579
580         vaddr = kmap(page);
581         if (needs_clflush)
582                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
583                                              page_length,
584                                              page_do_bit17_swizzling);
585
586         if (page_do_bit17_swizzling)
587                 ret = __copy_to_user_swizzled(user_data,
588                                               vaddr, shmem_page_offset,
589                                               page_length);
590         else
591                 ret = __copy_to_user(user_data,
592                                      vaddr + shmem_page_offset,
593                                      page_length);
594         kunmap(page);
595
596         return ret ? - EFAULT : 0;
597 }
598
/* CPU (shmem) pread: copy the object's backing pages to userspace one
 * page at a time.  An atomic fast path is attempted first; on fault we
 * drop struct_mutex, prefault the user buffer, and use a sleeping slow
 * path.  The pinned backing pages keep the object stable while the lock
 * is dropped. */
static int
i915_gem_shmem_pread(struct drm_device *dev,
                     struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pread *args,
                     struct drm_file *file)
{
        char __user *user_data;
        ssize_t remain;
        loff_t offset;
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int prefaulted = 0;
        int needs_clflush = 0;
        struct sg_page_iter sg_iter;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

        /* Syncs with the GPU and pins the backing pages. */
        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;

        offset = args->offset;

        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
                struct vm_page *page = sg_page_iter_page(&sg_iter);

                if (remain <= 0)
                        break;

                /* Operation in this page
                 *
                 * shmem_page_offset = offset within page in shmem file
                 * page_length = bytes to copy for this page
                 */
                shmem_page_offset = offset_in_page(offset);
                page_length = remain;
                if ((shmem_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - shmem_page_offset;

                /* Bit 17 of the physical address selects the swizzled channel. */
                page_do_bit17_swizzling = obj_do_bit17_swizzling &&
                        (page_to_phys(page) & (1 << 17)) != 0;

                ret = shmem_pread_fast(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);
                if (ret == 0)
                        goto next_page;

                /* Fast path faulted: drop the lock so the slow path may
                 * fault in the user pages. */
                mutex_unlock(&dev->struct_mutex);

                if (likely(!i915.prefault_disable) && !prefaulted) {
                        ret = fault_in_multipages_writeable(user_data, remain);
                        /* Userspace is tricking us, but we've already clobbered
                         * its pages with the prefault and promised to write the
                         * data up to the first fault. Hence ignore any errors
                         * and just continue. */
                        (void)ret;
                        prefaulted = 1;
                }

                ret = shmem_pread_slow(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);

                mutex_lock(&dev->struct_mutex);

                if (ret)
                        goto out;

next_page:
                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

out:
        i915_gem_object_unpin_pages(obj);

        return ret;
}
683
684 /**
685  * Reads data from the object referenced by handle.
686  *
687  * On error, the contents of *data are undefined.
688  */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *file)
{
        struct drm_i915_gem_pread *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        /* Zero-length reads trivially succeed. */
        if (args->size == 0)
                return 0;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Bounds check source.  The offset is validated first so the
         * "size - offset" subtraction below cannot underflow. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
                goto out;
        }

        /* prime objects have no backing filp to GEM pread/pwrite
         * pages from.
         */

        trace_i915_gem_object_pread(obj, args->offset, args->size);

        ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}
731
732 /* This is the fast write path which cannot handle
733  * page faults in the source data
734  */
735
/* Copy length bytes from userspace into the aperture at
 * page_base + page_offset through an atomic write-combining mapping.
 * Returns the number of bytes left uncopied: 0 on success, non-zero if
 * the copy faulted (the caller then falls back to a slow path). */
static inline int
fast_user_write(struct io_mapping *mapping,
                loff_t page_base, int page_offset,
                char __user *user_data,
                int length)
{
        void __iomem *vaddr_atomic;
        void *vaddr;
        unsigned long unwritten;

        vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
        /* We can use the cpu mem copy function because this is X86. */
        vaddr = (char __force*)vaddr_atomic + page_offset;
        unwritten = __copy_from_user_inatomic_nocache(vaddr,
                                                      user_data, length);
        io_mapping_unmap_atomic(vaddr_atomic);
        return unwritten;
}
754
755 /**
756  * This is the fast pwrite path, where we copy the data directly from the
757  * user into the GTT, uncached.
758  */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
                         struct drm_i915_gem_object *obj,
                         struct drm_i915_gem_pwrite *args,
                         struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        ssize_t remain;
        loff_t offset, page_base;
        char __user *user_data;
        int page_offset, page_length, ret;

        /* Pin into the CPU-mappable part of the GGTT, without evicting. */
        ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
        if (ret)
                goto out;

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;

        /* Drop any fence so writes go through untiled. */
        ret = i915_gem_object_put_fence(obj);
        if (ret)
                goto out_unpin;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

        intel_fb_obj_invalidate(obj, ORIGIN_GTT);

        while (remain > 0) {
                /* Operation in this page
                 *
                 * page_base = page offset within aperture
                 * page_offset = offset within page
                 * page_length = bytes to copy for this page
                 */
                page_base = offset & ~PAGE_MASK;
                page_offset = offset_in_page(offset);
                page_length = remain;
                if ((page_offset + remain) > PAGE_SIZE)
                        page_length = PAGE_SIZE - page_offset;

                /* If we get a fault while copying data, then (presumably) our
                 * source page isn't available.  Return the error and we'll
                 * retry in the slow path.
                 */
                if (fast_user_write(dev_priv->gtt.mappable, page_base,
                                    page_offset, user_data, page_length)) {
                        ret = -EFAULT;
                        goto out_flush;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

out_flush:
        intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
        i915_gem_object_ggtt_unpin(obj);
out:
        return ret;
}
825
826 /* Per-page copy function for the shmem pwrite fastpath.
827  * Flushes invalid cachelines before writing to the target if
828  * needs_clflush_before is set and flushes out any written cachelines after
829  * writing if needs_clflush is set. */
830 static int
831 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
832                   char __user *user_data,
833                   bool page_do_bit17_swizzling,
834                   bool needs_clflush_before,
835                   bool needs_clflush_after)
836 {
837         char *vaddr;
838         int ret;
839
840         if (unlikely(page_do_bit17_swizzling))
841                 return -EINVAL;
842
843         vaddr = kmap_atomic(page);
844         if (needs_clflush_before)
845                 drm_clflush_virt_range(vaddr + shmem_page_offset,
846                                        page_length);
847         ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
848                                         user_data, page_length);
849         if (needs_clflush_after)
850                 drm_clflush_virt_range(vaddr + shmem_page_offset,
851                                        page_length);
852         kunmap_atomic(vaddr);
853
854         return ret ? -EFAULT : 0;
855 }
856
857 /* Only difference to the fast-path function is that this can handle bit17
858  * and uses non-atomic copy and kmap functions. */
859 static int
860 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
861                   char __user *user_data,
862                   bool page_do_bit17_swizzling,
863                   bool needs_clflush_before,
864                   bool needs_clflush_after)
865 {
866         char *vaddr;
867         int ret;
868
869         vaddr = kmap(page);
870         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
871                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
872                                              page_length,
873                                              page_do_bit17_swizzling);
874         if (page_do_bit17_swizzling)
875                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
876                                                 user_data,
877                                                 page_length);
878         else
879                 ret = __copy_from_user(vaddr + shmem_page_offset,
880                                        user_data,
881                                        page_length);
882         if (needs_clflush_after)
883                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
884                                              page_length,
885                                              page_do_bit17_swizzling);
886         kunmap(page);
887
888         return ret ? -EFAULT : 0;
889 }
890
891 static int
892 i915_gem_shmem_pwrite(struct drm_device *dev,
893                       struct drm_i915_gem_object *obj,
894                       struct drm_i915_gem_pwrite *args,
895                       struct drm_file *file)
896 {
897         ssize_t remain;
898         loff_t offset;
899         char __user *user_data;
900         int shmem_page_offset, page_length, ret = 0;
901         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
902         int hit_slowpath = 0;
903         int needs_clflush_after = 0;
904         int needs_clflush_before = 0;
905         struct sg_page_iter sg_iter;
906
907         user_data = to_user_ptr(args->data_ptr);
908         remain = args->size;
909
910         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
911
912         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
913                 /* If we're not in the cpu write domain, set ourself into the gtt
914                  * write domain and manually flush cachelines (if required). This
915                  * optimizes for the case when the gpu will use the data
916                  * right away and we therefore have to clflush anyway. */
917                 needs_clflush_after = cpu_write_needs_clflush(obj);
918                 ret = i915_gem_object_wait_rendering(obj, false);
919                 if (ret)
920                         return ret;
921         }
922         /* Same trick applies to invalidate partially written cachelines read
923          * before writing. */
924         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
925                 needs_clflush_before =
926                         !cpu_cache_is_coherent(dev, obj->cache_level);
927
928         ret = i915_gem_object_get_pages(obj);
929         if (ret)
930                 return ret;
931
932         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
933
934         i915_gem_object_pin_pages(obj);
935
936         offset = args->offset;
937         obj->dirty = 1;
938
939         VM_OBJECT_LOCK(obj->base.vm_obj);
940         vm_object_pip_add(obj->base.vm_obj, 1);
941
942         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
943                          offset >> PAGE_SHIFT) {
944                 struct vm_page *page = sg_page_iter_page(&sg_iter);
945                 int partial_cacheline_write;
946
947                 if (remain <= 0)
948                         break;
949
950                 /* Operation in this page
951                  *
952                  * shmem_page_offset = offset within page in shmem file
953                  * page_length = bytes to copy for this page
954                  */
955                 shmem_page_offset = offset_in_page(offset);
956
957                 page_length = remain;
958                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
959                         page_length = PAGE_SIZE - shmem_page_offset;
960
961                 /* If we don't overwrite a cacheline completely we need to be
962                  * careful to have up-to-date data by first clflushing. Don't
963                  * overcomplicate things and flush the entire patch. */
964                 partial_cacheline_write = needs_clflush_before &&
965                         ((shmem_page_offset | page_length)
966                                 & (cpu_clflush_line_size - 1));
967
968                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
969                         (page_to_phys(page) & (1 << 17)) != 0;
970
971                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
972                                         user_data, page_do_bit17_swizzling,
973                                         partial_cacheline_write,
974                                         needs_clflush_after);
975                 if (ret == 0)
976                         goto next_page;
977
978                 hit_slowpath = 1;
979                 mutex_unlock(&dev->struct_mutex);
980                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
981                                         user_data, page_do_bit17_swizzling,
982                                         partial_cacheline_write,
983                                         needs_clflush_after);
984
985                 mutex_lock(&dev->struct_mutex);
986
987                 if (ret)
988                         goto out;
989
990 next_page:
991                 remain -= page_length;
992                 user_data += page_length;
993                 offset += page_length;
994         }
995         vm_object_pip_wakeup(obj->base.vm_obj);
996         VM_OBJECT_UNLOCK(obj->base.vm_obj);
997
998 out:
999         i915_gem_object_unpin_pages(obj);
1000
1001         if (hit_slowpath) {
1002                 /*
1003                  * Fixup: Flush cpu caches in case we didn't flush the dirty
1004                  * cachelines in-line while writing and the object moved
1005                  * out of the cpu write domain while we've dropped the lock.
1006                  */
1007                 if (!needs_clflush_after &&
1008                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1009                         if (i915_gem_clflush_object(obj, obj->pin_display))
1010                                 needs_clflush_after = true;
1011                 }
1012         }
1013
1014         if (needs_clflush_after)
1015                 i915_gem_chipset_flush(dev);
1016         else
1017                 obj->cache_dirty = true;
1018
1019         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1020         return ret;
1021 }
1022
/**
 * Writes data to the object referenced by handle.
 *
 * Validates the user-supplied range, then chooses a write path: the GTT
 * fast path for untiled non-CPU-domain objects, otherwise the phys or
 * shmem fallback.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	/* Zero-length writes succeed as a no-op. */
	if (args->size == 0)
		return 0;

	/* Pre-fault the user buffer so the copy loops are less likely to
	 * fault while the device locks are held. */
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	/* Keep the device powered for the duration of the write. */
	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. (Written in an overflow-safe form:
	 * size is compared against the remaining space, not offset+size.) */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	/* ret is still -EFAULT here when the GTT path was skipped entirely,
	 * which also routes us into the fallback below. */
	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}
1104
1105 int
1106 i915_gem_check_wedge(struct i915_gpu_error *error,
1107                      bool interruptible)
1108 {
1109         if (i915_reset_in_progress(error)) {
1110                 /* Non-interruptible callers can't handle -EAGAIN, hence return
1111                  * -EIO unconditionally for these. */
1112                 if (!interruptible)
1113                         return -EIO;
1114
1115                 /* Recovery complete, but the reset failed ... */
1116                 if (i915_terminally_wedged(error))
1117                         return -EIO;
1118
1119                 /*
1120                  * Check if GPU Reset is in progress - we need intel_ring_begin
1121                  * to work properly to reinit the hw state while the gpu is
1122                  * still marked as reset-in-progress. Handle this with a flag.
1123                  */
1124                 if (!error->reload_in_reset)
1125                         return -EAGAIN;
1126         }
1127
1128         return 0;
1129 }
1130
/* Timer callback standing in for a missed completion interrupt: wake one
 * thread sleeping on the wait queue whose address was stashed in @data
 * (armed from __i915_wait_request via setup_timer_on_stack). */
static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}
1135
/* Returns true if @ring is flagged in gpu_error.missed_irq_rings, i.e.
 * its completion interrupts are known to be unreliable and waiters must
 * poll with a timer instead of sleeping indefinitely. */
static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}
1141
#if 0
/* Busy-wait briefly for a request that is expected to complete soon,
 * avoiding the cost of IRQ setup/teardown for short waits. Compiled out
 * in this port (the lksleep-based wait in __i915_wait_request is used
 * instead). */
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	/* Someone is already relying on interrupts; don't spin as well. */
	if (req->ring->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	/* One final, non-lazy check before giving up the CPU. */
	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif
1185
/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to credit for an RPS frequency boost while waiting (may be NULL)
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the alloted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			unsigned reset_counter,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
	unsigned long timeout_expire;
	s64 before, now;
	int ret, sl_timeout = 1;	/* lksleep() timeout in ticks */

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	/* An unqueued/retired request needs no waiting. */
	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
	}

	/* Ask the GPU to clock up while a client is actively waiting. */
	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(req);
	before = ktime_get_raw_ns();

	/* Optimistic spin for the next jiffie before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
		ret = -ENODEV;
		goto out;
	}

	/* Hold the wait-queue lock across the check/sleep cycle so wakeups
	 * from the IRQ handler or fake_irq timer are not lost. */
	lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truely gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		/* Arm a backup timer when we have a deadline or the ring is
		 * known to miss interrupts, bounding the sleep below. */
		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, ring)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		/* Sleep until an IRQ (or fake_irq) wakes the queue, a signal
		 * arrives (PCATCH), or the sl_timeout tick limit expires. */
		lksleep(&ring->irq_queue, &ring->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&ring->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		ring->irq_put(ring);

out:
	now = ktime_get_raw_ns();
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (now - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}
1336
1337 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1338                                    struct drm_file *file)
1339 {
1340         struct drm_i915_private *dev_private;
1341         struct drm_i915_file_private *file_priv;
1342
1343         WARN_ON(!req || !file || req->file_priv);
1344
1345         if (!req || !file)
1346                 return -EINVAL;
1347
1348         if (req->file_priv)
1349                 return -EINVAL;
1350
1351         dev_private = req->ring->dev->dev_private;
1352         file_priv = file->driver_priv;
1353
1354         spin_lock(&file_priv->mm.lock);
1355         req->file_priv = file_priv;
1356         list_add_tail(&req->client_list, &file_priv->mm.request_list);
1357         spin_unlock(&file_priv->mm.lock);
1358
1359         req->pid = curproc->p_pid;
1360
1361         return 0;
1362 }
1363
1364 static inline void
1365 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1366 {
1367         struct drm_i915_file_private *file_priv = request->file_priv;
1368
1369         if (!file_priv)
1370                 return;
1371
1372         spin_lock(&file_priv->mm.lock);
1373         list_del(&request->client_list);
1374         request->file_priv = NULL;
1375         spin_unlock(&file_priv->mm.lock);
1376
1377 #if 0
1378         put_pid(request->pid);
1379         request->pid = NULL;
1380 #endif
1381 }
1382
/* Retire a completed request: record the GPU's progress on its ring,
 * unlink it from the engine and client lists, and drop the list's
 * reference. Must be called in completion order. */
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	/* Drops the reference held by the engine's request list. */
	i915_gem_request_unreference(request);
}
1402
1403 static void
1404 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1405 {
1406         struct intel_engine_cs *engine = req->ring;
1407         struct drm_i915_gem_request *tmp;
1408
1409         lockdep_assert_held(&engine->dev->struct_mutex);
1410
1411         if (list_empty(&req->list))
1412                 return;
1413
1414         do {
1415                 tmp = list_first_entry(&engine->request_list,
1416                                        typeof(*tmp), list);
1417
1418                 i915_gem_request_retire(tmp);
1419         } while (tmp != req);
1420
1421         WARN_ON(i915_verify_lists(engine->dev));
1422 }
1423
1424 /**
1425  * Waits for a request to be signaled, and cleans up the
1426  * request and object lists appropriately for that event.
1427  */
1428 int
1429 i915_wait_request(struct drm_i915_gem_request *req)
1430 {
1431         struct drm_device *dev;
1432         struct drm_i915_private *dev_priv;
1433         bool interruptible;
1434         int ret;
1435
1436         BUG_ON(req == NULL);
1437
1438         dev = req->ring->dev;
1439         dev_priv = dev->dev_private;
1440         interruptible = dev_priv->mm.interruptible;
1441
1442         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1443
1444         ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1445         if (ret)
1446                 return ret;
1447
1448         ret = __i915_wait_request(req,
1449                                   atomic_read(&dev_priv->gpu_error.reset_counter),
1450                                   interruptible, NULL, NULL);
1451         if (ret)
1452                 return ret;
1453
1454         __i915_gem_request_retire__upto(req);
1455         return 0;
1456 }
1457
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 * @obj: object to wait on
 * @readonly: when true only the last GPU write is waited for; further
 *            reads by the GPU may still be outstanding
 *
 * Returns 0 on success or the error from i915_wait_request().
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	/* Nothing outstanding on the GPU. */
	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			/* If the last write is also the last read on its
			 * ring, retire the read tracking (presumably this
			 * also drops the write tracking — see
			 * i915_gem_object_retire__read); otherwise only the
			 * write tracking is cleared. */
			i = obj->last_write_req->ring->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		/* Wait for the last read on every ring in turn. */
		for (i = 0; i < I915_NUM_RINGS; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		/* All reads retired, so the object must now be idle. */
		RQ_BUG_ON(obj->active);
	}

	return 0;
}
1499
1500 static void
1501 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1502                                struct drm_i915_gem_request *req)
1503 {
1504         int ring = req->ring->id;
1505
1506         if (obj->last_read_req[ring] == req)
1507                 i915_gem_object_retire__read(obj, ring);
1508         else if (obj->last_write_req == req)
1509                 i915_gem_object_retire__write(obj);
1510
1511         __i915_gem_request_retire__upto(req);
1512 }
1513
/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 *
 * References are taken on all outstanding requests, struct_mutex is
 * dropped while waiting on them, and on relock the completed requests
 * are retired against the object.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
	unsigned reset_counter;
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	/* Sample the reset counter while still locked; the unlocked waits
	 * below compare against it to detect an intervening GPU reset. */
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		/* Hold a reference so the request outlives the unlock. */
		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	/* Stop waiting at the first error. */
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], reset_counter, true,
					  NULL, rps);
	mutex_lock(&dev->struct_mutex);

	/* Retire what completed (only on full success) and drop all the
	 * references taken above, even on error. */
	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}
1574
1575 static struct intel_rps_client *to_rps_client(struct drm_file *file)
1576 {
1577         struct drm_i915_file_private *fpriv = file->driver_priv;
1578         return &fpriv->rps;
1579 }
1580
1581 /**
1582  * Called when user space prepares to use an object with the CPU, either
1583  * through the mmap ioctl's mapping or a GTT mapping.
1584  */
1585 int
1586 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1587                           struct drm_file *file)
1588 {
1589         struct drm_i915_gem_set_domain *args = data;
1590         struct drm_i915_gem_object *obj;
1591         uint32_t read_domains = args->read_domains;
1592         uint32_t write_domain = args->write_domain;
1593         int ret;
1594
1595         /* Only handle setting domains to types used by the CPU. */
1596         if (write_domain & I915_GEM_GPU_DOMAINS)
1597                 return -EINVAL;
1598
1599         if (read_domains & I915_GEM_GPU_DOMAINS)
1600                 return -EINVAL;
1601
1602         /* Having something in the write domain implies it's in the read
1603          * domain, and only that read domain.  Enforce that in the request.
1604          */
1605         if (write_domain != 0 && read_domains != write_domain)
1606                 return -EINVAL;
1607
1608         ret = i915_mutex_lock_interruptible(dev);
1609         if (ret)
1610                 return ret;
1611
1612         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1613         if (&obj->base == NULL) {
1614                 ret = -ENOENT;
1615                 goto unlock;
1616         }
1617
1618         /* Try to flush the object off the GPU without holding the lock.
1619          * We will repeat the flush holding the lock in the normal manner
1620          * to catch cases where we are gazumped.
1621          */
1622         ret = i915_gem_object_wait_rendering__nonblocking(obj,
1623                                                           to_rps_client(file),
1624                                                           !write_domain);
1625         if (ret)
1626                 goto unref;
1627
1628         if (read_domains & I915_GEM_DOMAIN_GTT)
1629                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1630         else
1631                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1632
1633         if (write_domain != 0)
1634                 intel_fb_obj_invalidate(obj,
1635                                         write_domain == I915_GEM_DOMAIN_GTT ?
1636                                         ORIGIN_GTT : ORIGIN_CPU);
1637
1638 unref:
1639         drm_gem_object_unreference(&obj->base);
1640 unlock:
1641         mutex_unlock(&dev->struct_mutex);
1642         return ret;
1643 }
1644
1645 /**
1646  * Called when user space has done writes to this buffer
1647  */
1648 int
1649 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1650                          struct drm_file *file)
1651 {
1652         struct drm_i915_gem_sw_finish *args = data;
1653         struct drm_i915_gem_object *obj;
1654         int ret = 0;
1655
1656         ret = i915_mutex_lock_interruptible(dev);
1657         if (ret)
1658                 return ret;
1659
1660         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1661         if (&obj->base == NULL) {
1662                 ret = -ENOENT;
1663                 goto unlock;
1664         }
1665
1666         /* Pinned buffers may be scanout, so flush the cache */
1667         if (obj->pin_display)
1668                 i915_gem_object_flush_cpu_write_domain(obj);
1669
1670         drm_gem_object_unreference(&obj->base);
1671 unlock:
1672         mutex_unlock(&dev->struct_mutex);
1673         return ret;
1674 }
1675
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look a this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	/* I915_MMAP_WC is the only flag defined for this ioctl.
	 * NOTE(review): the flag is accepted but does not appear to alter
	 * the mapping below — confirm WC mappings are intentionally
	 * unimplemented here. */
	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	/* A zero-length mmap succeeds without creating a mapping. */
	if (args->size == 0)
		goto out;

	/* Enforce the process's RLIMIT_VMEM address-space limit. */
	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */

	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment.  It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	/* Take an extra reference on the backing VM object for the mapping;
	 * vm_map_find consumes it on success, we drop it on failure. */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}
1762
1763 /**
1764  * i915_gem_fault - fault a page into the GTT
1765  *
1766  * vm_obj is locked on entry and expected to be locked on return.
1767  *
1768  * The vm_pager has placemarked the object with an anonymous memory page
1769  * which we must replace atomically to avoid races against concurrent faults
1770  * on the same page.  XXX we currently are unable to do this atomically.
1771  *
1772  * If we are to return an error we should not touch the anonymous page,
1773  * the caller will deallocate it.
1774  *
1775  * XXX Most GEM calls appear to be interruptable, but we can't hard loop
1776  * in that case.  Release all resources and wait 1 tick before retrying.
1777  * This is a huge problem which needs to be fixed by getting rid of most
1778  * of the interruptability.  The linux code does not retry but does appear
1779  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1780  * to be able to retry.
1781  *
1782  * --
1783  * @vma: VMA in question
1784  * @vmf: fault info
1785  *
1786  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1787  * from userspace.  The fault handler takes care of binding the object to
1788  * the GTT (if needed), allocating and programming a fence register (again,
1789  * only if needed based on whether the old reg is still valid or the object
1790  * is tiled) and inserting a new PTE into the faulting process.
1791  *
1792  * Note that the faulting process may involve evicting existing objects
1793  * from the GTT and/or fence registers to make room.  So performance may
1794  * suffer if the GTT working set is large or there are few fence registers
1795  * left.
1796  *
1797  * vm_obj is locked on entry and expected to be locked on return.  The VM
1798  * pager has placed an anonymous memory page at (obj,offset) which we have
1799  * to replace.
1800  */
1801 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1802 {
1803         struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1804         struct drm_device *dev = obj->base.dev;
1805         struct drm_i915_private *dev_priv = dev->dev_private;
1806         struct i915_ggtt_view view = i915_ggtt_view_normal;
1807         unsigned long page_offset;
1808         vm_page_t m, oldm = NULL;
1809         int ret = 0;
1810         bool write = !!(prot & VM_PROT_WRITE);
1811
1812         intel_runtime_pm_get(dev_priv);
1813
1814         /* We don't use vmf->pgoff since that has the fake offset */
1815         page_offset = (unsigned long)offset;
1816
1817 retry:
1818         ret = i915_mutex_lock_interruptible(dev);
1819         if (ret)
1820                 goto out;
1821
1822         trace_i915_gem_object_fault(obj, page_offset, true, write);
1823
1824         /* Try to flush the object off the GPU first without holding the lock.
1825          * Upon reacquiring the lock, we will perform our sanity checks and then
1826          * repeat the flush holding the lock in the normal manner to catch cases
1827          * where we are gazumped.
1828          */
1829         ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1830         if (ret)
1831                 goto unlock;
1832
1833         /* Access to snoopable pages through the GTT is incoherent. */
1834         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1835                 ret = -EFAULT;
1836                 goto unlock;
1837         }
1838
1839         /* Use a partial view if the object is bigger than the aperture. */
1840         if (obj->base.size >= dev_priv->gtt.mappable_end &&
1841             obj->tiling_mode == I915_TILING_NONE) {
1842 #if 0
1843                 static const unsigned int chunk_size = 256; // 1 MiB
1844
1845                 memset(&view, 0, sizeof(view));
1846                 view.type = I915_GGTT_VIEW_PARTIAL;
1847                 view.params.partial.offset = rounddown(page_offset, chunk_size);
1848                 view.params.partial.size =
1849                         min_t(unsigned int,
1850                               chunk_size,
1851                               (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1852                               view.params.partial.offset);
1853 #endif
1854         }
1855
1856         /* Now pin it into the GTT if needed */
1857         ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
1858         if (ret)
1859                 goto unlock;
1860
1861         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1862         if (ret)
1863                 goto unpin;
1864
1865         ret = i915_gem_object_get_fence(obj);
1866         if (ret)
1867                 goto unpin;
1868
1869         /*
1870          * START FREEBSD MAGIC
1871          *
1872          * Add a pip count to avoid destruction and certain other
1873          * complex operations (such as collapses?) while unlocked.
1874          */
1875         vm_object_pip_add(vm_obj, 1);
1876
1877         /*
1878          * XXX We must currently remove the placeholder page now to avoid
1879          * a deadlock against a concurrent i915_gem_release_mmap().
1880          * Otherwise concurrent operation will block on the busy page
1881          * while holding locks which we need to obtain.
1882          */
1883         if (*mres != NULL) {
1884                 oldm = *mres;
1885                 if ((oldm->flags & PG_BUSY) == 0)
1886                         kprintf("i915_gem_fault: Page was not busy\n");
1887                 else
1888                         vm_page_remove(oldm);
1889                 *mres = NULL;
1890         } else {
1891                 oldm = NULL;
1892         }
1893
1894         ret = 0;
1895         m = NULL;
1896
1897         /*
1898          * Since the object lock was dropped, another thread might have
1899          * faulted on the same GTT address and instantiated the mapping.
1900          * Recheck.
1901          */
1902         m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1903         if (m != NULL) {
1904                 /*
1905                  * Try to busy the page, retry on failure (non-zero ret).
1906                  */
1907                 if (vm_page_busy_try(m, false)) {
1908                         kprintf("i915_gem_fault: PG_BUSY\n");
1909                         ret = -EINTR;
1910                         goto unlock;
1911                 }
1912                 goto have_page;
1913         }
1914         /*
1915          * END FREEBSD MAGIC
1916          */
1917
1918         obj->fault_mappable = true;
1919
1920         /* Finally, remap it using the new GTT offset */
1921         m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
1922                         i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
1923         if (m == NULL) {
1924                 ret = -EFAULT;
1925                 goto unpin;
1926         }
1927         KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
1928         KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
1929
1930         /*
1931          * Try to busy the page.  Fails on non-zero return.
1932          */
1933         if (vm_page_busy_try(m, false)) {
1934                 kprintf("i915_gem_fault: PG_BUSY(2)\n");
1935                 ret = -EINTR;
1936                 goto unpin;
1937         }
1938         m->valid = VM_PAGE_BITS_ALL;
1939
1940 #if 0
1941         if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1942                 /* Overriding existing pages in partial view does not cause
1943                  * us any trouble as TLBs are still valid because the fault
1944                  * is due to userspace losing part of the mapping or never
1945                  * having accessed it before (at this partials' range).
1946                  */
1947                 unsigned long base = vma->vm_start +
1948                                      (view.params.partial.offset << PAGE_SHIFT);
1949                 unsigned int i;
1950
1951                 for (i = 0; i < view.params.partial.size; i++) {
1952                         ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
1953                         if (ret)
1954                                 break;
1955                 }
1956
1957                 obj->fault_mappable = true;
1958         } else {
1959                 if (!obj->fault_mappable) {
1960                         unsigned long size = min_t(unsigned long,
1961                                                    vma->vm_end - vma->vm_start,
1962                                                    obj->base.size);
1963                         int i;
1964
1965                         for (i = 0; i < size >> PAGE_SHIFT; i++) {
1966                                 ret = vm_insert_pfn(vma,
1967                                                     (unsigned long)vma->vm_start + i * PAGE_SIZE,
1968                                                     pfn + i);
1969                                 if (ret)
1970                                         break;
1971                         }
1972
1973                         obj->fault_mappable = true;
1974                 } else
1975                         ret = vm_insert_pfn(vma,
1976                                             (unsigned long)vmf->virtual_address,
1977                                             pfn + page_offset);
1978 #endif
1979                         vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1980 #if 0
1981         }
1982 #endif
1983
1984 have_page:
1985         *mres = m;
1986
1987         i915_gem_object_ggtt_unpin_view(obj, &view);
1988         mutex_unlock(&dev->struct_mutex);
1989         ret = VM_PAGER_OK;
1990         goto done;
1991
1992         /*
1993          * ALTERNATIVE ERROR RETURN.
1994          *
1995          * OBJECT EXPECTED TO BE LOCKED.
1996          */
1997 unpin:
1998         i915_gem_object_ggtt_unpin_view(obj, &view);
1999 unlock:
2000         mutex_unlock(&dev->struct_mutex);
2001 out:
2002         switch (ret) {
2003         case -EIO:
2004                 /*
2005                  * We eat errors when the gpu is terminally wedged to avoid
2006                  * userspace unduly crashing (gl has no provisions for mmaps to
2007                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
2008                  * and so needs to be reported.
2009                  */
2010                 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2011 //                      ret = VM_FAULT_SIGBUS;
2012                         break;
2013                 }
2014         case -EAGAIN:
2015                 /*
2016                  * EAGAIN means the gpu is hung and we'll wait for the error
2017                  * handler to reset everything when re-faulting in
2018                  * i915_mutex_lock_interruptible.
2019                  */
2020         case -ERESTARTSYS:
2021         case -EINTR:
2022                 VM_OBJECT_UNLOCK(vm_obj);
2023                 int dummy;
2024                 tsleep(&dummy, 0, "delay", 1); /* XXX */
2025                 VM_OBJECT_LOCK(vm_obj);
2026                 goto retry;
2027         default:
2028                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2029                 ret = VM_PAGER_ERROR;
2030                 break;
2031         }
2032
2033 done:
2034         if (oldm != NULL)
2035                 vm_page_free(oldm);
2036         vm_object_pip_wakeup(vm_obj);
2037
2038         intel_runtime_pm_put(dev_priv);
2039         return ret;
2040 }
2041
2042 /**
2043  * i915_gem_release_mmap - remove physical page mappings
2044  * @obj: obj in question
2045  *
2046  * Preserve the reservation of the mmapping with the DRM core code, but
2047  * relinquish ownership of the pages back to the system.
2048  *
2049  * It is vital that we remove the page mapping if we have mapped a tiled
2050  * object through the GTT and then lose the fence register due to
2051  * resource pressure. Similarly if the object has been moved out of the
2052  * aperture, than pages mapped into userspace must be revoked. Removing the
2053  * mapping will then trigger a page fault on the next user access, allowing
2054  * fixup by i915_gem_fault().
2055  */
2056 void
2057 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2058 {
2059         vm_object_t devobj;
2060         vm_page_t m;
2061         int i, page_count;
2062
2063         if (!obj->fault_mappable)
2064                 return;
2065
2066         devobj = cdev_pager_lookup(obj);
2067         if (devobj != NULL) {
2068                 page_count = OFF_TO_IDX(obj->base.size);
2069
2070                 VM_OBJECT_LOCK(devobj);
2071                 for (i = 0; i < page_count; i++) {
2072                         m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2073                         if (m == NULL)
2074                                 continue;
2075                         cdev_pager_free_page(devobj, m);
2076                 }
2077                 VM_OBJECT_UNLOCK(devobj);
2078                 vm_object_deallocate(devobj);
2079         }
2080
2081         obj->fault_mappable = false;
2082 }
2083
2084 void
2085 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2086 {
2087         struct drm_i915_gem_object *obj;
2088
2089         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2090                 i915_gem_release_mmap(obj);
2091 }
2092
2093 uint32_t
2094 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2095 {
2096         uint32_t gtt_size;
2097
2098         if (INTEL_INFO(dev)->gen >= 4 ||
2099             tiling_mode == I915_TILING_NONE)
2100                 return size;
2101
2102         /* Previous chips need a power-of-two fence region when tiling */
2103         if (INTEL_INFO(dev)->gen == 3)
2104                 gtt_size = 1024*1024;
2105         else
2106                 gtt_size = 512*1024;
2107
2108         while (gtt_size < size)
2109                 gtt_size <<= 1;
2110
2111         return gtt_size;
2112 }
2113
2114 /**
2115  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2116  * @obj: object to check
2117  *
2118  * Return the required GTT alignment for an object, taking into account
2119  * potential fence register mapping.
2120  */
2121 uint32_t
2122 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2123                            int tiling_mode, bool fenced)
2124 {
2125         /*
2126          * Minimum alignment is 4k (GTT page size), but might be greater
2127          * if a fence register is needed for the object.
2128          */
2129         if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2130             tiling_mode == I915_TILING_NONE)
2131                 return 4096;
2132
2133         /*
2134          * Previous chips need to be aligned to the size of the smallest
2135          * fence register that can contain the object.
2136          */
2137         return i915_gem_get_gtt_size(dev, size, tiling_mode);
2138 }
2139
2140 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2141 {
2142         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2143         int ret;
2144
2145 #if 0
2146         if (drm_vma_node_has_offset(&obj->base.vma_node))
2147                 return 0;
2148 #endif
2149
2150         dev_priv->mm.shrinker_no_lock_stealing = true;
2151
2152         ret = drm_gem_create_mmap_offset(&obj->base);
2153         if (ret != -ENOSPC)
2154                 goto out;
2155
2156         /* Badly fragmented mmap space? The only way we can recover
2157          * space is by destroying unwanted objects. We can't randomly release
2158          * mmap_offsets as userspace expects them to be persistent for the
2159          * lifetime of the objects. The closest we can is to release the
2160          * offsets on purgeable objects by truncating it and marking it purged,
2161          * which prevents userspace from ever using that object again.
2162          */
2163         i915_gem_shrink(dev_priv,
2164                         obj->base.size >> PAGE_SHIFT,
2165                         I915_SHRINK_BOUND |
2166                         I915_SHRINK_UNBOUND |
2167                         I915_SHRINK_PURGEABLE);
2168         ret = drm_gem_create_mmap_offset(&obj->base);
2169         if (ret != -ENOSPC)
2170                 goto out;
2171
2172         i915_gem_shrink_all(dev_priv);
2173         ret = drm_gem_create_mmap_offset(&obj->base);
2174 out:
2175         dev_priv->mm.shrinker_no_lock_stealing = false;
2176
2177         return ret;
2178 }
2179
/* Release the fake mmap offset reserved for @obj, if one exists. */
static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
        drm_gem_free_mmap_offset(&obj->base);
}
2184
2185 int
2186 i915_gem_mmap_gtt(struct drm_file *file,
2187                   struct drm_device *dev,
2188                   uint32_t handle,
2189                   uint64_t *offset)
2190 {
2191         struct drm_i915_gem_object *obj;
2192         int ret;
2193
2194         ret = i915_mutex_lock_interruptible(dev);
2195         if (ret)
2196                 return ret;
2197
2198         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2199         if (&obj->base == NULL) {
2200                 ret = -ENOENT;
2201                 goto unlock;
2202         }
2203
2204         if (obj->madv != I915_MADV_WILLNEED) {
2205                 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2206                 ret = -EFAULT;
2207                 goto out;
2208         }
2209
2210         ret = i915_gem_object_create_mmap_offset(obj);
2211         if (ret)
2212                 goto out;
2213
2214         *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2215             DRM_GEM_MAPPING_KEY;
2216
2217 out:
2218         drm_gem_object_unreference(&obj->base);
2219 unlock:
2220         mutex_unlock(&dev->struct_mutex);
2221         return ret;
2222 }
2223
2224 /**
2225  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2226  * @dev: DRM device
2227  * @data: GTT mapping ioctl data
2228  * @file: GEM object info
2229  *
2230  * Simply returns the fake offset to userspace so it can mmap it.
2231  * The mmap call will end up in drm_gem_mmap(), which will set things
2232  * up so we can get faults in the handler above.
2233  *
2234  * The fault handler will take care of binding the object into the GTT
2235  * (since it may have been evicted to make room for something), allocating
2236  * a fence register, and mapping the appropriate aperture address into
2237  * userspace.
2238  */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
{
        struct drm_i915_gem_mmap_gtt *args = data;

        /* args->offset is an out-parameter: the fake offset for mmap(2). */
        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}
2247
2248 /* Immediately discard the backing storage */
2249 static void
2250 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2251 {
2252         vm_object_t vm_obj;
2253
2254         vm_obj = obj->base.vm_obj;
2255         VM_OBJECT_LOCK(vm_obj);
2256         vm_object_page_remove(vm_obj, 0, 0, false);
2257         VM_OBJECT_UNLOCK(vm_obj);
2258
2259         obj->madv = __I915_MADV_PURGED;
2260 }
2261
2262 /* Try to discard unwanted pages */
static void
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
#if 0
        struct address_space *mapping;
#endif

        /* DONTNEED objects are truncated first and then treated as purged. */
        switch (obj->madv) {
        case I915_MADV_DONTNEED:
                i915_gem_object_truncate(obj);
                /* FALLTHROUGH */
        case __I915_MADV_PURGED:
                return;
        }

#if 0
        if (obj->base.filp == NULL)
                return;

        mapping = file_inode(obj->base.filp)->i_mapping,
        invalidate_mapping_pages(mapping, 0, (loff_t)-1);
#endif
}
2285
/*
 * Release the shmem-backed pages of @obj: move it to the CPU domain,
 * tear down the GTT mapping state, write back dirty pages, unwire them,
 * and free the scatter/gather table.
 */
static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
        struct sg_page_iter sg_iter;
        int ret;

        BUG_ON(obj->madv == __I915_MADV_PURGED);

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret) {
                /* In the event of a disaster, abandon all caches and
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
                i915_gem_clflush_object(obj, true);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }

        i915_gem_gtt_finish_object(obj);

        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_save_bit_17_swizzle(obj);

        /* Pages marked DONTNEED need not be written back. */
        if (obj->madv == I915_MADV_DONTNEED)
                obj->dirty = 0;

        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
                struct vm_page *page = sg_page_iter_page(&sg_iter);

                if (obj->dirty)
                        set_page_dirty(page);

                if (obj->madv == I915_MADV_WILLNEED)
                        mark_page_accessed(page);

                /* Second unwire arg presumably requests activation -- TODO
                 * confirm against vm_page_unwire(9). */
                vm_page_busy_wait(page, FALSE, "i915gem");
                vm_page_unwire(page, 1);
                vm_page_wakeup(page);
        }
        obj->dirty = 0;

        sg_free_table(obj->pages);
        kfree(obj->pages);
}
2330
/*
 * Drop the backing pages of @obj via its ops->put_pages hook.  Returns
 * -EBUSY while the pages are pinned; a no-op if never populated.
 */
int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
        const struct drm_i915_gem_object_ops *ops = obj->ops;

        if (obj->pages == NULL)
                return 0;

        if (obj->pages_pin_count)
                return -EBUSY;

        BUG_ON(i915_gem_obj_bound_any(obj));

        /* ->put_pages might need to allocate memory for the bit17 swizzle
         * array, hence protect them from being reaped by removing them from gtt
         * lists early. */
        list_del(&obj->global_list);

        ops->put_pages(obj);
        obj->pages = NULL;

        /* Possibly discard the now-unused backing store entirely. */
        i915_gem_object_invalidate(obj);

        return 0;
}
2356
2357 static int
2358 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2359 {
2360         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2361         int page_count, i;
2362         vm_object_t vm_obj;
2363         struct sg_table *st;
2364         struct scatterlist *sg;
2365         struct sg_page_iter sg_iter;
2366         struct vm_page *page;
2367         unsigned long last_pfn = 0;     /* suppress gcc warning */
2368         int ret;
2369
2370         /* Assert that the object is not currently in any GPU domain. As it
2371          * wasn't in the GTT, there shouldn't be any way it could have been in
2372          * a GPU cache
2373          */
2374         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2375         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2376
2377         st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
2378         if (st == NULL)
2379                 return -ENOMEM;
2380
2381         page_count = obj->base.size / PAGE_SIZE;
2382         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2383                 kfree(st);
2384                 return -ENOMEM;
2385         }
2386
2387         /* Get the list of pages out of our struct file.  They'll be pinned
2388          * at this point until we release them.
2389          *
2390          * Fail silently without starting the shrinker
2391          */
2392         vm_obj = obj->base.vm_obj;
2393         VM_OBJECT_LOCK(vm_obj);
2394         sg = st->sgl;
2395         st->nents = 0;
2396         for (i = 0; i < page_count; i++) {
2397                 page = shmem_read_mapping_page(vm_obj, i);
2398                 if (IS_ERR(page)) {
2399                         i915_gem_shrink(dev_priv,
2400                                         page_count,
2401                                         I915_SHRINK_BOUND |
2402                                         I915_SHRINK_UNBOUND |
2403                                         I915_SHRINK_PURGEABLE);
2404                         page = shmem_read_mapping_page(vm_obj, i);
2405                 }
2406                 if (IS_ERR(page)) {
2407                         /* We've tried hard to allocate the memory by reaping
2408                          * our own buffer, now let the real VM do its job and
2409                          * go down in flames if truly OOM.
2410                          */
2411                         i915_gem_shrink_all(dev_priv);
2412                         page = shmem_read_mapping_page(vm_obj, i);
2413                         if (IS_ERR(page)) {
2414                                 ret = PTR_ERR(page);
2415                                 goto err_pages;
2416                         }
2417                 }
2418 #ifdef CONFIG_SWIOTLB
2419                 if (swiotlb_nr_tbl()) {
2420                         st->nents++;
2421                         sg_set_page(sg, page, PAGE_SIZE, 0);
2422                         sg = sg_next(sg);
2423                         continue;
2424                 }
2425 #endif
2426                 if (!i || page_to_pfn(page) != last_pfn + 1) {
2427                         if (i)
2428                                 sg = sg_next(sg);
2429                         st->nents++;
2430                         sg_set_page(sg, page, PAGE_SIZE, 0);
2431                 } else {
2432                         sg->length += PAGE_SIZE;
2433                 }
2434                 last_pfn = page_to_pfn(page);
2435
2436                 /* Check that the i965g/gm workaround works. */
2437         }
2438 #ifdef CONFIG_SWIOTLB
2439         if (!swiotlb_nr_tbl())
2440 #endif
2441                 sg_mark_end(sg);
2442         obj->pages = st;
2443         VM_OBJECT_UNLOCK(vm_obj);
2444
2445         ret = i915_gem_gtt_prepare_object(obj);
2446         if (ret)
2447                 goto err_pages;
2448
2449         if (i915_gem_object_needs_bit17_swizzle(obj))
2450                 i915_gem_object_do_bit_17_swizzle(obj);
2451
2452         if (obj->tiling_mode != I915_TILING_NONE &&
2453             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2454                 i915_gem_object_pin_pages(obj);
2455
2456         return 0;
2457
2458 err_pages:
2459         sg_mark_end(sg);
2460         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2461                 page = sg_page_iter_page(&sg_iter);
2462                 vm_page_busy_wait(page, FALSE, "i915gem");
2463                 vm_page_unwire(page, 0);
2464                 vm_page_wakeup(page);
2465         }
2466         VM_OBJECT_UNLOCK(vm_obj);
2467         sg_free_table(st);
2468         kfree(st);
2469
2470         /* shmemfs first checks if there is enough memory to allocate the page
2471          * and reports ENOSPC should there be insufficient, along with the usual
2472          * ENOMEM for a genuine allocation failure.
2473          *
2474          * We use ENOSPC in our driver to mean that we have run out of aperture
2475          * space and so want to translate the error from shmemfs back to our
2476          * usual understanding of ENOMEM.
2477          */
2478         if (ret == -ENOSPC)
2479                 ret = -ENOMEM;
2480
2481         return ret;
2482 }
2483
2484 /* Ensure that the associated pages are gathered from the backing storage
2485  * and pinned into our object. i915_gem_object_get_pages() may be called
2486  * multiple times before they are released by a single call to
2487  * i915_gem_object_put_pages() - once the pages are no longer referenced
2488  * either as a result of memory pressure (reaping pages under the shrinker)
2489  * or as the object is itself released.
2490  */
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        const struct drm_i915_gem_object_ops *ops = obj->ops;
        int ret;

        /* Already populated; pages persist until i915_gem_object_put_pages(). */
        if (obj->pages)
                return 0;

        /* Purged/purgeable objects have no backing store to fetch. */
        if (obj->madv != I915_MADV_WILLNEED) {
                DRM_DEBUG("Attempting to obtain a purgeable object\n");
                return -EFAULT;
        }

        BUG_ON(obj->pages_pin_count);

        ret = ops->get_pages(obj);
        if (ret)
                return ret;

        list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);

        /* Reset the cached page-lookup iterator to the table's start. */
        obj->get_page.sg = obj->pages->sgl;
        obj->get_page.last = 0;

        return 0;
}
2519
2520 void i915_vma_move_to_active(struct i915_vma *vma,
2521                              struct drm_i915_gem_request *req)
2522 {
2523         struct drm_i915_gem_object *obj = vma->obj;
2524         struct intel_engine_cs *ring;
2525
2526         ring = i915_gem_request_get_ring(req);
2527
2528         /* Add a reference if we're newly entering the active list. */
2529         if (obj->active == 0)
2530                 drm_gem_object_reference(&obj->base);
2531         obj->active |= intel_ring_flag(ring);
2532
2533         list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2534         i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2535
2536         list_move_tail(&vma->mm_list, &vma->vm->active_list);
2537 }
2538
/*
 * Retire the tracking of @obj's last GPU write request and flush its
 * frontbuffer state.
 */
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
{
        RQ_BUG_ON(obj->last_write_req == NULL);
        RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));

        i915_gem_request_assign(&obj->last_write_req, NULL);
        intel_fb_obj_flush(obj, true, ORIGIN_CS);
}
2548
/*
 * Retire @obj's last read request on engine @ring.  Once no engine has
 * the object active, move it (and its VMAs) to the inactive/bound lists
 * and drop the active-list reference.
 */
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
{
        struct i915_vma *vma;

        RQ_BUG_ON(obj->last_read_req[ring] == NULL);
        RQ_BUG_ON(!(obj->active & (1 << ring)));

        /* Stop tracking the object on this engine's active list. */
        list_del_init(&obj->ring_list[ring]);
        i915_gem_request_assign(&obj->last_read_req[ring], NULL);

        /* An outstanding write on the same engine is retired with it. */
        if (obj->last_write_req && obj->last_write_req->ring->id == ring)
                i915_gem_object_retire__write(obj);

        obj->active &= ~(1 << ring);
        if (obj->active)
                return;

        /* Bump our place on the bound list to keep it roughly in LRU order
         * so that we don't steal from recently used but inactive objects
         * (unless we are forced to ofc!)
         */
        list_move_tail(&obj->global_list,
                       &to_i915(obj->base.dev)->mm.bound_list);

        list_for_each_entry(vma, &obj->vma_list, vma_link) {
                if (!list_empty(&vma->mm_list))
                        list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
        }

        i915_gem_request_assign(&obj->last_fenced_req, NULL);
        /* Drops the reference taken in i915_vma_move_to_active(). */
        drm_gem_object_unreference(&obj->base);
}
2582
2583 static int
2584 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2585 {
2586         struct drm_i915_private *dev_priv = dev->dev_private;
2587         struct intel_engine_cs *ring;
2588         int ret, i, j;
2589
2590         /* Carefully retire all requests without writing to the rings */
2591         for_each_ring(ring, dev_priv, i) {
2592                 ret = intel_ring_idle(ring);
2593                 if (ret)
2594                         return ret;
2595         }
2596         i915_gem_retire_requests(dev);
2597
2598         /* Finally reset hw state */
2599         for_each_ring(ring, dev_priv, i) {
2600                 intel_ring_init_seqno(ring, seqno);
2601
2602                 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2603                         ring->semaphore.sync_seqno[j] = 0;
2604         }
2605
2606         return 0;
2607 }
2608
2609 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2610 {
2611         struct drm_i915_private *dev_priv = dev->dev_private;
2612         int ret;
2613
2614         if (seqno == 0)
2615                 return -EINVAL;
2616
2617         /* HWS page needs to be set less than what we
2618          * will inject to ring
2619          */
2620         ret = i915_gem_init_seqno(dev, seqno - 1);
2621         if (ret)
2622                 return ret;
2623
2624         /* Carefully set the last_seqno value so that wrap
2625          * detection still works
2626          */
2627         dev_priv->next_seqno = seqno;
2628         dev_priv->last_seqno = seqno - 1;
2629         if (dev_priv->last_seqno == 0)
2630                 dev_priv->last_seqno--;
2631
2632         return 0;
2633 }
2634
2635 int
2636 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2637 {
2638         struct drm_i915_private *dev_priv = dev->dev_private;
2639
2640         /* reserve 0 for non-seqno */
2641         if (dev_priv->next_seqno == 0) {
2642                 int ret = i915_gem_init_seqno(dev, 0);
2643                 if (ret)
2644                         return ret;
2645
2646                 dev_priv->next_seqno = 1;
2647         }
2648
2649         *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2650         return 0;
2651 }
2652
2653 /*
2654  * NB: This function is not allowed to fail. Doing so would mean the the
2655  * request is not being tracked for completion but the work itself is
2656  * going to happen on the hardware. This would be a Bad Thing(tm).
2657  */
2658 void __i915_add_request(struct drm_i915_gem_request *request,
2659                         struct drm_i915_gem_object *obj,
2660                         bool flush_caches)
2661 {
2662         struct intel_engine_cs *ring;
2663         struct drm_i915_private *dev_priv;
2664         struct intel_ringbuffer *ringbuf;
2665         u32 request_start;
2666         int ret;
2667
2668         if (WARN_ON(request == NULL))
2669                 return;
2670
2671         ring = request->ring;
2672         dev_priv = ring->dev->dev_private;
2673         ringbuf = request->ringbuf;
2674
2675         /*
2676          * To ensure that this call will not fail, space for its emissions
2677          * should already have been reserved in the ring buffer. Let the ring
2678          * know that it is time to use that space up.
2679          */
2680         intel_ring_reserved_space_use(ringbuf);
2681
2682         request_start = intel_ring_get_tail(ringbuf);
2683         /*
2684          * Emit any outstanding flushes - execbuf can fail to emit the flush
2685          * after having emitted the batchbuffer command. Hence we need to fix
2686          * things up similar to emitting the lazy request. The difference here
2687          * is that the flush _must_ happen before the next request, no matter
2688          * what.
2689          */
2690         if (flush_caches) {
2691                 if (i915.enable_execlists)
2692                         ret = logical_ring_flush_all_caches(request);
2693                 else
2694                         ret = intel_ring_flush_all_caches(request);
2695                 /* Not allowed to fail! */
2696                 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
2697         }
2698
2699         /* Record the position of the start of the request so that
2700          * should we detect the updated seqno part-way through the
2701          * GPU processing the request, we never over-estimate the
2702          * position of the head.
2703          */
2704         request->postfix = intel_ring_get_tail(ringbuf);
2705
2706         if (i915.enable_execlists)
2707                 ret = ring->emit_request(request);
2708         else {
2709                 ret = ring->add_request(request);
2710
2711                 request->tail = intel_ring_get_tail(ringbuf);
2712         }
2713
2714         /* Not allowed to fail! */
2715         WARN(ret, "emit|add_request failed: %d!\n", ret);
2716
2717         request->head = request_start;
2718
2719         /* Whilst this request exists, batch_obj will be on the
2720          * active_list, and so will hold the active reference. Only when this
2721          * request is retired will the the batch_obj be moved onto the
2722          * inactive_list and lose its active reference. Hence we do not need
2723          * to explicitly hold another reference here.
2724          */
2725         request->batch_obj = obj;
2726
2727         request->emitted_jiffies = jiffies;
2728         request->previous_seqno = ring->last_submitted_seqno;
2729         ring->last_submitted_seqno = request->seqno;
2730         list_add_tail(&request->list, &ring->request_list);
2731
2732         trace_i915_gem_request_add(request);
2733
2734         i915_queue_hangcheck(ring->dev);
2735
2736         queue_delayed_work(dev_priv->wq,
2737                            &dev_priv->mm.retire_work,
2738                            round_jiffies_up_relative(HZ));
2739         intel_mark_busy(dev_priv->dev);
2740
2741         /* Sanity check that the reserved size was large enough. */
2742         intel_ring_reserved_space_end(ringbuf);
2743 }
2744
2745 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2746                                    const struct intel_context *ctx)
2747 {
2748         unsigned long elapsed;
2749
2750         elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2751
2752         if (ctx->hang_stats.banned)
2753                 return true;
2754
2755         if (ctx->hang_stats.ban_period_seconds &&
2756             elapsed <= ctx->hang_stats.ban_period_seconds) {
2757                 if (!i915_gem_context_is_default(ctx)) {
2758                         DRM_DEBUG("context hanging too fast, banning!\n");
2759                         return true;
2760                 } else if (i915_stop_ring_allow_ban(dev_priv)) {
2761                         if (i915_stop_ring_allow_warn(dev_priv))
2762                                 DRM_ERROR("gpu hanging too fast, banning!\n");
2763                         return true;
2764                 }
2765         }
2766
2767         return false;
2768 }
2769
2770 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2771                                   struct intel_context *ctx,
2772                                   const bool guilty)
2773 {
2774         struct i915_ctx_hang_stats *hs;
2775
2776         if (WARN_ON(!ctx))
2777                 return;
2778
2779         hs = &ctx->hang_stats;
2780
2781         if (guilty) {
2782                 hs->banned = i915_context_is_banned(dev_priv, ctx);
2783                 hs->batch_active++;
2784                 hs->guilty_ts = get_seconds();
2785         } else {
2786                 hs->batch_pending++;
2787         }
2788 }
2789
2790 void i915_gem_request_free(struct kref *req_ref)
2791 {
2792         struct drm_i915_gem_request *req = container_of(req_ref,
2793                                                  typeof(*req), ref);
2794         struct intel_context *ctx = req->ctx;
2795
2796         if (req->file_priv)
2797                 i915_gem_request_remove_from_client(req);
2798
2799         if (ctx) {
2800                 if (i915.enable_execlists) {
2801                         if (ctx != req->ring->default_context)
2802                                 intel_lr_context_unpin(req);
2803                 }
2804
2805                 i915_gem_context_unreference(ctx);
2806         }
2807
2808         kfree(req);
2809 }
2810
/**
 * i915_gem_request_alloc - allocate and initialise a request
 * @ring: engine the request will execute on
 * @ctx: context to associate with the request (a reference is taken)
 * @req_out: returns the new request on success, set to NULL on entry
 *
 * Allocates a request, assigns it a seqno, sets up the execlists or
 * legacy ring state and reserves enough ring space that a later
 * i915_add_request() cannot fail.
 *
 * Returns 0 on success or a negative error code; on failure no request
 * is returned and all partial state is cleaned up.
 */
int i915_gem_request_alloc(struct intel_engine_cs *ring,
                           struct intel_context *ctx,
                           struct drm_i915_gem_request **req_out)
{
        struct drm_i915_private *dev_priv = to_i915(ring->dev);
        struct drm_i915_gem_request *req;
        int ret;

        if (!req_out)
                return -EINVAL;

        *req_out = NULL;

        req = kzalloc(sizeof(*req), GFP_KERNEL);
        if (req == NULL)
                return -ENOMEM;

        ret = i915_gem_get_seqno(ring->dev, &req->seqno);
        if (ret)
                goto err;

        kref_init(&req->ref);
        req->i915 = dev_priv;
        req->ring = ring;
        req->ctx  = ctx;
        i915_gem_context_reference(req->ctx);

        if (i915.enable_execlists)
                ret = intel_logical_ring_alloc_request_extras(req);
        else
                ret = intel_ring_alloc_request_extras(req);
        if (ret) {
                /* Undo the context reference taken above before freeing. */
                i915_gem_context_unreference(req->ctx);
                goto err;
        }

        /*
         * Reserve space in the ring buffer for all the commands required to
         * eventually emit this request. This is to guarantee that the
         * i915_add_request() call can't fail. Note that the reserve may need
         * to be redone if the request is not actually submitted straight
         * away, e.g. because a GPU scheduler has deferred it.
         */
        if (i915.enable_execlists)
                ret = intel_logical_ring_reserve_space(req);
        else
                ret = intel_ring_reserve_space(req);
        if (ret) {
                /*
                 * At this point, the request is fully allocated even if not
                 * fully prepared. Thus it can be cleaned up using the proper
                 * free code.
                 */
                i915_gem_request_cancel(req);
                return ret;
        }

        *req_out = req;
        return 0;

err:
        kfree(req);
        return ret;
}
2875
/*
 * Abort a request that was allocated but will never be submitted: give
 * back the ring space reserved at allocation time and drop the
 * allocation reference (freeing the request via i915_gem_request_free()
 * if that was the last reference).
 */
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
{
        intel_ring_reserved_space_cancel(req->ringbuf);

        i915_gem_request_unreference(req);
}
2882
2883 struct drm_i915_gem_request *
2884 i915_gem_find_active_request(struct intel_engine_cs *ring)
2885 {
2886         struct drm_i915_gem_request *request;
2887
2888         list_for_each_entry(request, &ring->request_list, list) {
2889                 if (i915_gem_request_completed(request, false))
2890                         continue;
2891
2892                 return request;
2893         }
2894
2895         return NULL;
2896 }
2897
/*
 * After a hang, attribute blame on @ring: the first incomplete request
 * belongs to the context that was executing (guilty if the hangcheck
 * score says the ring was actually hung), while every request submitted
 * after it is merely pending and its context innocent.
 */
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
                                       struct intel_engine_cs *ring)
{
        struct drm_i915_gem_request *request;
        bool ring_hung;

        request = i915_gem_find_active_request(ring);

        if (request == NULL)
                return;

        ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;

        i915_set_reset_status(dev_priv, request->ctx, ring_hung);

        /* Mark everything queued after the hanging request as innocent. */
        list_for_each_entry_continue(request, &ring->request_list, list)
                i915_set_reset_status(dev_priv, request->ctx, false);
}
2916
/*
 * Throw away all outstanding work on @ring after a GPU reset: retire all
 * active objects, drain the execlists queue, free every request and
 * reset the ringbuffer space accounting. Ordering matters throughout —
 * see the comments at each step.
 */
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
                                        struct intel_engine_cs *ring)
{
        struct intel_ringbuffer *buffer;

        /* Force-retire everything still on the active list; this drops
         * the per-engine read (and write) tracking for each object.
         */
        while (!list_empty(&ring->active_list)) {
                struct drm_i915_gem_object *obj;

                obj = list_first_entry(&ring->active_list,
                                       struct drm_i915_gem_object,
                                       ring_list[ring->id]);

                i915_gem_object_retire__read(obj, ring->id);
        }

        /*
         * Clear the execlists queue up before freeing the requests, as those
         * are the ones that keep the context and ringbuffer backing objects
         * pinned in place.
         */

        if (i915.enable_execlists) {
                spin_lock_irq(&ring->execlist_lock);

                /* list_splice_tail_init checks for empty lists */
                list_splice_tail_init(&ring->execlist_queue,
                                      &ring->execlist_retired_req_list);

                spin_unlock_irq(&ring->execlist_lock);
                intel_execlists_retire_requests(ring);
        }

        /*
         * We must free the requests after all the corresponding objects have
         * been moved off active lists. Which is the same order as the normal
         * retire_requests function does. This is important if object hold
         * implicit references on things like e.g. ppgtt address spaces through
         * the request.
         */
        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;

                request = list_first_entry(&ring->request_list,
                                           struct drm_i915_gem_request,
                                           list);

                i915_gem_request_retire(request);
        }

        /* Having flushed all requests from all queues, we know that all
         * ringbuffers must now be empty. However, since we do not reclaim
         * all space when retiring the request (to prevent HEADs colliding
         * with rapid ringbuffer wraparound) the amount of available space
         * upon reset is less than when we start. Do one more pass over
         * all the ringbuffers to reset last_retired_head.
         */
        list_for_each_entry(buffer, &ring->buffers, link) {
                buffer->last_retired_head = buffer->tail;
                intel_ring_update_space(buffer);
        }
}
2978
/**
 * i915_gem_reset - clean up GEM state after a GPU reset
 * @dev: drm device
 *
 * First assigns blame for the hang on every engine (while the requests
 * still exist), then tears down all outstanding requests and objects,
 * resets the contexts and restores the fence registers.
 */
void i915_gem_reset(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        int i;

        /*
         * Before we free the objects from the requests, we need to inspect
         * them for finding the guilty party. As the requests only borrow
         * their reference to the objects, the inspection must be done first.
         */
        for_each_ring(ring, dev_priv, i)
                i915_gem_reset_ring_status(dev_priv, ring);

        for_each_ring(ring, dev_priv, i)
                i915_gem_reset_ring_cleanup(dev_priv, ring);

        i915_gem_context_reset(dev);

        i915_gem_restore_fences(dev);

        WARN_ON(i915_verify_lists(dev));
}
3002
3003 /**
3004  * This function clears the request list as sequence numbers are passed.
3005  */
3006 void
3007 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
3008 {
3009         WARN_ON(i915_verify_lists(ring->dev));
3010
3011         /* Retire requests first as we use it above for the early return.
3012          * If we retire requests last, we may use a later seqno and so clear
3013          * the requests lists without clearing the active list, leading to
3014          * confusion.
3015          */
3016         while (!list_empty(&ring->request_list)) {
3017                 struct drm_i915_gem_request *request;
3018
3019                 request = list_first_entry(&ring->request_list,
3020                                            struct drm_i915_gem_request,
3021                                            list);
3022
3023                 if (!i915_gem_request_completed(request, true))
3024                         break;
3025
3026                 i915_gem_request_retire(request);
3027         }
3028
3029         /* Move any buffers on the active list that are no longer referenced
3030          * by the ringbuffer to the flushing/inactive lists as appropriate,
3031          * before we free the context associated with the requests.
3032          */
3033         while (!list_empty(&ring->active_list)) {
3034                 struct drm_i915_gem_object *obj;
3035
3036                 obj = list_first_entry(&ring->active_list,
3037                                       struct drm_i915_gem_object,
3038                                       ring_list[ring->id]);
3039
3040                 if (!list_empty(&obj->last_read_req[ring->id]->list))
3041                         break;
3042
3043                 i915_gem_object_retire__read(obj, ring->id);
3044         }
3045
3046         if (unlikely(ring->trace_irq_req &&
3047                      i915_gem_request_completed(ring->trace_irq_req, true))) {
3048                 ring->irq_put(ring);
3049                 i915_gem_request_assign(&ring->trace_irq_req, NULL);
3050         }
3051
3052         WARN_ON(i915_verify_lists(ring->dev));
3053 }
3054
/*
 * Retire completed requests on every engine. Returns true when the GPU
 * is fully idle (no requests and, with execlists, no queued submissions
 * on any engine); in that case the idle worker is scheduled.
 */
bool
i915_gem_retire_requests(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        bool idle = true;
        int i;

        for_each_ring(ring, dev_priv, i) {
                i915_gem_retire_requests_ring(ring);
                idle &= list_empty(&ring->request_list);
                if (i915.enable_execlists) {
                        unsigned long flags;

                        /* execlist_queue is also touched from interrupt
                         * context, hence the irqsave lock.
                         */
                        spin_lock_irqsave(&ring->execlist_lock, flags);
                        idle &= list_empty(&ring->execlist_queue);
                        spin_unlock_irqrestore(&ring->execlist_lock, flags);

                        intel_execlists_retire_requests(ring);
                }
        }

        if (idle)
                mod_delayed_work(dev_priv->wq,
                                   &dev_priv->mm.idle_work,
                                   msecs_to_jiffies(100));

        return idle;
}
3084
3085 static void
3086 i915_gem_retire_work_handler(struct work_struct *work)
3087 {
3088         struct drm_i915_private *dev_priv =
3089                 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3090         struct drm_device *dev = dev_priv->dev;
3091         bool idle;
3092
3093         /* Come back later if the device is busy... */
3094         idle = false;
3095         if (mutex_trylock(&dev->struct_mutex)) {
3096                 idle = i915_gem_retire_requests(dev);
3097                 mutex_unlock(&dev->struct_mutex);
3098         }
3099         if (!idle)
3100                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3101                                    round_jiffies_up_relative(HZ));
3102 }
3103
3104 static void
3105 i915_gem_idle_work_handler(struct work_struct *work)
3106 {
3107         struct drm_i915_private *dev_priv =
3108                 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3109         struct drm_device *dev = dev_priv->dev;
3110         struct intel_engine_cs *ring;
3111         int i;
3112
3113         for_each_ring(ring, dev_priv, i)
3114                 if (!list_empty(&ring->request_list))
3115                         return;
3116
3117         /* we probably should sync with hangcheck here, using cancel_work_sync.
3118          * Also locking seems to be fubar here, ring->request_list is protected
3119          * by dev->struct_mutex. */
3120
3121         intel_mark_idle(dev);
3122
3123         if (mutex_trylock(&dev->struct_mutex)) {
3124                 struct intel_engine_cs *ring;
3125                 int i;
3126
3127                 for_each_ring(ring, dev_priv, i)
3128                         i915_gem_batch_pool_fini(&ring->batch_pool);
3129
3130                 mutex_unlock(&dev->struct_mutex);
3131         }
3132 }
3133
3134 /**
3135  * Ensures that an object will eventually get non-busy by flushing any required
3136  * write domains, emitting any outstanding lazy request and retiring and
3137  * completed requests.
3138  */
3139 static int
3140 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3141 {
3142         int i;
3143
3144         if (!obj->active)
3145                 return 0;
3146
3147         for (i = 0; i < I915_NUM_RINGS; i++) {
3148                 struct drm_i915_gem_request *req;
3149
3150                 req = obj->last_read_req[i];
3151                 if (req == NULL)
3152                         continue;
3153
3154                 if (list_empty(&req->list))
3155                         goto retire;
3156
3157                 if (i915_gem_request_completed(req, true)) {
3158                         __i915_gem_request_retire__upto(req);
3159 retire:
3160                         i915_gem_object_retire__read(obj, i);
3161                 }
3162         }
3163
3164         return 0;
3165 }
3166
3167 /**
3168  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3169  * @DRM_IOCTL_ARGS: standard ioctl arguments
3170  *
3171  * Returns 0 if successful, else an error is returned with the remaining time in
3172  * the timeout parameter.
3173  *  -ETIME: object is still busy after timeout
3174  *  -ERESTARTSYS: signal interrupted the wait
3175  *  -ENONENT: object doesn't exist
3176  * Also possible, but rare:
3177  *  -EAGAIN: GPU wedged
3178  *  -ENOMEM: damn
3179  *  -ENODEV: Internal IRQ fail
3180  *  -E?: The add request failed
3181  *
3182  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3183  * non-zero timeout parameter the wait ioctl will wait for the given number of
3184  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3185  * without holding struct_mutex the object may become re-busied before this
3186  * function completes. A similar but shorter * race condition exists in the busy
3187  * ioctl
3188  */
3189 int
3190 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3191 {
3192         struct drm_i915_private *dev_priv = dev->dev_private;
3193         struct drm_i915_gem_wait *args = data;
3194         struct drm_i915_gem_object *obj;
3195         struct drm_i915_gem_request *req[I915_NUM_RINGS];
3196         unsigned reset_counter;
3197         int i, n = 0;
3198         int ret;
3199
3200         if (args->flags != 0)
3201                 return -EINVAL;
3202
3203         ret = i915_mutex_lock_interruptible(dev);
3204         if (ret)
3205                 return ret;
3206
3207         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3208         if (&obj->base == NULL) {
3209                 mutex_unlock(&dev->struct_mutex);
3210                 return -ENOENT;
3211         }
3212
3213         /* Need to make sure the object gets inactive eventually. */
3214         ret = i915_gem_object_flush_active(obj);
3215         if (ret)
3216                 goto out;
3217
3218         if (!obj->active)
3219                 goto out;
3220
3221         /* Do this after OLR check to make sure we make forward progress polling
3222          * on this IOCTL with a timeout == 0 (like busy ioctl)
3223          */
3224         if (args->timeout_ns == 0) {
3225                 ret = -ETIME;
3226                 goto out;
3227         }
3228
3229         drm_gem_object_unreference(&obj->base);
3230         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3231
3232         for (i = 0; i < I915_NUM_RINGS; i++) {
3233                 if (obj->last_read_req[i] == NULL)
3234                         continue;
3235
3236                 req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3237         }
3238
3239         mutex_unlock(&dev->struct_mutex);
3240
3241         for (i = 0; i < n; i++) {
3242                 if (ret == 0)
3243                         ret = __i915_wait_request(req[i], reset_counter, true,
3244                                                   args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3245                                                   to_rps_client(file));
3246                 i915_gem_request_unreference__unlocked(req[i]);
3247         }
3248         return ret;
3249
3250 out:
3251         drm_gem_object_unreference(&obj->base);
3252         mutex_unlock(&dev->struct_mutex);
3253         return ret;
3254 }
3255
/*
 * __i915_gem_object_sync - serialise engine @to against one request
 *
 * Helper for i915_gem_object_sync(): ensures @to does not start using
 * @obj before @from_req has completed. Without semaphores this is a CPU
 * wait; with semaphores a GPU-side wait is emitted into *to_req,
 * allocating that request on the default context if the caller did not
 * pass one in. Returns 0 on success or a negative error code.
 */
static int
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
                       struct intel_engine_cs *to,
                       struct drm_i915_gem_request *from_req,
                       struct drm_i915_gem_request **to_req)
{
        struct intel_engine_cs *from;
        int ret;

        from = i915_gem_request_get_ring(from_req);
        /* Same engine: requests are naturally ordered, nothing to do. */
        if (to == from)
                return 0;

        if (i915_gem_request_completed(from_req, true))
                return 0;

        if (!i915_semaphore_is_enabled(obj->base.dev)) {
                struct drm_i915_private *i915 = to_i915(obj->base.dev);
                ret = __i915_wait_request(from_req,
                                          atomic_read(&i915->gpu_error.reset_counter),
                                          i915->mm.interruptible,
                                          NULL,
                                          &i915->rps.semaphores);
                if (ret)
                        return ret;

                i915_gem_object_retire_request(obj, from_req);
        } else {
                int idx = intel_ring_sync_index(from, to);
                u32 seqno = i915_gem_request_get_seqno(from_req);

                /* For a GPU-side sync the caller must supply a request slot. */
                WARN_ON(!to_req);

                /* Already synced past this seqno: the wait would be a no-op. */
                if (seqno <= from->semaphore.sync_seqno[idx])
                        return 0;

                if (*to_req == NULL) {
                        ret = i915_gem_request_alloc(to, to->default_context, to_req);
                        if (ret)
                                return ret;
                }

                trace_i915_gem_ring_sync_to(*to_req, from, from_req);
                ret = to->semaphore.sync_to(*to_req, from, seqno);
                if (ret)
                        return ret;

                /* We use last_read_req because sync_to()
                 * might have just caused seqno wrap under
                 * the radar.
                 */
                from->semaphore.sync_seqno[idx] =
                        i915_gem_request_get_seqno(obj->last_read_req[from->id]);
        }

        return 0;
}
3313
3314 /**
3315  * i915_gem_object_sync - sync an object to a ring.
3316  *
3317  * @obj: object which may be in use on another ring.
3318  * @to: ring we wish to use the object on. May be NULL.
3319  * @to_req: request we wish to use the object for. See below.
3320  *          This will be allocated and returned if a request is
3321  *          required but not passed in.
3322  *
3323  * This code is meant to abstract object synchronization with the GPU.
3324  * Calling with NULL implies synchronizing the object with the CPU
3325  * rather than a particular GPU ring. Conceptually we serialise writes
3326  * between engines inside the GPU. We only allow one engine to write
3327  * into a buffer at any time, but multiple readers. To ensure each has
3328  * a coherent view of memory, we must:
3329  *
3330  * - If there is an outstanding write request to the object, the new
3331  *   request must wait for it to complete (either CPU or in hw, requests
3332  *   on the same ring will be naturally ordered).
3333  *
3334  * - If we are a write request (pending_write_domain is set), the new
3335  *   request must wait for outstanding read requests to complete.
3336  *
3337  * For CPU synchronisation (NULL to) no request is required. For syncing with
3338  * rings to_req must be non-NULL. However, a request does not have to be
3339  * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3340  * request will be allocated automatically and returned through *to_req. Note
3341  * that it is not guaranteed that commands will be emitted (because the system
3342  * might already be idle). Hence there is no need to create a request that
3343  * might never have any work submitted. Note further that if a request is
3344  * returned in *to_req, it is the responsibility of the caller to submit
3345  * that request (after potentially adding more work to it).
3346  *
3347  * Returns 0 if successful, else propagates up the lower layer error.
3348  */
3349 int
3350 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3351                      struct intel_engine_cs *to,
3352                      struct drm_i915_gem_request **to_req)
3353 {
3354         const bool readonly = obj->base.pending_write_domain == 0;
3355         struct drm_i915_gem_request *req[I915_NUM_RINGS];
3356         int ret, i, n;
3357
3358         if (!obj->active)
3359                 return 0;
3360
3361         if (to == NULL)
3362                 return i915_gem_object_wait_rendering(obj, readonly);
3363
3364         n = 0;
3365         if (readonly) {
3366                 if (obj->last_write_req)
3367                         req[n++] = obj->last_write_req;
3368         } else {
3369                 for (i = 0; i < I915_NUM_RINGS; i++)
3370                         if (obj->last_read_req[i])
3371                                 req[n++] = obj->last_read_req[i];
3372         }
3373         for (i = 0; i < n; i++) {
3374                 ret = __i915_gem_object_sync(obj, to, req[i], to_req);
3375                 if (ret)
3376                         return ret;
3377         }
3378
3379         return 0;
3380 }
3381
3382 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3383 {
3384         u32 old_write_domain, old_read_domains;
3385
3386         /* Force a pagefault for domain tracking on next user access */
3387         i915_gem_release_mmap(obj);
3388
3389         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3390                 return;
3391
3392         /* Wait for any direct GTT access to complete */
3393         mb();
3394
3395         old_read_domains = obj->base.read_domains;
3396         old_write_domain = obj->base.write_domain;
3397
3398         obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3399         obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3400
3401         trace_i915_gem_object_change_domain(obj,
3402                                             old_read_domains,
3403                                             old_write_domain);
3404 }
3405
/*
 * Core unbind implementation: detach @vma from its address space, tearing
 * down the PTEs, the fence register (for normal GGTT views) and the drm_mm
 * node.  When @wait is true, outstanding GPU rendering on the object is
 * completed first.
 *
 * Returns 0 on success, -EBUSY if the vma is still pinned, or the error
 * from waiting / fence release.
 */
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	/* Not on the object's vma list: nothing bound, nothing to do. */
	if (list_empty(&vma->vma_link))
		return 0;

	/* A vma without allocated GTT space only needs its bookkeeping freed. */
	if (!drm_mm_node_allocated(&vma->node)) {
		i915_gem_vma_destroy(vma);
		return 0;
	}

	if (vma->pin_count)
		return -EBUSY;

	BUG_ON(obj->pages == NULL);

	if (wait) {
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}

	/* Only a normal GGTT view can be fenced / CPU-mmapped through the
	 * aperture, so only then must we flush the GTT domain and fence.
	 */
	if (i915_is_ggtt(vma->vm) &&
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
		i915_gem_object_finish_gtt(obj);

		/* release the fence reg _after_ flushing */
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;
	}

	trace_i915_vma_unbind(vma);

	vma->vm->unbind_vma(vma);
	vma->bound = 0;

	list_del_init(&vma->mm_list);
	if (i915_is_ggtt(vma->vm)) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
			obj->map_and_fenceable = false;
		} else if (vma->ggtt_view.pages) {
			/* Non-normal views own a private scatterlist. */
			sg_free_table(vma->ggtt_view.pages);
			kfree(vma->ggtt_view.pages);
		}
		vma->ggtt_view.pages = NULL;
	}

	drm_mm_remove_node(&vma->node);
	i915_gem_vma_destroy(vma);

	/* Since the unbound list is global, only move to that list if
	 * no more VMAs exist. */
	if (list_empty(&obj->vma_list))
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);

	/* And finally now the object is completely decoupled from this vma,
	 * we can drop its hold on the backing storage and allow it to be
	 * reaped by the shrinker.
	 */
	i915_gem_object_unpin_pages(obj);

	return 0;
}
3473
/* Unbind @vma, waiting for outstanding rendering on its object first. */
int i915_vma_unbind(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, true);
}
3478
/* Unbind @vma without waiting for outstanding rendering to complete. */
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, false);
}
3483
/*
 * Wait for the GPU to become completely idle.  In legacy (non-execlists)
 * mode a switch back to each ring's default context is submitted first,
 * then every ring is drained of outstanding requests.
 *
 * Returns 0 on success, else a negative error code.
 */
int i915_gpu_idle(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i;

	/* Flush everything onto the inactive list. */
	for_each_ring(ring, dev_priv, i) {
		if (!i915.enable_execlists) {
			struct drm_i915_gem_request *req;

			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
			if (ret)
				return ret;

			ret = i915_switch_context(req);
			if (ret) {
				/* Unwind the request we failed to submit. */
				i915_gem_request_cancel(req);
				return ret;
			}

			i915_add_request_no_flush(req);
		}

		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}

	WARN_ON(i915_verify_lists(dev));
	return 0;
}
3516
3517 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3518                                      unsigned long cache_level)
3519 {
3520         struct drm_mm_node *gtt_space = &vma->node;
3521         struct drm_mm_node *other;
3522
3523         /*
3524          * On some machines we have to be careful when putting differing types
3525          * of snoopable memory together to avoid the prefetcher crossing memory
3526          * domains and dying. During vm initialisation, we decide whether or not
3527          * these constraints apply and set the drm_mm.color_adjust
3528          * appropriately.
3529          */
3530         if (vma->vm->mm.color_adjust == NULL)
3531                 return true;
3532
3533         if (!drm_mm_node_allocated(gtt_space))
3534                 return true;
3535
3536         if (list_empty(&gtt_space->node_list))
3537                 return true;
3538
3539         other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3540         if (other->allocated && !other->hole_follows && other->color != cache_level)
3541                 return false;
3542
3543         other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3544         if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3545                 return false;
3546
3547         return true;
3548 }
3549
3550 /**
3551  * Finds free space in the GTT aperture and binds the object or a view of it
3552  * there.
3553  */
3554 static struct i915_vma *
3555 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3556                            struct i915_address_space *vm,
3557                            const struct i915_ggtt_view *ggtt_view,
3558                            unsigned alignment,
3559                            uint64_t flags)
3560 {
3561         struct drm_device *dev = obj->base.dev;
3562         struct drm_i915_private *dev_priv = dev->dev_private;
3563         u32 fence_alignment, unfenced_alignment;
3564         u32 search_flag, alloc_flag;
3565         u64 start, end;
3566         u64 size, fence_size;
3567         struct i915_vma *vma;
3568         int ret;
3569
3570         if (i915_is_ggtt(vm)) {
3571                 u32 view_size;
3572
3573                 if (WARN_ON(!ggtt_view))
3574                         return ERR_PTR(-EINVAL);
3575
3576                 view_size = i915_ggtt_view_size(obj, ggtt_view);
3577
3578                 fence_size = i915_gem_get_gtt_size(dev,
3579                                                    view_size,
3580                                                    obj->tiling_mode);
3581                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3582                                                              view_size,
3583                                                              obj->tiling_mode,
3584                                                              true);
3585                 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3586                                                                 view_size,
3587                                                                 obj->tiling_mode,
3588                                                                 false);
3589                 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3590         } else {
3591                 fence_size = i915_gem_get_gtt_size(dev,
3592                                                    obj->base.size,
3593                                                    obj->tiling_mode);
3594                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3595                                                              obj->base.size,
3596                                                              obj->tiling_mode,
3597                                                              true);
3598                 unfenced_alignment =
3599                         i915_gem_get_gtt_alignment(dev,
3600                                                    obj->base.size,
3601                                                    obj->tiling_mode,
3602                                                    false);
3603                 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3604         }
3605
3606         start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3607         end = vm->total;
3608         if (flags & PIN_MAPPABLE)
3609                 end = min_t(u64, end, dev_priv->gtt.mappable_end);
3610         if (flags & PIN_ZONE_4G)
3611                 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3612
3613         if (alignment == 0)
3614                 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3615                                                 unfenced_alignment;
3616         if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3617                 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3618                           ggtt_view ? ggtt_view->type : 0,
3619                           alignment);
3620                 return ERR_PTR(-EINVAL);
3621         }
3622
3623         /* If binding the object/GGTT view requires more space than the entire
3624          * aperture has, reject it early before evicting everything in a vain
3625          * attempt to find space.
3626          */
3627         if (size > end) {
3628                 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%lu > %s aperture=%lu\n",
3629                           ggtt_view ? ggtt_view->type : 0,
3630                           size,
3631                           flags & PIN_MAPPABLE ? "mappable" : "total",
3632                           end);
3633                 return ERR_PTR(-E2BIG);
3634         }
3635
3636         ret = i915_gem_object_get_pages(obj);
3637         if (ret)
3638                 return ERR_PTR(ret);
3639
3640         i915_gem_object_pin_pages(obj);
3641
3642         vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3643                           i915_gem_obj_lookup_or_create_vma(obj, vm);
3644
3645         if (IS_ERR(vma))
3646                 goto err_unpin;
3647
3648         if (flags & PIN_OFFSET_FIXED) {
3649                 uint64_t offset = flags & PIN_OFFSET_MASK;
3650
3651                 if (offset & (alignment - 1) || offset + size > end) {
3652                         ret = -EINVAL;
3653                         goto err_free_vma;
3654                 }
3655                 vma->node.start = offset;
3656                 vma->node.size = size;
3657                 vma->node.color = obj->cache_level;
3658                 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3659                 if (ret) {
3660                         ret = i915_gem_evict_for_vma(vma);
3661                         if (ret == 0)
3662                                 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3663                 }
3664                 if (ret)
3665                         goto err_free_vma;
3666         } else {
3667                 if (flags & PIN_HIGH) {
3668                         search_flag = DRM_MM_SEARCH_BELOW;
3669                         alloc_flag = DRM_MM_CREATE_TOP;
3670                 } else {
3671                         search_flag = DRM_MM_SEARCH_DEFAULT;
3672                         alloc_flag = DRM_MM_CREATE_DEFAULT;
3673                 }
3674
3675 search_free:
3676                 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3677                                                           size, alignment,
3678                                                           obj->cache_level,
3679                                                           start, end,
3680                                                           search_flag,
3681                                                           alloc_flag);
3682                 if (ret) {
3683                         ret = i915_gem_evict_something(dev, vm, size, alignment,
3684                                                        obj->cache_level,
3685                                                        start, end,
3686                                                        flags);
3687                         if (ret == 0)
3688                                 goto search_free;
3689
3690                         goto err_free_vma;
3691                 }
3692         }
3693         if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
3694                 ret = -EINVAL;
3695                 goto err_remove_node;
3696         }
3697
3698         trace_i915_vma_bind(vma, flags);
3699         ret = i915_vma_bind(vma, obj->cache_level, flags);
3700         if (ret)
3701                 goto err_remove_node;
3702
3703         list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3704         list_add_tail(&vma->mm_list, &vm->inactive_list);
3705
3706         return vma;
3707
3708 err_remove_node:
3709         drm_mm_remove_node(&vma->node);
3710 err_free_vma:
3711         i915_gem_vma_destroy(vma);
3712         vma = ERR_PTR(ret);
3713 err_unpin:
3714         i915_gem_object_unpin_pages(obj);
3715         return vma;
3716 }
3717
3718 bool
3719 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3720                         bool force)
3721 {
3722         /* If we don't have a page list set up, then we're not pinned
3723          * to GPU, and we can ignore the cache flush because it'll happen
3724          * again at bind time.
3725          */
3726         if (obj->pages == NULL)
3727                 return false;
3728
3729         /*
3730          * Stolen memory is always coherent with the GPU as it is explicitly
3731          * marked as wc by the system, or the system is cache-coherent.
3732          */
3733         if (obj->stolen || obj->phys_handle)
3734                 return false;
3735
3736         /* If the GPU is snooping the contents of the CPU cache,
3737          * we do not need to manually clear the CPU cache lines.  However,
3738          * the caches are only snooped when the render cache is
3739          * flushed/invalidated.  As we always have to emit invalidations
3740          * and flushes when moving into and out of the RENDER domain, correct
3741          * snooping behaviour occurs naturally as the result of our domain
3742          * tracking.
3743          */
3744         if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3745                 obj->cache_dirty = true;
3746                 return false;
3747         }
3748
3749         trace_i915_gem_object_clflush(obj);
3750         drm_clflush_sg(obj->pages);
3751         obj->cache_dirty = false;
3752
3753         return true;
3754 }
3755
3756 /** Flushes the GTT write domain for the object if it's dirty. */
3757 static void
3758 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3759 {
3760         uint32_t old_write_domain;
3761
3762         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3763                 return;
3764
3765         /* No actual flushing is required for the GTT write domain.  Writes
3766          * to it immediately go to main memory as far as we know, so there's
3767          * no chipset flush.  It also doesn't land in render cache.
3768          *
3769          * However, we do have to enforce the order so that all writes through
3770          * the GTT land before any writes to the device, such as updates to
3771          * the GATT itself.
3772          */
3773         wmb();
3774
3775         old_write_domain = obj->base.write_domain;
3776         obj->base.write_domain = 0;
3777
3778         intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3779
3780         trace_i915_gem_object_change_domain(obj,
3781                                             obj->base.read_domains,
3782                                             old_write_domain);
3783 }
3784
3785 /** Flushes the CPU write domain for the object if it's dirty. */
3786 static void
3787 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3788 {
3789         uint32_t old_write_domain;
3790
3791         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3792                 return;
3793
3794         if (i915_gem_clflush_object(obj, obj->pin_display))
3795                 i915_gem_chipset_flush(obj->base.dev);
3796
3797         old_write_domain = obj->base.write_domain;
3798         obj->base.write_domain = 0;
3799
3800         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3801
3802         trace_i915_gem_object_change_domain(obj,
3803                                             obj->base.read_domains,
3804                                             old_write_domain);
3805 }
3806
3807 /**
3808  * Moves a single object to the GTT read, and possibly write domain.
3809  *
3810  * This function returns when the move is complete, including waiting on
3811  * flushes to occur.
3812  */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	struct i915_vma *vma;
	int ret;

	/* Already exclusively in the GTT domain: nothing to do. */
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* For a write we must wait for all rendering; for a read, only
	 * for outstanding GPU writes.
	 */
	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_cpu_write_domain(obj);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		/* Writing makes GTT the sole domain and dirties the pages
		 * so they are written back before being swapped out.
		 */
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->dirty = 1;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	/* And bump the LRU for this access */
	vma = i915_gem_obj_to_ggtt(obj);
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
		list_move_tail(&vma->mm_list,
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);

	return 0;
}
3874
3875 /**
3876  * Changes the cache-level of an object across all VMA.
3877  *
3878  * After this function returns, the object will be in the new cache-level
3879  * across all GTT and the contents of the backing storage will be coherent,
3880  * with respect to the new cache-level. In order to keep the backing storage
3881  * coherent for all users, we only allow a single cache level to be set
3882  * globally on the object and prevent it from being changed whilst the
3883  * hardware is reading from the object. That is if the object is currently
3884  * on the scanout it will be set to uncached (or equivalent display
3885  * cache coherency) and all non-MOCS GPU access will also be uncached so
3886  * that all direct access to the scanout remains coherent.
3887  */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct i915_vma *vma, *next;
	bool bound = false;
	int ret = 0;

	/* Even with no level change, fall through to flush dirty caches. */
	if (obj->cache_level == cache_level)
		goto out;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		/* Pinned VMA cannot be unbound or rebound, so refuse. */
		if (vma->pin_count) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;
		} else
			bound = true;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon
	 * less state and so involves less work.
	 */
	if (bound) {
		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
			/* Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			i915_gem_release_mmap(obj);

			/* As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			ret = i915_gem_object_put_fence(obj);
			if (ret)
				return ret;
		} else {
			/* We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
		}

		/* Rewrite the PTEs of every remaining binding in place. */
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
			if (ret)
				return ret;
		}
	}

	/* Update the colouring bookkeeping so future allocations respect
	 * the new level.
	 */
	list_for_each_entry(vma, &obj->vma_list, vma_link)
		vma->node.color = cache_level;
	obj->cache_level = cache_level;

out:
	/* Flush the dirty CPU caches to the backing storage so that the
	 * object is now coherent at its new cache level (with respect
	 * to the access domain).
	 */
	if (obj->cache_dirty &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		if (i915_gem_clflush_object(obj, true))
			i915_gem_chipset_flush(obj->base.dev);
	}

	return 0;
}
3995
3996 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3997                                struct drm_file *file)
3998 {
3999         struct drm_i915_gem_caching *args = data;
4000         struct drm_i915_gem_object *obj;
4001
4002         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4003         if (&obj->base == NULL)
4004                 return -ENOENT;
4005
4006         switch (obj->cache_level) {
4007         case I915_CACHE_LLC:
4008         case I915_CACHE_L3_LLC:
4009                 args->caching = I915_CACHING_CACHED;
4010                 break;
4011
4012         case I915_CACHE_WT:
4013                 args->caching = I915_CACHING_DISPLAY;
4014                 break;
4015
4016         default:
4017                 args->caching = I915_CACHING_NONE;
4018                 break;
4019         }
4020
4021         drm_gem_object_unreference_unlocked(&obj->base);
4022         return 0;
4023 }
4024
4025 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4026                                struct drm_file *file)
4027 {
4028         struct drm_i915_private *dev_priv = dev->dev_private;
4029         struct drm_i915_gem_caching *args = data;
4030         struct drm_i915_gem_object *obj;
4031         enum i915_cache_level level;
4032         int ret;
4033
4034         switch (args->caching) {
4035         case I915_CACHING_NONE:
4036                 level = I915_CACHE_NONE;
4037                 break;
4038         case I915_CACHING_CACHED:
4039                 /*
4040                  * Due to a HW issue on BXT A stepping, GPU stores via a
4041                  * snooped mapping may leave stale data in a corresponding CPU
4042                  * cacheline, whereas normally such cachelines would get
4043                  * invalidated.
4044                  */
4045                 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
4046                         return -ENODEV;
4047
4048                 level = I915_CACHE_LLC;
4049                 break;
4050         case I915_CACHING_DISPLAY:
4051                 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4052                 break;
4053         default:
4054                 return -EINVAL;
4055         }