1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35 #include <linux/uaccess.h>
36
37 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
38 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
39 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
40 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
41
42 #define BATCH_OFFSET_BIAS (256*1024)
43
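/*
 * Execbuffer vma tracker: "vmas" holds every vma referenced by the call in
 * the order of the exec object array.  The union is the handle->vma lookup:
 * a negative "and" means lut[] is a flat array indexed by buffer position
 * (used with I915_EXEC_HANDLE_LUT when its allocation succeeds), while a
 * non-negative "and" is the mask for the hash buckets used otherwise.
 */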
44 struct eb_vmas {
45         struct list_head vmas;
46         int and;
47         union {
48                 struct i915_vma *lut[0];
49                 struct hlist_head buckets[0];
50         };
51 };
52
53 static struct eb_vmas *
54 eb_create(struct drm_i915_gem_execbuffer2 *args)
55 {
56         struct eb_vmas *eb = NULL;
57
58         if (args->flags & I915_EXEC_HANDLE_LUT) {
59                 unsigned size = args->buffer_count;
60                 size *= sizeof(struct i915_vma *);
61                 size += sizeof(struct eb_vmas);
62                 eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
63         }
64
65         if (eb == NULL) {
66                 unsigned size = args->buffer_count;
67                 unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
68                 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
69                 while (count > 2*size)
70                         count >>= 1;
71                 eb = kzalloc(count*sizeof(struct hlist_head) +
72                              sizeof(struct eb_vmas),
73                              GFP_TEMPORARY);
74                 if (eb == NULL)
75                         return eb;
76
77                 eb->and = count - 1;
78         } else
79                 eb->and = -args->buffer_count;
80
81         INIT_LIST_HEAD(&eb->vmas);
82         return eb;
83 }
84
85 static void
86 eb_reset(struct eb_vmas *eb)
87 {
88         if (eb->and >= 0)
89                 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
90 }
91
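/*
 * Resolve every exec object handle to a GEM object under the file's table
 * spinlock (rejecting duplicate handles), then, with the lock dropped, look
 * up or create the matching vma for the target address space and move it
 * onto eb->vmas.
 */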
92 static int
93 eb_lookup_vmas(struct eb_vmas *eb,
94                struct drm_i915_gem_exec_object2 *exec,
95                const struct drm_i915_gem_execbuffer2 *args,
96                struct i915_address_space *vm,
97                struct drm_file *file)
98 {
99         struct drm_i915_gem_object *obj;
100         struct list_head objects;
101         int i, ret;
102
103         INIT_LIST_HEAD(&objects);
104         spin_lock(&file->table_lock);
105         /* Grab a reference to the object and release the lock so we can look up
106          * or create the VMA without using GFP_ATOMIC */
107         for (i = 0; i < args->buffer_count; i++) {
108                 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
109                 if (obj == NULL) {
110                         spin_unlock(&file->table_lock);
111                         DRM_DEBUG("Invalid object handle %d at index %d\n",
112                                    exec[i].handle, i);
113                         ret = -ENOENT;
114                         goto err;
115                 }
116
117                 if (!list_empty(&obj->obj_exec_link)) {
118                         spin_unlock(&file->table_lock);
119                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
120                                    obj, exec[i].handle, i);
121                         ret = -EINVAL;
122                         goto err;
123                 }
124
125                 drm_gem_object_reference(&obj->base);
126                 list_add_tail(&obj->obj_exec_link, &objects);
127         }
128         spin_unlock(&file->table_lock);
129
130         i = 0;
131         while (!list_empty(&objects)) {
132                 struct i915_vma *vma;
133
134                 obj = list_first_entry(&objects,
135                                        struct drm_i915_gem_object,
136                                        obj_exec_link);
137
138                 /*
139                  * NOTE: We can leak any vmas created here when something fails
140                  * later on. But that's no issue since vma_unbind can deal with
141                  * vmas which are not actually bound. And since only
142                  * lookup_or_create exists as an interface to get at the vma
143                  * from the (obj, vm) we don't run the risk of creating
144                  * duplicated vmas for the same vm.
145                  */
146                 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
147                 if (IS_ERR(vma)) {
148                         DRM_DEBUG("Failed to lookup VMA\n");
149                         ret = PTR_ERR(vma);
150                         goto err;
151                 }
152
153                 /* Transfer ownership from the objects list to the vmas list. */
154                 list_add_tail(&vma->exec_list, &eb->vmas);
155                 list_del_init(&obj->obj_exec_link);
156
157                 vma->exec_entry = &exec[i];
158                 if (eb->and < 0) {
159                         eb->lut[i] = vma;
160                 } else {
161                         uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
162                         vma->exec_handle = handle;
163                         hlist_add_head(&vma->exec_node,
164                                        &eb->buckets[handle & eb->and]);
165                 }
166                 ++i;
167         }
168
169         return 0;
170
171
172 err:
173         while (!list_empty(&objects)) {
174                 obj = list_first_entry(&objects,
175                                        struct drm_i915_gem_object,
176                                        obj_exec_link);
177                 list_del_init(&obj->obj_exec_link);
178                 drm_gem_object_unreference(&obj->base);
179         }
180         /*
181          * Objects already transferred to the vmas list will be unreferenced by
182          * eb_destroy.
183          */
184
185         return ret;
186 }
187
188 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
189 {
190         if (eb->and < 0) {
191                 if (handle >= -eb->and)
192                         return NULL;
193                 return eb->lut[handle];
194         } else {
195                 struct hlist_head *head;
196                 struct i915_vma *vma;
197
198                 head = &eb->buckets[handle & eb->and];
199                 hlist_for_each_entry(vma, head, exec_node) {
200                         if (vma->exec_handle == handle)
201                                 return vma;
202                 }
203                 return NULL;
204         }
205 }
206
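/*
 * Undo the pin/fence taken while reserving a vma.  Guarded by the
 * __EXEC_OBJECT_HAS_* flags, so it is safe to call on entries that were
 * only partially reserved.
 */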
207 static void
208 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
209 {
210         struct drm_i915_gem_exec_object2 *entry;
211         struct drm_i915_gem_object *obj = vma->obj;
212
213         if (!drm_mm_node_allocated(&vma->node))
214                 return;
215
216         entry = vma->exec_entry;
217
218         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
219                 i915_gem_object_unpin_fence(obj);
220
221         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
222                 vma->pin_count--;
223
224         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
225 }
226
227 static void eb_destroy(struct eb_vmas *eb)
228 {
229         while (!list_empty(&eb->vmas)) {
230                 struct i915_vma *vma;
231
232                 vma = list_first_entry(&eb->vmas,
233                                        struct i915_vma,
234                                        exec_list);
235                 list_del_init(&vma->exec_list);
236                 i915_gem_execbuffer_unreserve_vma(vma);
237                 drm_gem_object_unreference(&vma->obj->base);
238         }
239         kfree(eb);
240 }
241
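/*
 * Relocations may be written through the CPU domain when that is coherent
 * enough: LLC platforms, objects already in the CPU write domain, or
 * objects whose cache_level is not I915_CACHE_NONE.
 */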
242 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
243 {
244         return (HAS_LLC(obj->base.dev) ||
245                 obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
246                 obj->cache_level != I915_CACHE_NONE);
247 }
248
249 /* Used to convert any address to canonical form.
250  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
251  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
252  * addresses to be in a canonical form:
253  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
254  * canonical form [63:48] == [47]."
255  */
256 #define GEN8_HIGH_ADDRESS_BIT 47
257 static inline uint64_t gen8_canonical_addr(uint64_t address)
258 {
259         return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
260 }
261
262 static inline uint64_t gen8_noncanonical_addr(uint64_t address)
263 {
264         return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
265 }
266
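/*
 * relocation_target() is the value actually written into the batch: the
 * target's GPU address plus the (signed) delta, canonicalised for gen8+.
 * For example, gen8_canonical_addr(0x0000800000001000) yields
 * 0xffff800000001000 (bit 47 sign-extended upwards), and
 * gen8_noncanonical_addr() undoes that by masking back to the low 48 bits.
 */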
267 static inline uint64_t
268 relocation_target(struct drm_i915_gem_relocation_entry *reloc,
269                   uint64_t target_offset)
270 {
271         return gen8_canonical_addr((int)reloc->delta + target_offset);
272 }
273
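/*
 * Three ways of patching a relocation into the batch, selected in
 * i915_gem_execbuffer_relocate_entry(): through the CPU domain (kmap) when
 * use_cpu_reloc() says that is coherent, through the GTT aperture when the
 * object is mappable, or kmap plus explicit clflushes as a last resort.
 * On gen8+ the address is 64 bits wide and may straddle a page boundary,
 * hence the second map/write in each helper.
 */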
274 static int
275 relocate_entry_cpu(struct drm_i915_gem_object *obj,
276                    struct drm_i915_gem_relocation_entry *reloc,
277                    uint64_t target_offset)
278 {
279         struct drm_device *dev = obj->base.dev;
280         uint32_t page_offset = offset_in_page(reloc->offset);
281         uint64_t delta = relocation_target(reloc, target_offset);
282         char *vaddr;
283         int ret;
284
285         ret = i915_gem_object_set_to_cpu_domain(obj, true);
286         if (ret)
287                 return ret;
288
289         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
290                                 reloc->offset >> PAGE_SHIFT));
291         *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
292
293         if (INTEL_INFO(dev)->gen >= 8) {
294                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
295
296                 if (page_offset == 0) {
297                         kunmap_atomic(vaddr);
298                         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
299                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
300                 }
301
302                 *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
303         }
304
305         kunmap_atomic(vaddr);
306
307         return 0;
308 }
309
310 static int
311 relocate_entry_gtt(struct drm_i915_gem_object *obj,
312                    struct drm_i915_gem_relocation_entry *reloc,
313                    uint64_t target_offset)
314 {
315         struct drm_device *dev = obj->base.dev;
316         struct drm_i915_private *dev_priv = dev->dev_private;
317         uint64_t delta = relocation_target(reloc, target_offset);
318         uint64_t offset;
319         void __iomem *reloc_page;
320         int ret;
321
322         ret = i915_gem_object_set_to_gtt_domain(obj, true);
323         if (ret)
324                 return ret;
325
326         ret = i915_gem_object_put_fence(obj);
327         if (ret)
328                 return ret;
329
330         /* Map the page containing the relocation we're going to perform.  */
331         offset = i915_gem_obj_ggtt_offset(obj);
332         offset += reloc->offset;
333         reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
334                                               offset & PAGE_MASK);
335         iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
336
337         if (INTEL_INFO(dev)->gen >= 8) {
338                 offset += sizeof(uint32_t);
339
340                 if (offset_in_page(offset) == 0) {
341                         io_mapping_unmap_atomic(reloc_page);
342                         reloc_page =
343                                 io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
344                                                          offset);
345                 }
346
347                 iowrite32(upper_32_bits(delta),
348                           reloc_page + offset_in_page(offset));
349         }
350
351         io_mapping_unmap_atomic(reloc_page);
352
353         return 0;
354 }
355
356 static void
357 clflush_write32(void *addr, uint32_t value)
358 {
359         /* This is not a fast path, so KISS. */
360         drm_clflush_virt_range(addr, sizeof(uint32_t));
361         *(uint32_t *)addr = value;
362         drm_clflush_virt_range(addr, sizeof(uint32_t));
363 }
364
365 static int
366 relocate_entry_clflush(struct drm_i915_gem_object *obj,
367                        struct drm_i915_gem_relocation_entry *reloc,
368                        uint64_t target_offset)
369 {
370         struct drm_device *dev = obj->base.dev;
371         uint32_t page_offset = offset_in_page(reloc->offset);
372         uint64_t delta = relocation_target(reloc, target_offset);
373         char *vaddr;
374         int ret;
375
376         ret = i915_gem_object_set_to_gtt_domain(obj, true);
377         if (ret)
378                 return ret;
379
380         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
381                                 reloc->offset >> PAGE_SHIFT));
382         clflush_write32(vaddr + page_offset, lower_32_bits(delta));
383
384         if (INTEL_INFO(dev)->gen >= 8) {
385                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
386
387                 if (page_offset == 0) {
388                         kunmap_atomic(vaddr);
389                         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
390                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
391                 }
392
393                 clflush_write32(vaddr + page_offset, upper_32_bits(delta));
394         }
395
396         kunmap_atomic(vaddr);
397
398         return 0;
399 }
400
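/*
 * Apply a single relocation: look up the target vma (we hold references to
 * all valid objects), validate the read/write domains, skip the write if
 * presumed_offset already matches, check bounds and alignment, and finally
 * patch the batch via the CPU, GTT or clflush path.  Active objects cannot
 * be waited upon with pagefaults disabled, so -EFAULT is returned to let
 * the caller fall back to the slow path.
 */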
401 static int
402 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
403                                    struct eb_vmas *eb,
404                                    struct drm_i915_gem_relocation_entry *reloc)
405 {
406         struct drm_device *dev = obj->base.dev;
407         struct drm_gem_object *target_obj;
408         struct drm_i915_gem_object *target_i915_obj;
409         struct i915_vma *target_vma;
410         uint64_t target_offset;
411         int ret;
412
413         /* we already hold a reference to all valid objects */
414         target_vma = eb_get_vma(eb, reloc->target_handle);
415         if (unlikely(target_vma == NULL))
416                 return -ENOENT;
417         target_i915_obj = target_vma->obj;
418         target_obj = &target_vma->obj->base;
419
420         target_offset = gen8_canonical_addr(target_vma->node.start);
421
422         /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
423          * pipe_control writes because the gpu doesn't properly redirect them
424          * through the ppgtt for non-secure batchbuffers. */
425         if (unlikely(IS_GEN6(dev) &&
426             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
427                 ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
428                                     PIN_GLOBAL);
429                 if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
430                         return ret;
431         }
432
433         /* Validate that the target is in a valid r/w GPU domain */
434         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
435                 DRM_DEBUG("reloc with multiple write domains: "
436                           "obj %p target %d offset %d "
437                           "read %08x write %08x",
438                           obj, reloc->target_handle,
439                           (int) reloc->offset,
440                           reloc->read_domains,
441                           reloc->write_domain);
442                 return -EINVAL;
443         }
444         if (unlikely((reloc->write_domain | reloc->read_domains)
445                      & ~I915_GEM_GPU_DOMAINS)) {
446                 DRM_DEBUG("reloc with read/write non-GPU domains: "
447                           "obj %p target %d offset %d "
448                           "read %08x write %08x",
449                           obj, reloc->target_handle,
450                           (int) reloc->offset,
451                           reloc->read_domains,
452                           reloc->write_domain);
453                 return -EINVAL;
454         }
455
456         target_obj->pending_read_domains |= reloc->read_domains;
457         target_obj->pending_write_domain |= reloc->write_domain;
458
459         /* If the relocation already has the right value in it, no
460          * more work needs to be done.
461          */
462         if (target_offset == reloc->presumed_offset)
463                 return 0;
464
465         /* Check that the relocation address is valid... */
466         if (unlikely(reloc->offset >
467                 obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
468                 DRM_DEBUG("Relocation beyond object bounds: "
469                           "obj %p target %d offset %d size %d.\n",
470                           obj, reloc->target_handle,
471                           (int) reloc->offset,
472                           (int) obj->base.size);
473                 return -EINVAL;
474         }
475         if (unlikely(reloc->offset & 3)) {
476                 DRM_DEBUG("Relocation not 4-byte aligned: "
477                           "obj %p target %d offset %d.\n",
478                           obj, reloc->target_handle,
479                           (int) reloc->offset);
480                 return -EINVAL;
481         }
482
483         /* We can't wait for rendering with pagefaults disabled */
484         if (obj->active && pagefault_disabled())
485                 return -EFAULT;
486
487         if (use_cpu_reloc(obj))
488                 ret = relocate_entry_cpu(obj, reloc, target_offset);
489         else if (obj->map_and_fenceable)
490                 ret = relocate_entry_gtt(obj, reloc, target_offset);
491         else if (static_cpu_has(X86_FEATURE_CLFLUSH))
492                 ret = relocate_entry_clflush(obj, reloc, target_offset);
493         else {
494                 WARN_ONCE(1, "Impossible case in relocation handling\n");
495                 ret = -ENODEV;
496         }
497
498         if (ret)
499                 return ret;
500
501         /* and update the user's relocation entry */
502         reloc->presumed_offset = target_offset;
503
504         return 0;
505 }
506
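/*
 * Fast relocation pass for one vma: copy the user's relocation entries onto
 * the stack in small batches with __copy_from_user_inatomic(), apply them,
 * and write back any presumed_offset that changed.
 */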
507 static int
508 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
509                                  struct eb_vmas *eb)
510 {
511 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
512         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
513         struct drm_i915_gem_relocation_entry __user *user_relocs;
514         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
515         int remain, ret;
516
517         user_relocs = u64_to_user_ptr(entry->relocs_ptr);
518
519         remain = entry->relocation_count;
520         while (remain) {
521                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
522                 int count = remain;
523                 if (count > ARRAY_SIZE(stack_reloc))
524                         count = ARRAY_SIZE(stack_reloc);
525                 remain -= count;
526
527                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
528                         return -EFAULT;
529
530                 do {
531                         u64 offset = r->presumed_offset;
532
533                         ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
534                         if (ret)
535                                 return ret;
536
537                         if (r->presumed_offset != offset &&
538                             __put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
539                                 return -EFAULT;
540                         }
541
542                         user_relocs++;
543                         r++;
544                 } while (--count);
545         }
546
547         return 0;
548 #undef N_RELOC
549 }
550
551 static int
552 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
553                                       struct eb_vmas *eb,
554                                       struct drm_i915_gem_relocation_entry *relocs)
555 {
556         const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
557         int i, ret;
558
559         for (i = 0; i < entry->relocation_count; i++) {
560                 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
561                 if (ret)
562                         return ret;
563         }
564
565         return 0;
566 }
567
568 static int
569 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
570 {
571         struct i915_vma *vma;
572         int ret = 0;
573
574         /* This is the fast path and we cannot handle a pagefault whilst
575          * holding the struct mutex lest the user pass in the relocations
576          * contained within an mmapped bo. In such a case the page
577          * fault handler would call i915_gem_fault() and we would try to
578          * acquire the struct mutex again. Obviously this is bad and so
579          * lockdep complains vehemently.
580          */
581         pagefault_disable();
582         list_for_each_entry(vma, &eb->vmas, exec_list) {
583                 ret = i915_gem_execbuffer_relocate_vma(vma, eb);
584                 if (ret)
585                         break;
586         }
587         pagefault_enable();
588
589         return ret;
590 }
591
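/*
 * True when the object needs a mappable GTT binding purely so relocations
 * can be written through the aperture (__EXEC_OBJECT_NEEDS_MAP without
 * EXEC_OBJECT_NEEDS_FENCE); such a pin may be retried without PIN_MAPPABLE
 * if the mappable aperture is exhausted.
 */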
592 static bool only_mappable_for_reloc(unsigned int flags)
593 {
594         return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
595                 __EXEC_OBJECT_NEEDS_MAP;
596 }
597
598 static int
599 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
600                                 struct intel_engine_cs *ring,
601                                 bool *need_reloc)
602 {
603         struct drm_i915_gem_object *obj = vma->obj;
604         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
605         uint64_t flags;
606         int ret;
607
608         flags = PIN_USER;
609         if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
610                 flags |= PIN_GLOBAL;
611
612         if (!drm_mm_node_allocated(&vma->node)) {
613                 /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
614                  * limit address to the first 4GBs for unflagged objects.
615                  */
616                 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
617                         flags |= PIN_ZONE_4G;
618                 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
619                         flags |= PIN_GLOBAL | PIN_MAPPABLE;
620                 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
621                         flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
622                 if (entry->flags & EXEC_OBJECT_PINNED)
623                         flags |= entry->offset | PIN_OFFSET_FIXED;
624                 if ((flags & PIN_MAPPABLE) == 0)
625                         flags |= PIN_HIGH;
626         }
627
628         ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
629         if ((ret == -ENOSPC  || ret == -E2BIG) &&
630             only_mappable_for_reloc(entry->flags))
631                 ret = i915_gem_object_pin(obj, vma->vm,
632                                           entry->alignment,
633                                           flags & ~PIN_MAPPABLE);
634         if (ret)
635                 return ret;
636
637         entry->flags |= __EXEC_OBJECT_HAS_PIN;
638
639         if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
640                 ret = i915_gem_object_get_fence(obj);
641                 if (ret)
642                         return ret;
643
644                 if (i915_gem_object_pin_fence(obj))
645                         entry->flags |= __EXEC_OBJECT_HAS_FENCE;
646         }
647
648         if (entry->offset != vma->node.start) {
649                 entry->offset = vma->node.start;
650                 *need_reloc = true;
651         }
652
653         if (entry->flags & EXEC_OBJECT_WRITE) {
654                 obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
655                 obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
656         }
657
658         return 0;
659 }
660
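/*
 * Relocations have to go through the mappable GTT window when the vma is
 * in the global GTT and the CPU path is not usable (no LLC and not already
 * in the CPU write domain); see also use_cpu_reloc().
 */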
661 static bool
662 need_reloc_mappable(struct i915_vma *vma)
663 {
664         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
665
666         if (entry->relocation_count == 0)
667                 return false;
668
669         if (!vma->is_ggtt)
670                 return false;
671
672         /* See also use_cpu_reloc() */
673         if (HAS_LLC(vma->obj->base.dev))
674                 return false;
675
676         if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
677                 return false;
678
679         return true;
680 }
681
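/*
 * Check whether an already bound vma violates a constraint from its exec
 * entry (alignment, fixed offset, batch bias, mappability, 32-bit zone);
 * if so it is unbound and rebound during reservation.
 */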
682 static bool
683 eb_vma_misplaced(struct i915_vma *vma)
684 {
685         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
686         struct drm_i915_gem_object *obj = vma->obj;
687
688         WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
689
690         if (entry->alignment &&
691             vma->node.start & (entry->alignment - 1))
692                 return true;
693
694         if (entry->flags & EXEC_OBJECT_PINNED &&
695             vma->node.start != entry->offset)
696                 return true;
697
698         if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
699             vma->node.start < BATCH_OFFSET_BIAS)
700                 return true;
701
702         /* avoid costly ping-pong once a batch bo ended up non-mappable */
703         if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
704                 return !only_mappable_for_reloc(entry->flags);
705
706         if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
707             (vma->node.start + vma->node.size - 1) >> 32)
708                 return true;
709
710         return false;
711 }
712
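/*
 * Reserve GTT space for every vma in the execbuf.  The list is first
 * sorted: vmas needing a mappable binding are moved to the front, while
 * softpinned (EXEC_OBJECT_PINNED) vmas are moved to the very end of the
 * list.  The retry loop below then pins, unbinds misplaced vmas and, as a
 * last resort, evicts the whole address space.
 */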
713 static int
714 i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
715                             struct list_head *vmas,
716                             struct intel_context *ctx,
717                             bool *need_relocs)
718 {
719         struct drm_i915_gem_object *obj;
720         struct i915_vma *vma;
721         struct i915_address_space *vm;
722         struct list_head ordered_vmas;
723         struct list_head pinned_vmas;
724         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
725         int retry;
726
727         i915_gem_retire_requests_ring(ring);
728
729         vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
730
731         INIT_LIST_HEAD(&ordered_vmas);
732         INIT_LIST_HEAD(&pinned_vmas);
733         while (!list_empty(vmas)) {
734                 struct drm_i915_gem_exec_object2 *entry;
735                 bool need_fence, need_mappable;
736
737                 vma = list_first_entry(vmas, struct i915_vma, exec_list);
738                 obj = vma->obj;
739                 entry = vma->exec_entry;
740
741                 if (ctx->flags & CONTEXT_NO_ZEROMAP)
742                         entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
743
744                 if (!has_fenced_gpu_access)
745                         entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
746                 need_fence =
747                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
748                         obj->tiling_mode != I915_TILING_NONE;
749                 need_mappable = need_fence || need_reloc_mappable(vma);
750
751                 if (entry->flags & EXEC_OBJECT_PINNED)
752                         list_move_tail(&vma->exec_list, &pinned_vmas);
753                 else if (need_mappable) {
754                         entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
755                         list_move(&vma->exec_list, &ordered_vmas);
756                 } else
757                         list_move_tail(&vma->exec_list, &ordered_vmas);
758
759                 obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
760                 obj->base.pending_write_domain = 0;
761         }
762         list_splice(&ordered_vmas, vmas);
763         list_splice(&pinned_vmas, vmas);
764
765         /* Attempt to pin all of the buffers into the GTT.
766          * This is done in 3 phases:
767          *
768          * 1a. Unbind all objects that do not match the GTT constraints for
769          *     the execbuffer (fenceable, mappable, alignment etc).
770          * 1b. Increment pin count for already bound objects.
771          * 2.  Bind new objects.
772          * 3.  Decrement pin count.
773          *
774          * This avoids unnecessary unbinding of later objects in order to make
775          * room for the earlier objects *unless* we need to defragment.
776          */
777         retry = 0;
778         do {
779                 int ret = 0;
780
781                 /* Unbind any ill-fitting objects or pin. */
782                 list_for_each_entry(vma, vmas, exec_list) {
783                         if (!drm_mm_node_allocated(&vma->node))
784                                 continue;
785
786                         if (eb_vma_misplaced(vma))
787                                 ret = i915_vma_unbind(vma);
788                         else
789                                 ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
790                         if (ret)
791                                 goto err;
792                 }
793
794                 /* Bind fresh objects */
795                 list_for_each_entry(vma, vmas, exec_list) {
796                         if (drm_mm_node_allocated(&vma->node))
797                                 continue;
798
799                         ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
800                         if (ret)
801                                 goto err;
802                 }
803
804 err:
805                 if (ret != -ENOSPC || retry++)
806                         return ret;
807
808                 /* Decrement pin count for bound objects */
809                 list_for_each_entry(vma, vmas, exec_list)
810                         i915_gem_execbuffer_unreserve_vma(vma);
811
812                 ret = i915_gem_evict_vm(vm, true);
813                 if (ret)
814                         return ret;
815         } while (1);
816 }
817
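/*
 * Slow path: the atomic relocation pass faulted, so drop struct_mutex,
 * copy all relocation entries from userspace with pagefaults enabled,
 * invalidate the user's presumed offsets, then retake the lock,
 * re-reserve the buffers and apply the relocations from the kernel copy.
 */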
818 static int
819 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
820                                   struct drm_i915_gem_execbuffer2 *args,
821                                   struct drm_file *file,
822                                   struct intel_engine_cs *ring,
823                                   struct eb_vmas *eb,
824                                   struct drm_i915_gem_exec_object2 *exec,
825                                   struct intel_context *ctx)
826 {
827         struct drm_i915_gem_relocation_entry *reloc;
828         struct i915_address_space *vm;
829         struct i915_vma *vma;
830         bool need_relocs;
831         int *reloc_offset;
832         int i, total, ret;
833         unsigned count = args->buffer_count;
834
835         vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
836
837         /* We may process another execbuffer during the unlock... */
838         while (!list_empty(&eb->vmas)) {
839                 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
840                 list_del_init(&vma->exec_list);
841                 i915_gem_execbuffer_unreserve_vma(vma);
842                 drm_gem_object_unreference(&vma->obj->base);
843         }
844
845         mutex_unlock(&dev->struct_mutex);
846
847         total = 0;
848         for (i = 0; i < count; i++)
849                 total += exec[i].relocation_count;
850
851         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
852         reloc = drm_malloc_ab(total, sizeof(*reloc));
853         if (reloc == NULL || reloc_offset == NULL) {
854                 drm_free_large(reloc);
855                 drm_free_large(reloc_offset);
856                 mutex_lock(&dev->struct_mutex);
857                 return -ENOMEM;
858         }
859
860         total = 0;
861         for (i = 0; i < count; i++) {
862                 struct drm_i915_gem_relocation_entry __user *user_relocs;
863                 u64 invalid_offset = (u64)-1;
864                 int j;
865
866                 user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
867
868                 if (copy_from_user(reloc+total, user_relocs,
869                                    exec[i].relocation_count * sizeof(*reloc))) {
870                         ret = -EFAULT;
871                         mutex_lock(&dev->struct_mutex);
872                         goto err;
873                 }
874
875                 /* As we do not update the known relocation offsets after
876                  * relocating (due to the complexities in lock handling),
877                  * we need to mark them as invalid now so that we force the
878                  * relocation processing next time. Just in case the target
879                  * object is evicted and then rebound into its old
880                  * presumed_offset before the next execbuffer - if that
881                  * happened we would make the mistake of assuming that the
882                  * relocations were valid.
883                  */
884                 for (j = 0; j < exec[i].relocation_count; j++) {
885                         if (__copy_to_user(&user_relocs[j].presumed_offset,
886                                            &invalid_offset,
887                                            sizeof(invalid_offset))) {
888                                 ret = -EFAULT;
889                                 mutex_lock(&dev->struct_mutex);
890                                 goto err;
891                         }
892                 }
893
894                 reloc_offset[i] = total;
895                 total += exec[i].relocation_count;
896         }
897
898         ret = i915_mutex_lock_interruptible(dev);
899         if (ret) {
900                 mutex_lock(&dev->struct_mutex);
901                 goto err;
902         }
903
904         /* reacquire the objects */
905         eb_reset(eb);
906         ret = eb_lookup_vmas(eb, exec, args, vm, file);
907         if (ret)
908                 goto err;
909
910         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
911         ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
912         if (ret)
913                 goto err;
914
915         list_for_each_entry(vma, &eb->vmas, exec_list) {
916                 int offset = vma->exec_entry - exec;
917                 ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
918                                                             reloc + reloc_offset[offset]);
919                 if (ret)
920                         goto err;
921         }
922
923         /* Leave the user relocations as they are; this is the painfully slow path,
924          * and we want to avoid the complication of dropping the lock whilst
925          * having buffers reserved in the aperture and so causing spurious
926          * ENOSPC for random operations.
927          */
928
929 err:
930         drm_free_large(reloc);
931         drm_free_large(reloc_offset);
932         return ret;
933 }
934
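/*
 * Make all objects coherent for the GPU before the batch runs: synchronise
 * with work outstanding on other rings, clflush objects still in the CPU
 * write domain, issue a wmb() for GTT writes and finally invalidate the
 * ring's caches for this request.
 */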
935 static int
936 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
937                                 struct list_head *vmas)
938 {
939         const unsigned other_rings = ~intel_ring_flag(req->ring);
940         struct i915_vma *vma;
941         uint32_t flush_domains = 0;
942         bool flush_chipset = false;
943         int ret;
944
945         list_for_each_entry(vma, vmas, exec_list) {
946                 struct drm_i915_gem_object *obj = vma->obj;
947
948                 if (obj->active & other_rings) {
949                         ret = i915_gem_object_sync(obj, req->ring, &req);
950                         if (ret)
951                                 return ret;
952                 }
953
954                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
955                         flush_chipset |= i915_gem_clflush_object(obj, false);
956
957                 flush_domains |= obj->base.write_domain;
958         }
959
960         if (flush_chipset)
961                 i915_gem_chipset_flush(req->ring->dev);
962
963         if (flush_domains & I915_GEM_DOMAIN_GTT)
964                 wmb();
965
966         /* Unconditionally invalidate gpu caches and ensure that we do flush
967          * any residual writes from the previous batch.
968          */
969         return intel_ring_invalidate_all_caches(req);
970 }
971
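/*
 * Reject execbuffers using flags or fields we do not support: unknown
 * flags, DRI1-era cliprects/DR1/DR4 (with a fixup for UXA's garbage DR4),
 * and batch start offsets or lengths that are not 8-byte aligned.
 */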
972 static bool
973 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
974 {
975         if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
976                 return false;
977
978         /* Kernel clipping was a DRI1 misfeature */
979         if (exec->num_cliprects || exec->cliprects_ptr)
980                 return false;
981
982         if (exec->DR4 == 0xffffffff) {
983                 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
984                 exec->DR4 = 0;
985         }
986         if (exec->DR1 || exec->DR4)
987                 return false;
988
989         if ((exec->batch_start_offset | exec->batch_len) & 0x7)
990                 return false;
991
992         return true;
993 }
994
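/*
 * Validate the user's exec object array before taking any locks: unknown
 * per-object flags, unaligned or non-canonical softpin offsets,
 * non-power-of-two alignments, relocation counts that would overflow the
 * slow-path allocation, and unwritable relocation arrays are all rejected
 * up front.
 */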
995 static int
996 validate_exec_list(struct drm_device *dev,
997                    struct drm_i915_gem_exec_object2 *exec,
998                    int count)
999 {
1000         unsigned relocs_total = 0;
1001         unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1002         unsigned invalid_flags;
1003         int i;
1004
1005         invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1006         if (USES_FULL_PPGTT(dev))
1007                 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1008
1009         for (i = 0; i < count; i++) {
1010                 char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1011                 int length; /* limited by fault_in_pages_readable() */
1012
1013                 if (exec[i].flags & invalid_flags)
1014                         return -EINVAL;
1015
1016                 /* Offset can be used as input (EXEC_OBJECT_PINNED), so reject
1017                  * any non-page-aligned or non-canonical addresses.
1018                  */
1019                 if (exec[i].flags & EXEC_OBJECT_PINNED) {
1020                         if (exec[i].offset !=
1021                             gen8_canonical_addr(exec[i].offset & PAGE_MASK))
1022                                 return -EINVAL;
1023
1024                         /* From the drm_mm perspective the address space is
1025                          * continuous, so from this point on we always use the
1026                          * non-canonical form internally.
1027                          */
1028                         exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1029                 }
1030
1031                 if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1032                         return -EINVAL;
1033
1034                 /* First check for malicious input causing overflow in
1035                  * the worst case where we need to allocate the entire
1036                  * relocation tree as a single array.
1037                  */
1038                 if (exec[i].relocation_count > relocs_max - relocs_total)
1039                         return -EINVAL;
1040                 relocs_total += exec[i].relocation_count;
1041
1042                 length = exec[i].relocation_count *
1043                         sizeof(struct drm_i915_gem_relocation_entry);
1044                 /*
1045                  * We must check that the entire relocation array is safe
1046                  * to read, but since we may need to update the presumed
1047                  * offsets during execution, check for full write access.
1048                  */
1049                 if (!access_ok(VERIFY_WRITE, ptr, length))
1050                         return -EFAULT;
1051
1052                 if (likely(!i915.prefault_disable)) {
1053                         if (fault_in_multipages_readable(ptr, length))
1054                                 return -EFAULT;
1055                 }
1056         }
1057
1058         return 0;
1059 }
1060
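/*
 * Look up and sanity-check the context for this submission: non-default
 * contexts are only accepted on the render ring, banned contexts are
 * rejected with -EIO, and under execlists the logical ring context is
 * allocated here on first use.
 */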
1061 static struct intel_context *
1062 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1063                           struct intel_engine_cs *ring, const u32 ctx_id)
1064 {
1065         struct intel_context *ctx = NULL;
1066         struct i915_ctx_hang_stats *hs;
1067
1068         if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
1069                 return ERR_PTR(-EINVAL);
1070
1071         ctx = i915_gem_context_get(file->driver_priv, ctx_id);
1072         if (IS_ERR(ctx))
1073                 return ctx;
1074
1075         hs = &ctx->hang_stats;
1076         if (hs->banned) {
1077                 DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1078                 return ERR_PTR(-EIO);
1079         }
1080
1081         if (i915.enable_execlists && !ctx->engine[ring->id].state) {
1082                 int ret = intel_lr_context_deferred_alloc(ctx, ring);
1083                 if (ret) {
1084                         DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
1085                         return ERR_PTR(ret);
1086                 }
1087         }
1088
1089         return ctx;
1090 }
1091
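/*
 * Mark every vma (and its object) as active on this request: update the
 * read/write domains, track the last write and last fence requests, bump
 * the fence register LRU and invalidate frontbuffer state for GPU writes.
 */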
1092 void
1093 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1094                                    struct drm_i915_gem_request *req)
1095 {
1096         struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1097         struct i915_vma *vma;
1098
1099         list_for_each_entry(vma, vmas, exec_list) {
1100                 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
1101                 struct drm_i915_gem_object *obj = vma->obj;
1102                 u32 old_read = obj->base.read_domains;
1103                 u32 old_write = obj->base.write_domain;
1104
1105                 obj->dirty = 1; /* be paranoid  */
1106                 obj->base.write_domain = obj->base.pending_write_domain;
1107                 if (obj->base.write_domain == 0)
1108                         obj->base.pending_read_domains |= obj->base.read_domains;
1109                 obj->base.read_domains = obj->base.pending_read_domains;
1110
1111                 i915_vma_move_to_active(vma, req);
1112                 if (obj->base.write_domain) {
1113                         i915_gem_request_assign(&obj->last_write_req, req);
1114
1115                         intel_fb_obj_invalidate(obj, ORIGIN_CS);
1116
1117                         /* update for the implicit flush after a batch */
1118                         obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1119                 }
1120                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
1121                         i915_gem_request_assign(&obj->last_fenced_req, req);
1122                         if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1123                                 struct drm_i915_private *dev_priv = to_i915(ring->dev);
1124                                 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1125                                                &dev_priv->mm.fence_list);
1126                         }
1127                 }
1128
1129                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
1130         }
1131 }
1132
1133 void
1134 i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
1135 {
1136         /* Unconditionally force add_request to emit a full flush. */
1137         params->ring->gpu_caches_dirty = true;
1138
1139         /* Add a breadcrumb for the completion of the batch buffer */
1140         __i915_add_request(params->request, params->batch_obj, true);
1141 }
1142
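/*
 * I915_EXEC_GEN7_SOL_RESET: emit LRIs that zero the four
 * GEN7_SO_WRITE_OFFSET registers before the batch, presumably so that
 * streamout writes restart from the beginning of their buffers.  Only
 * valid on the gen7 render ring.
 */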
1143 static int
1144 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1145                             struct drm_i915_gem_request *req)
1146 {
1147         struct intel_engine_cs *ring = req->ring;
1148         struct drm_i915_private *dev_priv = dev->dev_private;
1149         int ret, i;
1150
1151         if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1152                 DRM_DEBUG("sol reset is gen7/rcs only\n");
1153                 return -EINVAL;
1154         }
1155
1156         ret = intel_ring_begin(req, 4 * 3);
1157         if (ret)
1158                 return ret;
1159
1160         for (i = 0; i < 4; i++) {
1161                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1162                 intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1163                 intel_ring_emit(ring, 0);
1164         }
1165
1166         intel_ring_advance(ring);
1167
1168         return 0;
1169 }
1170
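/*
 * Run the command parser over the userspace batch, copying it into a
 * shadow buffer taken from the engine's batch pool.  On success the shadow
 * is pinned, given a synthetic exec entry and appended to eb->vmas so it
 * is tracked like any other buffer; -EACCES (an unhandled chained batch)
 * falls back to executing the original batch.
 */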
1171 static struct drm_i915_gem_object*
1172 i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
1173                           struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1174                           struct eb_vmas *eb,
1175                           struct drm_i915_gem_object *batch_obj,
1176                           u32 batch_start_offset,
1177                           u32 batch_len,
1178                           bool is_master)
1179 {
1180         struct drm_i915_gem_object *shadow_batch_obj;
1181         struct i915_vma *vma;
1182         int ret;
1183
1184         shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
1185                                                    PAGE_ALIGN(batch_len));
1186         if (IS_ERR(shadow_batch_obj))
1187                 return shadow_batch_obj;
1188
1189         ret = i915_parse_cmds(ring,
1190                               batch_obj,
1191                               shadow_batch_obj,
1192                               batch_start_offset,
1193                               batch_len,
1194                               is_master);
1195         if (ret)
1196                 goto err;
1197
1198         ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
1199         if (ret)
1200                 goto err;
1201
1202         i915_gem_object_unpin_pages(shadow_batch_obj);
1203
1204         memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1205
1206         vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1207         vma->exec_entry = shadow_exec_entry;
1208         vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1209         drm_gem_object_reference(&shadow_batch_obj->base);
1210         list_add_tail(&vma->exec_list, &eb->vmas);
1211
1212         shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1213
1214         return shadow_batch_obj;
1215
1216 err:
1217         i915_gem_object_unpin_pages(shadow_batch_obj);
1218         if (ret == -EACCES) /* unhandled chained batch */
1219                 return batch_obj;
1220         else
1221                 return ERR_PTR(ret);
1222 }
1223
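/*
 * Legacy ringbuffer submission backend: flush the objects to the GPU,
 * switch to the requested context, update INSTPM if the relative-constants
 * mode changed, optionally reset the gen7 SOL offsets, dispatch the batch
 * and mark all vmas active for this request.
 */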
1224 int
1225 i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1226                                struct drm_i915_gem_execbuffer2 *args,
1227                                struct list_head *vmas)
1228 {
1229         struct drm_device *dev = params->dev;
1230         struct intel_engine_cs *ring = params->ring;
1231         struct drm_i915_private *dev_priv = dev->dev_private;
1232         u64 exec_start, exec_len;
1233         int instp_mode;
1234         u32 instp_mask;
1235         int ret;
1236
1237         ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1238         if (ret)
1239                 return ret;
1240
1241         ret = i915_switch_context(params->request);
1242         if (ret)
1243                 return ret;
1244
1245         WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
1246              "%s didn't clear reload\n", ring->name);
1247
1248         instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1249         instp_mask = I915_EXEC_CONSTANTS_MASK;
1250         switch (instp_mode) {
1251         case I915_EXEC_CONSTANTS_REL_GENERAL:
1252         case I915_EXEC_CONSTANTS_ABSOLUTE:
1253         case I915_EXEC_CONSTANTS_REL_SURFACE:
1254                 if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1255                         DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1256                         return -EINVAL;
1257                 }
1258
1259                 if (instp_mode != dev_priv->relative_constants_mode) {
1260                         if (INTEL_INFO(dev)->gen < 4) {
1261                                 DRM_DEBUG("no rel constants on pre-gen4\n");
1262                                 return -EINVAL;
1263                         }
1264
1265                         if (INTEL_INFO(dev)->gen > 5 &&
1266                             instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1267                                 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1268                                 return -EINVAL;
1269                         }
1270
1271                         /* The HW changed the meaning of this bit on gen6 */
1272                         if (INTEL_INFO(dev)->gen >= 6)
1273                                 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1274                 }
1275                 break;
1276         default:
1277                 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1278                 return -EINVAL;
1279         }
1280
1281         if (ring == &dev_priv->ring[RCS] &&
1282             instp_mode != dev_priv->relative_constants_mode) {
1283                 ret = intel_ring_begin(params->request, 4);
1284                 if (ret)
1285                         return ret;
1286
1287                 intel_ring_emit(ring, MI_NOOP);
1288                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1289                 intel_ring_emit_reg(ring, INSTPM);
1290                 intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1291                 intel_ring_advance(ring);
1292
1293                 dev_priv->relative_constants_mode = instp_mode;
1294         }
1295
1296         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1297                 ret = i915_reset_gen7_sol_offsets(dev, params->request);
1298                 if (ret)
1299                         return ret;
1300         }
1301
1302         exec_len   = args->batch_len;
1303         exec_start = params->batch_obj_vm_offset +
1304                      params->args_batch_start_offset;
1305
1306         if (exec_len == 0)
1307                 exec_len = params->batch_obj->base.size;
1308
1309         ret = ring->dispatch_execbuffer(params->request,
1310                                         exec_start, exec_len,
1311                                         params->dispatch_flags);
1312         if (ret)
1313                 return ret;
1314
1315         trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1316
1317         i915_gem_execbuffer_move_to_active(vmas, params->request);
1318         i915_gem_execbuffer_retire_commands(params);
1319
1320         return 0;
1321 }
1322
1323 /**
1324  * Find one BSD ring to dispatch the corresponding BSD command.
1325  * The ring index is returned.
1326  */
1327 static unsigned int
1328 gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file)
1329 {
1330         struct drm_i915_file_private *file_priv = file->driver_priv;
1331
1332         /* Check whether the file_priv has already selected one ring. */
1333         if ((int)file_priv->bsd_ring < 0) {
1334                 /* If not, use the ping-pong mechanism to select one. */
1335                 mutex_lock(&dev_priv->dev->struct_mutex);
1336                 file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index;
1337                 dev_priv->mm.bsd_ring_dispatch_index ^= 1;
1338                 mutex_unlock(&dev_priv->dev->struct_mutex);
1339         }
1340
1341         return file_priv->bsd_ring;
1342 }
1343
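/*
 * The batch buffer is, by ABI convention, the last entry in the exec
 * object array, and therefore the tail of eb->vmas.
 */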
1344 static struct drm_i915_gem_object *
1345 eb_get_batch(struct eb_vmas *eb)
1346 {
1347         struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1348
1349         /*
1350          * SNA is doing fancy tricks with compressing batch buffers, which leads
1351          * to negative relocation deltas. Usually that works out OK since the
1352          * relocated address is still positive, except when the batch is placed
1353          * very low in the GTT. Ensure this doesn't happen.
1354          *
1355          * Note that actual hangs have only been observed on gen7, but for
1356          * paranoia do it everywhere.
1357          */
1358         if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
1359                 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1360
1361         return vma->obj;
1362 }
1363
1364 #define I915_USER_RINGS (4)
1365
1366 static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
1367         [I915_EXEC_DEFAULT]     = RCS,
1368         [I915_EXEC_RENDER]      = RCS,
1369         [I915_EXEC_BLT]         = BCS,
1370         [I915_EXEC_BSD]         = VCS,
1371         [I915_EXEC_VEBOX]       = VECS
1372 };
1373
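/*
 * Map the ring selector in args->flags onto an engine.  BSD submissions
 * may either name a specific VCS instance or let the ping-pong logic in
 * gen8_dispatch_bsd_ring() pick one on dual-BSD parts.
 */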
1374 static int
1375 eb_select_ring(struct drm_i915_private *dev_priv,
1376                struct drm_file *file,
1377                struct drm_i915_gem_execbuffer2 *args,
1378                struct intel_engine_cs **ring)
1379 {
1380         unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1381
1382         if (user_ring_id > I915_USER_RINGS) {
1383                 DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1384                 return -EINVAL;
1385         }
1386
1387         if ((user_ring_id != I915_EXEC_BSD) &&
1388             ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1389                 DRM_DEBUG("execbuf with non bsd ring but with invalid "
1390                           "bsd dispatch flags: %d\n", (int)(args->flags));
1391                 return -EINVAL;
1392         }
1393
1394         if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1395                 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1396
1397                 if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1398                         bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
1399                 } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1400                            bsd_idx <= I915_EXEC_BSD_RING2) {
1401                         bsd_idx >>= I915_EXEC_BSD_SHIFT;
1402                         bsd_idx--;
1403                 } else {
1404                         DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1405                                   bsd_idx);
1406                         return -EINVAL;
1407                 }
1408
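                /*
                 * bsd_idx is now 0 or 1: e.g. I915_EXEC_BSD_RING2 is
                 * 2 << I915_EXEC_BSD_SHIFT, which becomes 2 after the shift
                 * and 1 after the decrement, i.e. the second video engine.
                 */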
1409                 *ring = &dev_priv->ring[_VCS(bsd_idx)];
1410         } else {
1411                 *ring = &dev_priv->ring[user_ring_map[user_ring_id]];
1412         }
1413
1414         if (!intel_ring_initialized(*ring)) {
1415                 DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1416                 return -EINVAL;
1417         }
1418
1419         return 0;
1420 }
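/*
 * A minimal userspace-side sketch (not part of this file; the fd and the
 * exec object array names are assumed) of how the ring selection above is
 * driven:
 *
 *	struct drm_i915_gem_execbuffer2 execbuf;
 *
 *	memset(&execbuf, 0, sizeof(execbuf));
 *	execbuf.buffers_ptr = (uintptr_t)exec_objects;
 *	execbuf.buffer_count = nr_objects;
 *	execbuf.batch_len = batch_len;
 *	execbuf.flags = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * Leaving the I915_EXEC_BSD_MASK bits at I915_EXEC_BSD_DEFAULT lets the
 * kernel ping-pong between the two video engines via
 * gen8_dispatch_bsd_ring() above.
 */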
1421
1422 static int
1423 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1424                        struct drm_file *file,
1425                        struct drm_i915_gem_execbuffer2 *args,
1426                        struct drm_i915_gem_exec_object2 *exec)
1427 {
1428         struct drm_i915_private *dev_priv = dev->dev_private;
1429         struct drm_i915_gem_request *req = NULL;
1430         struct eb_vmas *eb;
1431         struct drm_i915_gem_object *batch_obj;
1432         struct drm_i915_gem_exec_object2 shadow_exec_entry;
1433         struct intel_engine_cs *ring;
1434         struct intel_context *ctx;
1435         struct i915_address_space *vm;
1436         struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1437         struct i915_execbuffer_params *params = &params_master;
1438         const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1439         u32 dispatch_flags;
1440         int ret;
1441         bool need_relocs;
1442
1443         if (!i915_gem_check_execbuffer(args))
1444                 return -EINVAL;
1445
1446         ret = validate_exec_list(dev, exec, args->buffer_count);
1447         if (ret)
1448                 return ret;
1449
1450         dispatch_flags = 0;
1451         if (args->flags & I915_EXEC_SECURE) {
1452                 if (!file->is_master || !capable(CAP_SYS_ADMIN))
1453                         return -EPERM;
1454
1455                 dispatch_flags |= I915_DISPATCH_SECURE;
1456         }
1457         if (args->flags & I915_EXEC_IS_PINNED)
1458                 dispatch_flags |= I915_DISPATCH_PINNED;
1459
1460         ret = eb_select_ring(dev_priv, file, args, &ring);
1461         if (ret)
1462                 return ret;
1463
1464         if (args->buffer_count < 1) {
1465                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1466                 return -EINVAL;
1467         }
1468
1469         if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1470                 if (!HAS_RESOURCE_STREAMER(dev)) {
1471                         DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1472                         return -EINVAL;
1473                 }
1474                 if (ring->id != RCS) {
1475                         DRM_DEBUG("RS is not available on %s\n",
1476                                  ring->name);
1477                         return -EINVAL;
1478                 }
1479
1480                 dispatch_flags |= I915_DISPATCH_RS;
1481         }
1482
1483         intel_runtime_pm_get(dev_priv);
1484
1485         ret = i915_mutex_lock_interruptible(dev);
1486         if (ret)
1487                 goto pre_mutex_err;
1488
1489         ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1490         if (IS_ERR(ctx)) {
1491                 mutex_unlock(&dev->struct_mutex);
1492                 ret = PTR_ERR(ctx);
1493                 goto pre_mutex_err;
1494         }
1495
1496         i915_gem_context_reference(ctx);
1497
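        /*
         * Objects are bound into the context's ppgtt when one exists;
         * otherwise everything goes into the global GTT.
         */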
1498         if (ctx->ppgtt)
1499                 vm = &ctx->ppgtt->base;
1500         else
1501                 vm = &dev_priv->gtt.base;
1502
1503         memset(&params_master, 0x00, sizeof(params_master));
1504
1505         eb = eb_create(args);
1506         if (eb == NULL) {
1507                 i915_gem_context_unreference(ctx);
1508                 mutex_unlock(&dev->struct_mutex);
1509                 ret = -ENOMEM;
1510                 goto pre_mutex_err;
1511         }
1512
1513         /* Look up object handles */
1514         ret = eb_lookup_vmas(eb, exec, args, vm, file);
1515         if (ret)
1516                 goto err;
1517
1518         /* take note of the batch buffer before we might reorder the lists */
1519         batch_obj = eb_get_batch(eb);
1520
1521         /* Move the objects en-masse into the GTT, evicting if necessary. */
1522         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1523         ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
1524         if (ret)
1525                 goto err;
1526
1527         /* The objects are in their final locations, apply the relocations. */
1528         if (need_relocs)
1529                 ret = i915_gem_execbuffer_relocate(eb);
1530         if (ret) {
1531                 if (ret == -EFAULT) {
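                        /*
                         * Faulted on the relocation entries: fall back to the
                         * slow path, which copies them in with struct_mutex
                         * dropped and then revalidates everything.
                         */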
1532                         ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1533                                                                 eb, exec, ctx);
1534                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1535                 }
1536                 if (ret)
1537                         goto err;
1538         }
1539
1540         /* Reject self-modifying batches: the GPU must not write to the batch buffer. */
1541         if (batch_obj->base.pending_write_domain) {
1542                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1543                 ret = -EINVAL;
1544                 goto err;
1545         }
1546
1547         params->args_batch_start_offset = args->batch_start_offset;
1548         if (i915_needs_cmd_parser(ring) && args->batch_len) {
1549                 struct drm_i915_gem_object *parsed_batch_obj;
1550
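                /*
                 * The command parser scans the batch and, when it takes
                 * effect, hands back a trusted shadow copy to execute in
                 * place of the user's object.
                 */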
1551                 parsed_batch_obj = i915_gem_execbuffer_parse(ring,
1552                                                       &shadow_exec_entry,
1553                                                       eb,
1554                                                       batch_obj,
1555                                                       args->batch_start_offset,
1556                                                       args->batch_len,
1557                                                       file->is_master);
1558                 if (IS_ERR(parsed_batch_obj)) {
1559                         ret = PTR_ERR(parsed_batch_obj);
1560                         goto err;
1561                 }
1562
1563                 /*
1564                  * parsed_batch_obj == batch_obj means batch not fully parsed:
1565                  * Accept, but don't promote to secure.
1566                  */
1567
1568                 if (parsed_batch_obj != batch_obj) {
1569                         /*
1570                          * Batch parsed and accepted:
1571                          *
1572                          * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1573                          * bit from MI_BATCH_BUFFER_START commands issued in
1574                          * the dispatch_execbuffer implementations. We
1575                          * specifically don't want that set on batches the
1576                          * command parser has accepted.
1577                          */
1578                         dispatch_flags |= I915_DISPATCH_SECURE;
1579                         params->args_batch_start_offset = 0;
1580                         batch_obj = parsed_batch_obj;
1581                 }
1582         }
1583
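        /* The batch is read by the command streamer when it is executed. */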
1584         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1585
1586         /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1587          * batch" bit. Hence we need to pin secure batches into the global gtt.
1588          * hsw should have this fixed, but bdw mucks it up again. */
1589         if (dispatch_flags & I915_DISPATCH_SECURE) {
1590                 /*
1591                  * So on first glance it looks freaky that we pin the batch here
1592                  * outside of the reservation loop. But:
1593                  * - The batch is already pinned into the relevant ppgtt, so we
1594                  *   already have the backing storage fully allocated.
1595                  * - No other BO uses the global gtt (well contexts, but meh),
1596                  *   so we don't really have issues with multiple objects not
1597                  *   fitting due to fragmentation.
1598                  * So this is actually safe.
1599                  */
1600                 ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1601                 if (ret)
1602                         goto err;
1603
1604                 params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
1605         } else
1606                 params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
1607
1608         /* Allocate a request for this batch buffer nice and early. */
1609         req = i915_gem_request_alloc(ring, ctx);
1610         if (IS_ERR(req)) {
1611                 ret = PTR_ERR(req);
1612                 goto err_batch_unpin;
1613         }
1614
1615         ret = i915_gem_request_add_to_client(req, file);
1616         if (ret)
1617                 goto err_batch_unpin;
1618
1619         /*
1620          * Save assorted stuff away to pass through to *_submission().
1621          * NB: This data should be 'persistent' and not local as it will
1622          * be kept around beyond the duration of the IOCTL once the GPU
1623          * scheduler arrives.
1624          */
1625         params->dev                     = dev;
1626         params->file                    = file;
1627         params->ring                    = ring;
1628         params->dispatch_flags          = dispatch_flags;
1629         params->batch_obj               = batch_obj;
1630         params->ctx                     = ctx;
1631         params->request                 = req;
1632
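        /*
         * Hand off to the submission backend (legacy ring buffer or
         * execlists) for the actual dispatch.
         */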
1633         ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
1634
1635 err_batch_unpin:
1636         /*
1637          * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1638          * batch vma for correctness. For less ugly and less fragility this
1639          * needs to be adjusted to also track the ggtt batch vma properly as
1640          * active.
1641          */
1642         if (dispatch_flags & I915_DISPATCH_SECURE)
1643                 i915_gem_object_ggtt_unpin(batch_obj);
1644
1645 err:
1646         /* the request owns the ref now */
1647         i915_gem_context_unreference(ctx);
1648         eb_destroy(eb);
1649
1650         /*
1651          * If the request was created but not successfully submitted then it
1652          * must be freed again. If it was submitted then it is being tracked
1653          * on the active request list and no clean up is required here.
1654          */
1655         if (ret && !IS_ERR_OR_NULL(req))
1656                 i915_gem_request_cancel(req);
1657
1658         mutex_unlock(&dev->struct_mutex);
1659
1660 pre_mutex_err:
1661         /* intel_gpu_busy should also get a ref, so it will free when the device
1662          * is really idle. */
1663         intel_runtime_pm_put(dev_priv);
1664         return ret;
1665 }
1666
1667 /*
1668  * Legacy execbuffer just creates an exec2 list from the original exec object
1669  * list array and passes it to the real function.
1670  */
1671 int
1672 i915_gem_execbuffer(struct drm_device *dev, void *data,
1673                     struct drm_file *file)
1674 {
1675         struct drm_i915_gem_execbuffer *args = data;
1676         struct drm_i915_gem_execbuffer2 exec2;
1677         struct drm_i915_gem_exec_object *exec_list = NULL;
1678         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1679         int ret, i;
1680
1681         if (args->buffer_count < 1) {
1682                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1683                 return -EINVAL;
1684         }
1685
1686         /* Copy in the exec list from userland */
1687         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1688         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1689         if (exec_list == NULL || exec2_list == NULL) {
1690                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1691                           args->buffer_count);
1692                 drm_free_large(exec_list);
1693                 drm_free_large(exec2_list);
1694                 return -ENOMEM;
1695         }
1696         ret = copy_from_user(exec_list,
1697                              u64_to_user_ptr(args->buffers_ptr),
1698                              sizeof(*exec_list) * args->buffer_count);
1699         if (ret != 0) {
1700                 DRM_DEBUG("copy %d exec entries failed %d\n",
1701                           args->buffer_count, ret);
1702                 drm_free_large(exec_list);
1703                 drm_free_large(exec2_list);
1704                 return -EFAULT;
1705         }
1706
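        /* Translate each legacy exec object into its exec2 equivalent. */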
1707         for (i = 0; i < args->buffer_count; i++) {
1708                 exec2_list[i].handle = exec_list[i].handle;
1709                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1710                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1711                 exec2_list[i].alignment = exec_list[i].alignment;
1712                 exec2_list[i].offset = exec_list[i].offset;
1713                 if (INTEL_INFO(dev)->gen < 4)
1714                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1715                 else
1716                         exec2_list[i].flags = 0;
1717         }
1718
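        /*
         * Mirror the legacy arguments into an execbuffer2 request; the legacy
         * ioctl only ever targeted the render ring and the default context.
         */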
1719         exec2.buffers_ptr = args->buffers_ptr;
1720         exec2.buffer_count = args->buffer_count;
1721         exec2.batch_start_offset = args->batch_start_offset;
1722         exec2.batch_len = args->batch_len;
1723         exec2.DR1 = args->DR1;
1724         exec2.DR4 = args->DR4;
1725         exec2.num_cliprects = args->num_cliprects;
1726         exec2.cliprects_ptr = args->cliprects_ptr;
1727         exec2.flags = I915_EXEC_RENDER;
1728         i915_execbuffer2_set_context_id(exec2, 0);
1729
1730         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1731         if (!ret) {
1732                 struct drm_i915_gem_exec_object __user *user_exec_list =
1733                         u64_to_user_ptr(args->buffers_ptr);
1734
1735                 /* Copy the new buffer offsets back to the user's exec list. */
1736                 for (i = 0; i < args->buffer_count; i++) {
1737                         exec2_list[i].offset =
1738                                 gen8_canonical_addr(exec2_list[i].offset);
1739                         ret = __copy_to_user(&user_exec_list[i].offset,
1740                                              &exec2_list[i].offset,
1741                                              sizeof(user_exec_list[i].offset));
1742                         if (ret) {
1743                                 ret = -EFAULT;
1744                                 DRM_DEBUG("failed to copy %d exec entries "
1745                                           "back to user (%d)\n",
1746                                           args->buffer_count, ret);
1747                                 break;
1748                         }
1749                 }
1750         }
1751
1752         drm_free_large(exec_list);
1753         drm_free_large(exec2_list);
1754         return ret;
1755 }
1756
1757 int
1758 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1759                      struct drm_file *file)
1760 {
1761         struct drm_i915_gem_execbuffer2 *args = data;
1762         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1763         int ret;
1764
1765         if (args->buffer_count < 1 ||
1766             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1767                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1768                 return -EINVAL;
1769         }
1770
1771         if (args->rsvd2 != 0) {
1772                 DRM_DEBUG("dirty rsvd2 field\n");
1773                 return -EINVAL;
1774         }
1775
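        /*
         * Try a cheap kmalloc first, without retrying or warning on failure;
         * fall back to drm_malloc_ab(), which may vmalloc, for large lists.
         */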
1776         exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1777                              GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1778         if (exec2_list == NULL)
1779                 exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1780                                            args->buffer_count);
1781         if (exec2_list == NULL) {
1782                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1783                           args->buffer_count);
1784                 return -ENOMEM;
1785         }
1786         ret = copy_from_user(exec2_list,
1787                              u64_to_user_ptr(args->buffers_ptr),
1788                              sizeof(*exec2_list) * args->buffer_count);
1789         if (ret != 0) {
1790                 DRM_DEBUG("copy %d exec entries failed %d\n",
1791                           args->buffer_count, ret);
1792                 drm_free_large(exec2_list);
1793                 return -EFAULT;
1794         }
1795
1796         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1797         if (!ret) {
1798                 /* Copy the new buffer offsets back to the user's exec list. */
1799                 struct drm_i915_gem_exec_object2 __user *user_exec_list =
1800                                    u64_to_user_ptr(args->buffers_ptr);
1801                 int i;
1802
1803                 for (i = 0; i < args->buffer_count; i++) {
1804                         exec2_list[i].offset =
1805                                 gen8_canonical_addr(exec2_list[i].offset);
1806                         ret = __copy_to_user(&user_exec_list[i].offset,
1807                                              &exec2_list[i].offset,
1808                                              sizeof(user_exec_list[i].offset));
1809                         if (ret) {
1810                                 ret = -EFAULT;
1811                                 DRM_DEBUG("failed to copy %d exec entries "
1812                                           "back to user\n",
1813                                           args->buffer_count);
1814                                 break;
1815                         }
1816                 }
1817         }
1818
1819         drm_free_large(exec2_list);
1820         return ret;
1821 }
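
/*
 * The offsets copied back above let userspace cache each object's last known
 * GTT address. A rough sketch (the caller-side names are assumptions) of how
 * a later submission can pass them back with I915_EXEC_NO_RELOC so the kernel
 * may skip relocation processing when nothing has moved:
 *
 *	exec_objects[i].offset = cached_offset[i];
 *	relocs[j].presumed_offset = cached_offset_of_target(j);
 *	execbuf.flags |= I915_EXEC_NO_RELOC;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 */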