/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
#include <drm/drm_vma_manager.h>
#include <drm/drm_pci.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "i915_gem_clflush.h"
#include "i915_gemfs.h"
#include "i915_globals.h"
#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}
static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}
/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}
static void __i915_gem_park(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);
	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));

	/*
	 * Be paranoid and flush a concurrent interrupt to make sure
	 * we don't reactivate any irq tasklets after parking.
	 *
	 * FIXME: Note that even though we have waited for execlists to be idle,
	 * there may still be an in-flight interrupt even though the CSB
	 * is now empty. synchronize_irq() makes sure that a residual interrupt
	 * is completed before we continue, but it doesn't prevent the HW from
	 * raising a spurious interrupt later. To complete the shield we should
	 * coordinate disabling the CS irq with flushing the interrupts.
	 */
	synchronize_irq(i915->drm.irq);

	intel_engines_park(i915);
	i915_timelines_park(i915);

	i915_pmu_gt_parked(i915);
	i915_vma_parked(i915);

	wakeref = fetch_and_zero(&i915->gt.awake);
	GEM_BUG_ON(!wakeref);

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_idle(i915);

	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
}
void i915_gem_park(struct drm_i915_private *i915)
{
	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);

	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
}
void i915_gem_unpark(struct drm_i915_private *i915)
{
	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(!i915->gt.active_requests);
	assert_rpm_wakelock_held(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has a negative impact on the performance of the chip,
	 * with huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
	 */
	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
	GEM_BUG_ON(!i915->gt.awake);

	i915_globals_unpark();

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	mutex_lock(&ggtt->vm.mutex);

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
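/*
 * For illustration (editor's sketch, not part of the driver): userspace
 * reaches the ioctl above through DRM_IOCTL_I915_GEM_GET_APERTURE. A
 * minimal caller, assuming an already-open i915 fd and eliding error
 * handling beyond the ioctl return:
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int print_aperture(int fd)
 *	{
 *		struct drm_i915_gem_get_aperture arg = { 0 };
 *
 *		if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &arg))
 *			return -1;
 *		printf("aperture: %llu total, %llu available\n",
 *		       (unsigned long long)arg.aper_size,
 *		       (unsigned long long)arg.aper_available_size);
 *		return 0;
 *	}
 */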
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}
static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}
static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;
int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		spin_unlock(&obj->vma.lock);

		ret = i915_vma_unbind(vma);

		spin_lock(&obj->vma.lock);
	}
	list_splice(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	return ret;
}
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout)
{
	struct i915_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_request_completed(rq))
		goto out;

	timeout = i915_request_wait(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);

	return timeout;
}
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}
		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must also be signaled. If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0)
		timeout = i915_gem_object_wait_fence(excl, flags, timeout);

	dma_fence_put(excl);

	/*
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}
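/*
 * The pruning above is a lock-free double-check: sample the seqcount
 * before the waits, then only take the lock and drop the fences if the
 * count is still unchanged afterwards. For comparison, a minimal
 * reader-side sketch of the same seqcount idiom (hypothetical structure,
 * not driver code):
 *
 *	unsigned int seq;
 *	u64 snapshot;
 *
 *	do {
 *		seq = read_seqcount_begin(&foo->seq);
 *		snapshot = foo->value;
 *	} while (read_seqcount_retry(&foo->seq, seq));
 */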
static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
{
	struct i915_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;

	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(rq, attr);
	rcu_read_unlock();
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}

static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], attr);
	} else {
		__fence_set_priority(fence, attr);
	}
}
int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      const struct i915_sched_attr *attr)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], attr);
			dma_fence_put(shared[i]);
		}
		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, attr);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout)
{
	might_sleep();
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
	return timeout < 0 ? timeout : 0;
}
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}
static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	size = round_up(*size_p, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       &args->size, &args->handle);
}
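/*
 * Worked example (editorial) for the pitch/size computation above: a
 * 1920x1080 dumb buffer at 32 bpp gives
 *
 *	pitch = ALIGN(1920 * DIV_ROUND_UP(32, 8), 64)
 *	      = ALIGN(1920 * 4, 64) = 7680 bytes (already 64-byte aligned)
 *	size  = 7680 * 1080 = 8294400 bytes
 *
 * i915_gem_create() then rounds the size up to a page multiple
 * (8294400 bytes is already exactly 2025 pages of 4096 bytes).
 */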
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       &args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}
void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * became coherent).
	 */
	wmb();

	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
		return;

	i915_gem_chipset_flush(dev_priv);

	with_intel_runtime_pm(dev_priv, wakeref) {
		spin_lock_irq(&dev_priv->uncore.lock);

		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

		spin_unlock_irq(&dev_priv->uncore.lock);
	}
}
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourselves into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourselves into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}
static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
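/*
 * Editorial note on the pattern above: the first copy uses an atomic
 * mapping with __copy_to_user_inatomic(), which may not fault, so it
 * fails (returns a non-zero remainder) if the user page is not resident.
 * Only then do we pay for a sleeping io_mapping_map_wc() plus a regular
 * copy_to_user(), which can fault the destination pages in. ggtt_write()
 * below mirrors the same fast/slow structure for the write direction.
 */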
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	wakeref = intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}
/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}
/* This is the fast write path which cannot handle
 * page faults in the source data
 */
static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(i915);
		if (!wakeref) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915, wakeref);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire write range.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
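	/*
	 * Worked example (editorial): with a 64-byte cacheline,
	 * partial_cacheline_write = 63. For a write at offset = 128 with
	 * length = 256, (offset | length) & 63 == 0, so both ends are
	 * cacheline aligned and no pre-write clflush is needed; offset = 100
	 * or length = 200 would leave low bits set and force the flush.
	 */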
	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}
/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	mutex_lock(&i915->ggtt.vm.mutex);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr &&
	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}
/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		addr = -ENXIO;
		goto err;
	}

	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
		addr = -EINVAL;
		goto err;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (IS_ERR_VALUE(addr))
		goto err;

	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			addr = -EINTR;
			goto err;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
		if (IS_ERR_VALUE(addr))
			goto err;

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);

	args->addr_ptr = (u64)addr;
	return 0;

err:
	i915_gem_object_put(obj);
	return addr;
}
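/*
 * For illustration (editor's sketch, not driver code): the legacy CPU
 * mmap path above is driven from userspace roughly like this, assuming
 * an open i915 fd and a GEM handle; error handling elided:
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *cpu_map(int fd, __u32 handle, __u64 size)
 *	{
 *		struct drm_i915_gem_mmap arg = {
 *			.handle = handle,
 *			.size = size,
 *			.flags = I915_MMAP_WC, // optional, needs PAT
 *		};
 *
 *		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
 *			return NULL;
 *		return (void *)(uintptr_t)arg.addr_ptr;
 *	}
 */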
static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     access.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
 *     pagefault; swapin remains transparent.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 3;
}
static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}
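/*
 * Worked example (editorial): with MIN_CHUNK_PAGES = 256 (1MiB of 4KiB
 * pages), an untiled 16MiB object (4096 pages) faulting at page 1000
 * yields view.partial.offset = rounddown(1000, 256) = 768 and
 * view.partial.size = min(256, 4096 - 768) = 256, i.e. a 1MiB window
 * starting 3MiB into the object. A 1MiB object (256 pages), by contrast,
 * satisfies chunk >= size and falls back to a normal, full-object VMA.
 */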
/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = area->vm_flags & VM_WRITE;
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	pgoff_t page_offset;
	int srcu;
	int ret;

	/* Sanity check that we allow writing into this object */
	if (i915_gem_object_is_readonly(obj) && write)
		return VM_FAULT_SIGBUS;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	wakeref = intel_runtime_pm_get(dev_priv);

	srcu = i915_reset_trylock(dev_priv);
	if (srcu < 0) {
		ret = srcu;
		goto err_rpm;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_reset;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
		ret = -EFAULT;
		goto err_unlock;
	}

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONBLOCK |
				       PIN_NONFAULT);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
		unsigned int flags;

		flags = PIN_MAPPABLE;
		if (view.type == I915_GGTT_VIEW_NORMAL)
			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */

		/*
		 * Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */
		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		if (IS_ERR(vma) && !view.type) {
			flags = PIN_MAPPABLE;
			view.type = I915_GGTT_VIEW_PARTIAL;
			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
		}
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_vma_pin_fence(vma);
	if (ret)
		goto err_unpin;

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->iomap);
	if (ret)
		goto err_fence;

	/* Mark as being mmapped into userspace for later revocation */
	assert_rpm_wakelock_held(dev_priv);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
	GEM_BUG_ON(!obj->userfault_count);

	i915_vma_set_ggtt_write(vma);

err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_reset:
	i915_reset_unlock(dev_priv, srcu);
err_rpm:
	intel_runtime_pm_put(dev_priv, wakeref);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(dev_priv))
			return VM_FAULT_SIGBUS;
		/* else: fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
	case -EFAULT:
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_FAULT_SIGBUS;
	}
}
static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	GEM_BUG_ON(!obj->userfault_count);

	obj->userfault_count = 0;
	list_del(&obj->userfault_link);
	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);

	for_each_ggtt_vma(vma, obj)
		i915_vma_unset_userfault(vma);
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	intel_wakeref_t wakeref;

	/* Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 *
	 * Note that RPM complicates somewhat by adding an additional
	 * requirement that operations to the GGTT be made holding the RPM
	 * wakeref.
	 */
	lockdep_assert_held(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	if (!obj->userfault_count)
		goto out;

	__i915_gem_object_release_mmap(obj);

	/* Ensure that the CPU's PTE are revoked and there are not outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied above by changing the PTE above *should* be
	 * sufficient, an extra barrier here just provides us with a bit
	 * of paranoid documentation about our requirement to serialise
	 * memory writes before touching registers / GSM.
	 */
	wmb();

out:
	intel_runtime_pm_put(i915, wakeref);
}
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &dev_priv->mm.userfault_list, userfault_link)
		__i915_gem_object_release_mmap(obj);

	/* The fence will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

		/* Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	int err;

	err = drm_gem_create_mmap_offset(&obj->base);
	if (likely(!err))
		return 0;

	/* Attempt to reap some mmap space from dead objects */
	do {
		err = i915_gem_wait_for_idle(dev_priv,
					     I915_WAIT_INTERRUPTIBLE,
					     MAX_SCHEDULE_TIMEOUT);
		if (err)
			break;

		i915_gem_drain_freed_objects(dev_priv);
		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			break;

	} while (flush_delayed_work(&dev_priv->gt.retire_work));

	return err;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  u32 handle,
		  u64 *offset)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret == 0)
		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);

	i915_gem_object_put(obj);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}
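/*
 * For illustration (editor's sketch, not driver code): the two-step GTT
 * mmap flow described above, as driven from userspace. Assumes an open
 * i915 fd and a GEM handle; error handling elided:
 *
 *	#include <sys/mman.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *gtt_map(int fd, __u32 handle, size_t size)
 *	{
 *		struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *		// 1. Ask the driver for the fake mmap offset.
 *		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *			return MAP_FAILED;
 *
 *		// 2. mmap the DRM fd at that offset; faults are then
 *		//    serviced by i915_gem_fault() above.
 *		return mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			    MAP_SHARED, fd, arg.offset);
 *	}
 */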
/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	i915_gem_object_free_mmap_offset(obj);

	if (obj->base.filp == NULL)
		return;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);
}

/* Try to discard unwanted pages */
void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping;

	lockdep_assert_held(&obj->mm.lock);
	GEM_BUG_ON(i915_gem_object_has_pages(obj));

	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
	case __I915_MADV_PURGED:
		return;
	}

	if (obj->base.filp == NULL)
		return;

	mapping = obj->base.filp->f_mapping;
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
}
/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
static void check_release_pagevec(struct pagevec *pvec)
{
	check_move_unevictable_pages(pvec);
	__pagevec_release(pvec);
	cond_resched();
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
			      struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct pagevec pvec;
	struct page *page;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}
static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
	rcu_read_unlock();
}

static struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *pages;

	pages = fetch_and_zero(&obj->mm.pages);
	if (IS_ERR_OR_NULL(pages))
		return pages;

	spin_lock(&i915->mm.obj_lock);
	list_del(&obj->mm.link);
	spin_unlock(&i915->mm.obj_lock);

	if (obj->mm.mapping) {
		void *ptr;

		ptr = page_mask_bits(obj->mm.mapping);
		if (is_vmalloc_addr(ptr))
			vunmap(ptr);
		else
			kunmap(kmap_to_page(ptr));

		obj->mm.mapping = NULL;
	}

	__i915_gem_object_reset_page_iter(obj);
	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

	return pages;
}
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
				enum i915_mm_subclass subclass)
{
	struct sg_table *pages;
	int ret;

	if (i915_gem_object_has_pinned_pages(obj))
		return -EBUSY;

	GEM_BUG_ON(obj->bind_count);

	/* May be called by shrinker from within get_pages() (on another bo) */
	mutex_lock_nested(&obj->mm.lock, subclass);
	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
		ret = -EBUSY;
		goto unlock;
	}

	/*
	 * ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early.
	 */
	pages = __i915_gem_object_unset_pages(obj);

	/*
	 * XXX Temporary hijinx to avoid updating all backends to handle
	 * NULL pages. In the future, when we have more asynchronous
	 * get_pages backends we should be better able to handle the
	 * cancellation of the async task in a more uniform manner.
	 */
	if (!pages && !i915_gem_object_needs_async_cancel(obj))
		pages = ERR_PTR(-EINVAL);

	if (!IS_ERR(pages))
		obj->ops->put_pages(obj, pages);

	ret = 0;
unlock:
	mutex_unlock(&obj->mm.lock);

	return ret;
}
bool i915_sg_trim(struct sg_table *orig_st)
{
	struct sg_table new_st;
	struct scatterlist *sg, *new_sg;
	unsigned int i;

	if (orig_st->nents == orig_st->orig_nents)
		return false;

	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
		return false;

	new_sg = new_st.sgl;
	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
		sg_dma_address(new_sg) = sg_dma_address(sg);
		sg_dma_len(new_sg) = sg_dma_len(sg);

		new_sg = sg_next(new_sg);
	}
	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */

	sg_free_table(orig_st);

	*orig_st = new_st;
	return true;
}
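/*
 * Editorial note: the trim pays off after the coalescing loop in
 * i915_gem_object_get_pages_gtt() below. A table is allocated with one
 * scatterlist entry per page, but physically contiguous pages are merged
 * as they are filled in. For a hypothetical 1024-page object whose pages
 * happen to form 4 contiguous runs, only 4 entries (nents) of the 1024
 * allocated (orig_nents) end up used, and i915_sg_trim() reallocates the
 * table at the smaller exact size.
 */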
2347 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2349 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2350 const unsigned long page_count = obj->base.size / PAGE_SIZE;
2352 struct address_space *mapping;
2353 struct sg_table *st;
2354 struct scatterlist *sg;
2355 struct sgt_iter sgt_iter;
2357 unsigned long last_pfn = 0; /* suppress gcc warning */
2358 unsigned int max_segment = i915_sg_segment_size();
2359 unsigned int sg_page_sizes;
2360 struct pagevec pvec;
2365 * Assert that the object is not currently in any GPU domain. As it
2366 * wasn't in the GTT, there shouldn't be any way it could have been in
2369 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2370 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2373 * If there's no chance of allocating enough pages for the whole
2374 * object, bail early.
2376 if (page_count > totalram_pages())
2379 st = kmalloc(sizeof(*st), GFP_KERNEL);
2384 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
	/*
	 * Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker.
	 */
2395 mapping = obj->base.filp->f_mapping;
2396 mapping_set_unevictable(mapping);
2397 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
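	/*
	 * Allocation strategy, in escalating order: try a cheap no-reclaim
	 * allocation first; on failure, shrink our own bound/unbound lists
	 * (the shrink[] ladder consumed via *s++ below) and retry; only once
	 * the ladder is exhausted do we retry with the full mapping gfp mask,
	 * letting kswapd reclaim but still failing with __GFP_RETRY_MAYFAIL
	 * rather than invoking the OOM killer.
	 */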
2403 for (i = 0; i < page_count; i++) {
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;
		do {
			cond_resched();
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (!IS_ERR(page))
				break;

			if (!*s) {
				ret = PTR_ERR(page);
				goto err_sg;
			}

			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
			/*
			 * We've tried hard to allocate the memory by reaping
2425 * our own buffer, now let the real VM do its job and
2426 * go down in flames if truly OOM.
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);
				/*
				 * Our bo are always dirty and so we require
2438 * kswapd to reclaim our pages (direct reclaim
2439 * does not effectively begin pageout of our
2440 * buffers on its own). However, direct reclaim
2441 * only waits for kswapd when under allocation
2442 * congestion. So as a result __GFP_RECLAIM is
2443 * unreliable and fails to actually reclaim our
2444 * dirty pages -- unless you try over and over
2445 * again with !__GFP_NORETRY. However, we still
2446 * want to fail this allocation rather than
2447 * trigger the out-of-memory killer and for
2448 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL;
			}
		} while (1);

		if (!i ||
		    sg->length >= max_segment ||
		    page_to_pfn(page) != last_pfn + 1) {
			if (i) {
				sg_page_sizes |= sg->length;
				sg = sg_next(sg);
			}
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);
2468 /* Check that the i965g/gm workaround works. */
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
	}
	if (sg) { /* loop terminated early; short sg table */
		sg_page_sizes |= sg->length;
		sg_mark_end(sg);
	}
	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
2483 * it could not reserve enough large entries, asking
2484 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
			sg_free_table(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(&dev_priv->drm.pdev->dev,
				 "Failed to DMA remap %lu pages\n",
				 page_count);
			goto err_pages;
		}
	}
2501 if (i915_gem_object_needs_bit17_swizzle(obj))
2502 i915_gem_object_do_bit_17_swizzle(obj, st);
	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err_sg:
	sg_mark_end(sg);
err_pages:
2511 mapping_clear_unevictable(mapping);
2512 pagevec_init(&pvec);
2513 for_each_sgt_page(page, sgt_iter, st) {
2514 if (!pagevec_add(&pvec, page))
2515 check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	sg_free_table(st);
	kfree(st);

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}
2537 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2538 struct sg_table *pages,
2539 unsigned int sg_page_sizes)
2541 struct drm_i915_private *i915 = to_i915(obj->base.dev);
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	int i;
2545 lockdep_assert_held(&obj->mm.lock);
2547 /* Make the pages coherent with the GPU (flushing any swapin). */
2548 if (obj->cache_dirty) {
2549 obj->write_domain = 0;
2550 if (i915_gem_object_has_struct_page(obj))
2551 drm_clflush_sg(pages);
2552 obj->cache_dirty = false;
2555 obj->mm.get_page.sg_pos = pages->sgl;
2556 obj->mm.get_page.sg_idx = 0;
2558 obj->mm.pages = pages;
2560 if (i915_gem_object_is_tiled(obj) &&
2561 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2562 GEM_BUG_ON(obj->mm.quirked);
2563 __i915_gem_object_pin_pages(obj);
2564 obj->mm.quirked = true;
2567 GEM_BUG_ON(!sg_page_sizes);
2568 obj->mm.page_sizes.phys = sg_page_sizes;
	/*
	 * Calculate the supported page-sizes which fit into the given
2572 * sg_page_sizes. This will give us the page-sizes which we may be able
2573 * to use opportunistically when later inserting into the GTT. For
2574 * example if phys=2G, then in theory we should be able to use 1G, 2M,
	 * 64K or 4K pages, although in practice this will depend on a number of
	 * factors.
	 */
2578 obj->mm.page_sizes.sg = 0;
2579 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2580 if (obj->mm.page_sizes.phys & ~0u << i)
2581 obj->mm.page_sizes.sg |= BIT(i);
2583 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
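	/*
	 * Worked example (illustrative): with supported = 4K | 64K | 2M and
	 * page_sizes.phys = 2M | 64K, the loop above sets sg = 4K | 64K | 2M,
	 * since for each supported size there exists a physical chunk at
	 * least that large (the "phys & ~0u << i" test).
	 */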
2585 spin_lock(&i915->mm.obj_lock);
2586 list_add(&obj->mm.link, &i915->mm.unbound_list);
2587 spin_unlock(&i915->mm.obj_lock);
2590 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
		return -EFAULT;
	}
2599 err = obj->ops->get_pages(obj);
	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));

	return err;
}
2605 /* Ensure that the associated pages are gathered from the backing storage
2606 * and pinned into our object. i915_gem_object_pin_pages() may be called
2607 * multiple times before they are released by a single call to
2608 * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2609 * either as a result of memory pressure (reaping pages under the shrinker)
 * or as the object is itself released.
 */
2612 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		return err;
2620 if (unlikely(!i915_gem_object_has_pages(obj))) {
2621 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
		err = ____i915_gem_object_get_pages(obj);
		if (err)
			goto unlock;
		smp_mb__before_atomic();
	}
	atomic_inc(&obj->mm.pages_pin_count);
unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}
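/*
 * Usage sketch (illustrative, error handling elided): callers bracket CPU
 * access to the backing store with a pin/unpin pair, e.g.
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	... access obj->mm.pages ...
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pin count is a plain atomic; only the transition from zero takes
 * obj->mm.lock and calls into the backend's get_pages().
 */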
2636 /* The 'mapping' part of i915_gem_object_pin_map() below */
2637 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2638 enum i915_map_type type)
2640 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2641 struct sg_table *sgt = obj->mm.pages;
	struct sgt_iter sgt_iter;
	struct page *page;
2644 struct page *stack_pages[32];
2645 struct page **pages = stack_pages;
	unsigned long i = 0;
	pgprot_t pgprot;
	void *addr;
2650 /* A single page can always be kmapped */
2651 if (n_pages == 1 && type == I915_MAP_WB)
2652 return kmap(sg_page(sgt->sgl));
	if (n_pages > ARRAY_SIZE(stack_pages)) {
		/* Too big for stack -- allocate temporary array instead */
		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return NULL;
	}
	for_each_sgt_page(page, sgt_iter, sgt)
		pages[i++] = page;
2664 /* Check that we have the expected number of pages */
2665 GEM_BUG_ON(i != n_pages);
	switch (type) {
	default:
		MISSING_CASE(type);
		/* fallthrough - to use PAGE_KERNEL anyway */
	case I915_MAP_WB:
		pgprot = PAGE_KERNEL;
		break;
	case I915_MAP_WC:
		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
		break;
	}
	addr = vmap(pages, n_pages, 0, pgprot);
	if (pages != stack_pages)
		kvfree(pages);

	return addr;
}
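/*
 * Note: vmap() copies the page pointers into its own mapping structures,
 * so the temporary pages[] array can be freed immediately after it
 * returns; the mapping itself lives until vunmap().
 */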
2686 /* get, pin, and map the pages of the object into kernel space */
2687 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2688 enum i915_map_type type)
2690 enum i915_map_type has_type;
2695 if (unlikely(!i915_gem_object_has_struct_page(obj)))
2696 return ERR_PTR(-ENXIO);
	ret = mutex_lock_interruptible(&obj->mm.lock);
	if (ret)
		return ERR_PTR(ret);
2702 pinned = !(type & I915_MAP_OVERRIDE);
2703 type &= ~I915_MAP_OVERRIDE;
2705 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2706 if (unlikely(!i915_gem_object_has_pages(obj))) {
2707 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
			ret = ____i915_gem_object_get_pages(obj);
			if (ret)
				goto err_unlock;
			smp_mb__before_atomic();
		}

		atomic_inc(&obj->mm.pages_pin_count);
		pinned = false;
	}
2718 GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2720 ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2721 if (ptr && has_type != type) {
		if (pinned) {
			ret = -EBUSY;
			goto err_unpin;
		}

		if (is_vmalloc_addr(ptr))
			vunmap(ptr);
		else
			kunmap(kmap_to_page(ptr));

		ptr = obj->mm.mapping = NULL;
	}
	if (!ptr) {
		ptr = i915_gem_object_map(obj, type);
		if (!ptr) {
			ret = -ENOMEM;
			goto err_unpin;
		}
2742 obj->mm.mapping = page_pack_bits(ptr, type);
	}

out_unlock:
	mutex_unlock(&obj->mm.lock);
	return ptr;
err_unpin:
	atomic_dec(&obj->mm.pages_pin_count);
err_unlock:
	ptr = ERR_PTR(ret);
	goto out_unlock;
}
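/*
 * Usage sketch (illustrative): a typical caller maps an object for CPU
 * writes and then flushes it, e.g.
 *
 *	void *vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	memcpy(vaddr, data, len);
 *	__i915_gem_object_flush_map(obj, 0, len);
 *	i915_gem_object_unpin_map(obj);
 *
 * The returned pointer has the map type packed into its low bits via
 * page_pack_bits(), which is why a mapping of a different type must be
 * torn down and rebuilt rather than aliased.
 */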
2756 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
				 unsigned long offset,
				 unsigned long size)
{
	enum i915_map_type has_type;
	void *ptr;
2763 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2764 GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2765 offset, size, obj->base.size));
2767 obj->mm.dirty = true;
	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
		return;

	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
	if (has_type == I915_MAP_WC)
		return;
2776 drm_clflush_virt_range(ptr + offset, size);
	if (size == obj->base.size) {
		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
		obj->cache_dirty = false;
	}
}
static int
i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2785 const struct drm_i915_gem_pwrite *arg)
2787 struct address_space *mapping = obj->base.filp->f_mapping;
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;
2792 /* Caller already validated user args */
2793 GEM_BUG_ON(!access_ok(user_data, arg->size));
	/*
	 * Before we instantiate/pin the backing store for our use, we
2797 * can prepopulate the shmemfs filp efficiently using a write into
2798 * the pagecache. We avoid the penalty of instantiating all the
2799 * pages, important if the user is just writing to a few and never
2800 * uses the object on the GPU, and using a direct write into shmemfs
2801 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;
	/*
	 * Before the pages are instantiated the object is treated as being
2812 * in the CPU domain. The pages will be clflushed as required before
2813 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation, corruption will ensue -
	 * that is userspace's prerogative!
	 */
	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

	do {
		unsigned int len, unwritten;
		struct page *page;
		void *data, *vaddr;
		int err;
		char c;
		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;
2833 /* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;
		err = pagecache_write_begin(obj->base.filp, mapping,
					    offset, len, 0,
					    &page, &data);
		if (err < 0)
			return err;
2848 vaddr = kmap_atomic(page);
		unwritten = __copy_from_user_inatomic(vaddr + pg,
						      user_data,
						      len);
		kunmap_atomic(vaddr);
		err = pagecache_write_end(obj->base.filp, mapping,
					  offset, len, len - unwritten,
					  page, data);
		if (err < 0)
			return err;
		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;
	} while (remain);

	return 0;
}
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
2876 struct drm_i915_private *dev_priv =
2877 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2878 struct drm_device *dev = &dev_priv->drm;
2880 /* Come back later if the device is busy... */
2881 if (mutex_trylock(&dev->struct_mutex)) {
2882 i915_retire_requests(dev_priv);
2883 mutex_unlock(&dev->struct_mutex);
	/*
	 * Keep the retire handler running until we are finally idle.
	 * We do not need to do this test under locking as in the worst-case
	 * we queue the retire worker once too often.
	 */
2891 if (READ_ONCE(dev_priv->gt.awake))
2892 queue_delayed_work(dev_priv->wq,
2893 &dev_priv->gt.retire_work,
2894 round_jiffies_up_relative(HZ));
static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
					  unsigned long mask)
{
	bool result = true;

	/*
2903 * Even if we fail to switch, give whatever is running a small chance
2904 * to save itself before we report the failure. Yes, this may be a
2905 * false positive due to e.g. ENOMEM, caveat emptor!
	 */
	if (i915_gem_switch_to_kernel_context(i915, mask))
		result = false;

	if (i915_gem_wait_for_idle(i915,
				   I915_WAIT_LOCKED |
				   I915_WAIT_FOR_IDLE_BOOST,
				   I915_GEM_IDLE_TIMEOUT))
		result = false;

	if (!result) {
		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
			dev_err(i915->drm.dev,
				"Failed to idle engines, declaring wedged!\n");
			GEM_TRACE_DUMP();
		}

		/* Forcibly cancel outstanding work and leave the gpu quiet. */
		i915_gem_set_wedged(i915);
	}

	i915_retire_requests(i915); /* ensure we flush after wedging */
	return result;
}
2931 static bool load_power_context(struct drm_i915_private *i915)
2933 /* Force loading the kernel context on all engines */
	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
		return false;
	/*
	 * Immediately park the GPU so that we enable powersaving and
2939 * treat it as idle. The next time we issue a request, we will
2940 * unpark and start using the engine->pinned_default_state, otherwise
2941 * it is in limbo and an early reset may fail.
	 */
	__i915_gem_park(i915);

	return true;
}
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
2951 struct drm_i915_private *i915 =
2952 container_of(work, typeof(*i915), gt.idle_work.work);
2953 bool rearm_hangcheck;
	if (!READ_ONCE(i915->gt.awake))
		return;

	if (READ_ONCE(i915->gt.active_requests))
		return;

	rearm_hangcheck =
		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
2964 if (!mutex_trylock(&i915->drm.struct_mutex)) {
2965 /* Currently busy, come back later */
2966 mod_delayed_work(i915->wq,
2967 &i915->gt.idle_work,
				 msecs_to_jiffies(50));
		goto out_rearm;
	}
	/*
	 * Flush out the last user context, leaving only the pinned
2974 * kernel context resident. Should anything unfortunate happen
	 * while we are idle (such as the GPU being power cycled), no users
	 * will be harmed.
	 */
	if (!work_pending(&i915->gt.idle_work.work) &&
2979 !i915->gt.active_requests) {
2980 ++i915->gt.active_requests; /* don't requeue idle */
2982 switch_to_kernel_context_sync(i915, i915->gt.active_engines);
2984 if (!--i915->gt.active_requests) {
2985 __i915_gem_park(i915);
			rearm_hangcheck = false;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

out_rearm:
2993 if (rearm_hangcheck) {
2994 GEM_BUG_ON(!i915->gt.awake);
		i915_queue_hangcheck(i915);
	}
}
2999 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3001 struct drm_i915_private *i915 = to_i915(gem->dev);
3002 struct drm_i915_gem_object *obj = to_intel_bo(gem);
3003 struct drm_i915_file_private *fpriv = file->driver_priv;
3004 struct i915_lut_handle *lut, *ln;
3006 mutex_lock(&i915->drm.struct_mutex);
3008 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
3009 struct i915_gem_context *ctx = lut->ctx;
3010 struct i915_vma *vma;
3012 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
		if (ctx->file_priv != fpriv)
			continue;
3016 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
3017 GEM_BUG_ON(vma->obj != obj);
3019 /* We allow the process to have multiple handles to the same
		 * vma, in the same fd namespace, by virtue of flink/open.
		 */
3022 GEM_BUG_ON(!vma->open_count);
3023 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3024 i915_vma_close(vma);
3026 list_del(&lut->obj_link);
3027 list_del(&lut->ctx_link);
3029 i915_lut_handle_free(lut);
		__i915_gem_object_release_unless_active(obj);
	}
3033 mutex_unlock(&i915->drm.struct_mutex);
static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}
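/*
 * Illustrative mapping (assuming the uABI convention that a negative
 * timeout_ns means wait-forever): -1 -> MAX_SCHEDULE_TIMEOUT, 0 -> do not
 * sleep at all (poll), and e.g. 20000000ns -> roughly 20ms worth of
 * jiffies, rounded up so we never wait for less than the caller asked.
 */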
/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3049 * @dev: drm device pointer
3050 * @data: ioctl data blob
3051 * @file: drm file pointer
3053 * Returns 0 if successful, else an error is returned with the remaining time in
3054 * the timeout parameter.
3055 * -ETIME: object is still busy after timeout
3056 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
3058 * Also possible, but rare:
3059 * -EAGAIN: incomplete, restart syscall
3061 * -ENODEV: Internal IRQ fail
3062 * -E?: The add request failed
3064 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3065 * non-zero timeout parameter the wait ioctl will wait for the given number of
3066 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3067 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
3072 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3074 struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;
	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;
3086 start = ktime_get();
3088 ret = i915_gem_object_wait(obj,
3089 I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns));
3094 if (args->timeout_ns > 0) {
3095 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3096 if (args->timeout_ns < 0)
3097 args->timeout_ns = 0;
		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
3101 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3102 * things up to make the test happy. We allow up to 1 jiffy.
		 * This is a regression from the timespec->ktime conversion.
		 */
3106 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3107 args->timeout_ns = 0;
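	/*
	 * Example of the mismatch (illustrative): with HZ=100 a jiffy is
	 * 10ms, so a residual timeout of a few hundred microseconds rounds
	 * down to zero jiffies; the wait has effectively expired even though
	 * timeout_ns is still positive, so report it as fully consumed.
	 */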
3109 /* Asked to wait beyond the jiffie/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;
	i915_gem_object_put(obj);
	return ret;
}
3118 static int wait_for_engines(struct drm_i915_private *i915)
	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
		dev_err(i915->drm.dev,
			"Failed to idle engines, declaring wedged!\n");
		GEM_TRACE_DUMP();
		i915_gem_set_wedged(i915);
		return -EIO;
	}

	return 0;
}
static long
wait_for_timelines(struct drm_i915_private *i915,
3133 unsigned int flags, long timeout)
3135 struct i915_gt_timelines *gt = &i915->gt.timelines;
3136 struct i915_timeline *tl;
	if (!READ_ONCE(i915->gt.active_requests))
		return timeout;

	mutex_lock(&gt->mutex);
	list_for_each_entry(tl, &gt->active_list, link) {
		struct i915_request *rq;

		rq = i915_active_request_get_unlocked(&tl->last_request);
		if (!rq)
			continue;

		mutex_unlock(&gt->mutex);
		/*
		 * Switching to the kernel context is often used as a synchronous
3155 * step prior to idling, e.g. in suspend for flushing all
3156 * current operations to memory before sleeping. These we
3157 * want to complete as quickly as possible to avoid prolonged
3158 * stalls, so allow the gpu to boost to maximum clocks.
		 */
		if (flags & I915_WAIT_FOR_IDLE_BOOST)
			gen6_rps_boost(rq);

		timeout = i915_request_wait(rq, flags, timeout);
		i915_request_put(rq);
		if (timeout < 0)
			return timeout;
		/* restart after reacquiring the lock */
		mutex_lock(&gt->mutex);
		tl = list_entry(&gt->active_list, typeof(*tl), link);
	}
	mutex_unlock(&gt->mutex);

	return timeout;
}
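/*
 * The loop above is the classic drop-lock-and-restart pattern: the active
 * timeline list is only stable while gt->mutex is held, so after waiting
 * with the mutex dropped we re-take it and restart iteration from the list
 * head; forward progress is still made as completed requests retire off
 * the timelines we have already waited upon.
 */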
3177 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3178 unsigned int flags, long timeout)
3180 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3181 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3182 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
	/* If the device is asleep, we have no requests outstanding */
	if (!READ_ONCE(i915->gt.awake))
		return 0;

	timeout = wait_for_timelines(i915, flags, timeout);
	if (timeout < 0)
		return timeout;
	if (flags & I915_WAIT_LOCKED) {
		int err;

		lockdep_assert_held(&i915->drm.struct_mutex);

		err = wait_for_engines(i915);
		if (err)
			return err;

		i915_retire_requests(i915);
	}

	return 0;
}
3207 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
3213 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3214 if (obj->cache_dirty)
3215 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3216 obj->write_domain = 0;
3219 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
	if (!READ_ONCE(obj->pin_global))
		return;
3224 mutex_lock(&obj->base.dev->struct_mutex);
3225 __i915_gem_object_flush_for_display(obj);
3226 mutex_unlock(&obj->base.dev->struct_mutex);
/**
 * Moves a single object to the WC read, and possibly write domain.
3231 * @obj: object to act on
3232 * @write: ask for write access or read only
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
3238 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);
3244 ret = i915_gem_object_wait(obj,
3245 I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;
3255 /* Flush and acquire obj->pages so that we are coherent through
3256 * direct access in memory with previous cached writes through
3257 * shmemfs and that our cache domain tracking remains valid.
3258 * For example, if the obj->filp was moved to swap without us
3259 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;
3267 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3269 /* Serialise direct access to this object with the barriers for
3270 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();
3276 /* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}
	i915_gem_object_unpin_pages(obj);
	return 0;
}
/**
 * Moves a single object to the GTT read, and possibly write domain.
3293 * @obj: object to act on
3294 * @write: ask for write access or read only
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
3300 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);
3306 ret = i915_gem_object_wait(obj,
3307 I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;
3317 /* Flush and acquire obj->pages so that we are coherent through
3318 * direct access in memory with previous cached writes through
3319 * shmemfs and that our cache domain tracking remains valid.
3320 * For example, if the obj->filp was moved to swap without us
3321 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;
3329 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3331 /* Serialise direct access to this object with the barriers for
3332 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();
3338 /* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;
	}
	i915_gem_object_unpin_pages(obj);
	return 0;
}
/**
 * Changes the cache-level of an object across all VMA.
3355 * @obj: object to act on
3356 * @cache_level: new cache level to set for the object
3358 * After this function returns, the object will be in the new cache-level
3359 * across all GTT and the contents of the backing storage will be coherent,
3360 * with respect to the new cache-level. In order to keep the backing storage
3361 * coherent for all users, we only allow a single cache level to be set
3362 * globally on the object and prevent it from being changed whilst the
3363 * hardware is reading from the object. That is if the object is currently
3364 * on the scanout it will be set to uncached (or equivalent display
3365 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
3368 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3369 enum i915_cache_level cache_level)
	struct i915_vma *vma;
	int ret;
3374 lockdep_assert_held(&obj->base.dev->struct_mutex);
	if (obj->cache_level == cache_level)
		return 0;
3379 /* Inspect the list of currently bound VMA and unbind any that would
3380 * be invalid given the new cache-level. This is principally to
3381 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma.list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;
		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}
		if (!i915_vma_is_closed(vma) &&
		    i915_gem_valid_gtt_space(vma, cache_level))
			continue;
		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;
		/* As unbinding may affect other elements in the
		 * obj->vma_list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}
	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon less
	 * state and so involves less work.
	 */
3416 if (obj->bind_count) {
3417 /* Before we change the PTE, the GPU must not be accessing it.
3418 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_LOCKED |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;
3429 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3430 cache_level != I915_CACHE_NONE) {
3431 /* Access to snoopable pages through the GTT is
3432 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
3434 * userspace to refault in the pages and we can
3435 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
3438 i915_gem_release_mmap(obj);
3440 /* As we no longer need a fence for GTT access,
3441 * we can relinquish it now (and so prevent having
3442 * to steal a fence from someone else on the next
3443 * fence request). Note GPU activity would have
3444 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			for_each_ggtt_vma(vma, obj) {
				ret = i915_vma_put_fence(vma);
				if (ret)
					return ret;
			}
		} else {
3453 /* We either have incoherent backing store and
3454 * so no GTT access or the architecture is fully
3455 * coherent. In such cases, existing GTT mmaps
3456 * ignore the cache bit in the PTE and we can
3457 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
3462 list_for_each_entry(vma, &obj->vma.list, obj_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
			if (ret)
				return ret;
		}
	}
3472 list_for_each_entry(vma, &obj->vma.list, obj_link)
3473 vma->node.color = cache_level;
3474 i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0;
}
3480 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3481 struct drm_file *file)
3483 struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}
	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}
3513 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3514 struct drm_file *file)
3516 struct drm_i915_private *i915 = to_i915(dev);
3517 struct drm_i915_gem_caching *args = data;
3518 struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;
	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}
	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;
	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	if (obj->cache_level == level)
		goto out;
3561 ret = i915_gem_object_wait(obj,
3562 I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	mutex_unlock(&dev->struct_mutex);

out:
	i915_gem_object_put(obj);
	return ret;
}
/**
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3581 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3582 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct i915_vma *vma;
	int ret;
3594 lockdep_assert_held(&obj->base.dev->struct_mutex);
3596 /* Mark the global pin early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	obj->pin_global++;
3601 /* The display engine is not coherent with the LLC cache on gen6. As
3602 * a result, we make sure that the pinning that is about to occur is
3603 * done with uncached PTEs. This is lowest common denominator for all
3606 * However for gen6+, we could do better by using the GFDT bit instead
3607 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
3610 ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(to_i915(obj->base.dev)) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret) {
		vma = ERR_PTR(ret);
		goto err_unpin_global;
	}
3618 /* As the user may map the buffer once pinned in the display plane
3619 * (e.g. libkms for the bootup splash), we have to ensure that we
3620 * always use map_and_fenceable for all scanout buffers. However,
3621 * it may simply be too big to fit into mappable, in which case
3622 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
3625 vma = ERR_PTR(-ENOSPC);
3626 if ((flags & PIN_MAPPABLE) == 0 &&
3627 (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags | PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		goto err_unpin_global;
3637 vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3639 __i915_gem_object_flush_for_display(obj);
3641 /* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->read_domains |= I915_GEM_DOMAIN_GTT;

	return vma;

err_unpin_global:
	obj->pin_global--;
	return vma;
}
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
3656 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
	if (WARN_ON(vma->obj->pin_global == 0))
		return;
3661 if (--vma->obj->pin_global == 0)
3662 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3664 /* Bump the LRU to try and avoid premature eviction whilst flipping */
3665 i915_gem_object_bump_inactive_ggtt(vma->obj);
3667 i915_vma_unpin(vma);
/**
 * Moves a single object to the CPU read, and possibly write domain.
3672 * @obj: object to act on
3673 * @write: requesting write or read-only access
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
3679 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);
3685 ret = i915_gem_object_wait(obj,
3686 I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;
3693 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3695 /* Flush the CPU cache if it's still invalid. */
3696 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3697 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}
3701 /* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
3704 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3706 /* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}
3715 /* Throttle our rendering by waiting until the ring has completed our requests
3716 * emitted over 20 msec ago.
3718 * Note that if we were to use the current jiffies each time around the loop,
3719 * we wouldn't escape the function with any frames outstanding if the time to
3720 * render a frame was over 20ms.
3722 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
static int
3726 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3728 struct drm_i915_private *dev_priv = to_i915(dev);
3729 struct drm_i915_file_private *file_priv = file->driver_priv;
3730 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct i915_request *request, *target = NULL;
	long ret;
3734 /* ABI: return -EIO if already wedged */
	ret = i915_terminally_wedged(dev_priv);
	if (ret)
		return ret;
3739 spin_lock(&file_priv->mm.lock);
3740 list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		if (target) {
			list_del(&target->client_link);
			target->file_priv = NULL;
		}

		target = request;
	}
	if (target)
		i915_request_get(target);
	spin_unlock(&file_priv->mm.lock);

	if (!target)
		return 0;
3758 ret = i915_request_wait(target,
3759 I915_WAIT_INTERRUPTIBLE,
3760 MAX_SCHEDULE_TIMEOUT);
3761 i915_request_put(target);
	return ret < 0 ? ret : 0;
}
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
3773 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3774 struct i915_address_space *vm = &dev_priv->ggtt.vm;
	struct i915_vma *vma;
	int ret;
3778 lockdep_assert_held(&obj->base.dev->struct_mutex);
3780 if (flags & PIN_MAPPABLE &&
3781 (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3782 /* If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
3784 * object and unbinding the object now will be in
3785 * vain. Worse, doing so may cause us to ping-pong
3786 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
3789 if (obj->base.size > dev_priv->ggtt.mappable_end)
3790 return ERR_PTR(-E2BIG);
3792 /* If NONBLOCK is set the caller is optimistically
3793 * trying to cache the full object within the mappable
3794 * aperture, and *must* have a fallback in place for
3795 * situations where we cannot bind the object. We
3796 * can be a little more lax here and use the fallback
3797 * more often to avoid costly migrations of ourselves
3798 * and other objects within the aperture.
3800 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
3802 * block prior to making the commitment to unbind.
3803 * That caters for the self-harm case, and with a
3804 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
3807 if (flags & PIN_NONBLOCK &&
3808 obj->base.size > dev_priv->ggtt.mappable_end / 2)
3809 return ERR_PTR(-ENOSPC);
	vma = i915_vma_instance(obj, vm, view);
	if (IS_ERR(vma))
		return vma;
3816 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3817 if (flags & PIN_NONBLOCK) {
3818 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3819 return ERR_PTR(-ENOSPC);
			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}
3826 WARN(i915_vma_is_pinned(vma),
3827 "bo is already pinned in ggtt with incorrect alignment:"
3828 " offset=%08x, req.alignment=%llx,"
3829 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3830 i915_ggtt_offset(vma), alignment,
3831 !!(flags & PIN_MAPPABLE),
3832 i915_vma_is_map_and_fenceable(vma));
		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}
	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}
3845 static __always_inline u32 __busy_read_flag(u8 id)
	if (id == (u8)I915_ENGINE_CLASS_INVALID)
		return 0xffff0000u;

	GEM_BUG_ON(id >= 16);
	return 0x10000u << id;
}
3854 static __always_inline u32 __busy_write_id(u8 id)
	/*
	 * The uABI guarantees an active writer is also amongst the read
3858 * engines. This would be true if we accessed the activity tracking
3859 * under the lock, but as we perform the lookup of the object and
3860 * its activity locklessly we can not guarantee that the last_write
3861 * being active implies that we have set the same engine flag from
	 * last_read - hence we always set both read and write busy for
	 * last_write.
	 */
	if (id == (u8)I915_ENGINE_CLASS_INVALID)
		return 0xffffffffu;

	return (id + 1) | __busy_read_flag(id);
}
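/*
 * Worked example (illustrative): for uabi_class 1, __busy_read_flag()
 * yields 0x20000 and __busy_write_id() yields 0x20002, i.e. the write id
 * (class + 1) in the low 16 bits plus the matching read flag in the upper
 * 16 bits, so a writer always also appears amongst the readers as the
 * uABI requires.
 */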
3871 static __always_inline unsigned int
3872 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
3874 const struct i915_request *rq;
	/*
	 * We have to check the current hw status of the fence as the uABI
3878 * guarantees forward progress. We could rely on the idle worker
	 * to eventually flush us, but to minimise latency just ask the
	 * hardware.
	 *
	 * Note we only report on the status of native fences.
	 */
	if (!dma_fence_is_i915(fence))
		return 0;
3887 /* opencode to_request() in order to avoid const warnings */
3888 rq = container_of(fence, const struct i915_request, fence);
	if (i915_request_completed(rq))
		return 0;
3892 /* Beware type-expansion follies! */
3893 BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
3894 return flag(rq->engine->uabi_class);
3897 static __always_inline unsigned int
3898 busy_check_reader(const struct dma_fence *fence)
3900 return __busy_set_if_active(fence, __busy_read_flag);
3903 static __always_inline unsigned int
3904 busy_check_writer(const struct dma_fence *fence)
	if (!fence)
		return 0;

	return __busy_set_if_active(fence, __busy_write_id);
}
int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3914 struct drm_file *file)
3916 struct drm_i915_gem_busy *args = data;
3917 struct drm_i915_gem_object *obj;
	struct reservation_object_list *list;
	unsigned int seq;
	int err;
	err = -ENOENT;
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj)
		goto out;
	/*
	 * A discrepancy here is that we do not report the status of
3930 * non-i915 fences, i.e. even though we may report the object as idle,
3931 * a call to set-domain may still stall waiting for foreign rendering.
3932 * This also means that wait-ioctl may report an object as busy,
3933 * where busy-ioctl considers it idle.
3935 * We trade the ability to warn of foreign fences to report on which
3936 * i915 engines are active for the object.
	 * Alternatively, we can trade that extra information on read/write
	 * activity with
	 *	args->busy =
	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
	 * to report the overall busyness. This is what the wait-ioctl does.
	 */
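	/*
	 * Lockless snapshot: sample the reservation object under a seqcount.
	 * If the fences are replaced while we iterate over them, the
	 * read_seqcount_retry() below observes the writer and we simply
	 * resample, so a torn read is never reported to userspace.
	 */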
retry:
	seq = raw_read_seqcount(&obj->resv->seq);
3948 /* Translate the exclusive fence to the READ *and* WRITE engine */
3949 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3951 /* Translate shared fences to READ set of engines */
	list = rcu_dereference(obj->resv->fence);
	if (list) {
		unsigned int shared_count = list->shared_count, i;
3956 for (i = 0; i < shared_count; ++i) {
3957 struct dma_fence *fence =
3958 rcu_dereference(list->shared[i]);
3960 args->busy |= busy_check_reader(fence);
		}
	}

	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
		goto retry;

	err = 0;
out:
	rcu_read_unlock();
	return err;
}
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3975 struct drm_file *file_priv)
3977 return i915_gem_ring_throttle(dev, file_priv);
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3982 struct drm_file *file_priv)
3984 struct drm_i915_private *dev_priv = to_i915(dev);
3985 struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;
	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}
	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;
	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;
4005 if (i915_gem_object_has_pages(obj) &&
4006 i915_gem_object_is_tiled(obj) &&
4007 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4008 if (obj->mm.madv == I915_MADV_WILLNEED) {
4009 GEM_BUG_ON(!obj->mm.quirked);
4010 __i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
4013 if (args->madv == I915_MADV_WILLNEED) {
4014 GEM_BUG_ON(obj->mm.quirked);
4015 __i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}
4020 if (obj->mm.madv != __I915_MADV_PURGED)
4021 obj->mm.madv = args->madv;
4023 /* if the object is no longer attached, discard its backing storage */
4024 if (obj->mm.madv == I915_MADV_DONTNEED &&
4025 !i915_gem_object_has_pages(obj))
4026 i915_gem_object_truncate(obj);
4028 args->retained = obj->mm.madv != __I915_MADV_PURGED;
4029 mutex_unlock(&obj->mm.lock);
out:
	i915_gem_object_put(obj);
	return err;
}
static void
frontbuffer_retire(struct i915_active_request *active,
4038 struct i915_request *request)
4040 struct drm_i915_gem_object *obj =
4041 container_of(active, typeof(*obj), frontbuffer_write);
4043 intel_fb_obj_flush(obj, ORIGIN_CS);
4046 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4047 const struct drm_i915_gem_object_ops *ops)
4049 mutex_init(&obj->mm.lock);
4051 spin_lock_init(&obj->vma.lock);
4052 INIT_LIST_HEAD(&obj->vma.list);
4054 INIT_LIST_HEAD(&obj->lut_list);
4055 INIT_LIST_HEAD(&obj->batch_pool_link);
4057 init_rcu_head(&obj->rcu);
4061 reservation_object_init(&obj->__builtin_resv);
4062 obj->resv = &obj->__builtin_resv;
4064 obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4065 i915_active_request_init(&obj->frontbuffer_write,
4066 NULL, frontbuffer_retire);
4068 obj->mm.madv = I915_MADV_WILLNEED;
4069 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4070 mutex_init(&obj->mm.get_page.lock);
4072 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4075 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4076 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4077 I915_GEM_OBJECT_IS_SHRINKABLE,
4079 .get_pages = i915_gem_object_get_pages_gtt,
4080 .put_pages = i915_gem_object_put_pages_gtt,
	.pwrite = i915_gem_object_pwrite_gtt,
};
4085 static int i915_gem_object_create_shmem(struct drm_device *dev,
4086 struct drm_gem_object *obj,
4089 struct drm_i915_private *i915 = to_i915(dev);
	unsigned long flags = VM_NORESERVE;
	struct file *filp;
4093 drm_gem_private_object_init(dev, obj, size);
	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;

	return 0;
}
4109 struct drm_i915_gem_object *
4110 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4112 struct drm_i915_gem_object *obj;
4113 struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;
4118 /* There is a prevalence of the assumption that we fit the object's
4119 * page count inside a 32bit _signed_ variable. Let's document this and
4120 * catch if we ever need to fix it. In the meantime, if you do spot
	 * such a local variable, please consider fixing!
	 */
4123 if (size >> PAGE_SHIFT > INT_MAX)
4124 return ERR_PTR(-E2BIG);
4126 if (overflows_type(size, obj->base.size))
4127 return ERR_PTR(-E2BIG);
	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return ERR_PTR(-ENOMEM);
	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
	if (ret)
		goto fail;
4137 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4138 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4139 /* 965gm cannot relocate objects above 4GiB. */
4140 mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}
4144 mapping = obj->base.filp->f_mapping;
4145 mapping_set_gfp_mask(mapping, mask);
4146 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4148 i915_gem_object_init(obj, &i915_gem_object_ops);
4150 obj->write_domain = I915_GEM_DOMAIN_CPU;
4151 obj->read_domains = I915_GEM_DOMAIN_CPU;
4153 if (HAS_LLC(dev_priv))
4154 /* On some devices, we can have the GPU use the LLC (the CPU
4155 * cache) for about a 10% performance improvement
4156 * compared to uncached. Graphics requests other than
4157 * display scanout are coherent with the CPU in
4158 * accessing this cache. This means in this mode we
4159 * don't need to clflush on the CPU side, and on the
4160 * GPU side we only need to flush internal caches to
4161 * get data visible to the CPU.
4163 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;
4170 i915_gem_object_set_cache_coherency(obj, cache_level);
	trace_i915_gem_object_create(obj);

	return obj;

fail:
	i915_gem_object_free(obj);
	return ERR_PTR(ret);
}
4181 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4183 /* If we are the last user of the backing storage (be it shmemfs
4184 * pages or stolen etc), we know that the pages are going to be
4185 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */
	if (obj->mm.madv != I915_MADV_WILLNEED)
		return false;

	if (obj->base.filp == NULL)
		return true;
4195 /* At first glance, this looks racy, but then again so would be
4196 * userspace racing mmap against close. However, the first external
4197 * reference to the filp can only be obtained through the
4198 * i915_gem_mmap_ioctl() which safeguards us against the user
4199 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
4202 return file_count(obj->base.filp) == 1;
4205 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4206 struct llist_node *freed)
4208 struct drm_i915_gem_object *obj, *on;
4209 intel_wakeref_t wakeref;
4211 wakeref = intel_runtime_pm_get(i915);
4212 llist_for_each_entry_safe(obj, on, freed, freed) {
4213 struct i915_vma *vma, *vn;
4215 trace_i915_gem_object_destroy(obj);
4217 mutex_lock(&i915->drm.struct_mutex);
4219 GEM_BUG_ON(i915_gem_object_is_active(obj));
4220 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
4221 GEM_BUG_ON(i915_vma_is_active(vma));
4222 vma->flags &= ~I915_VMA_PIN_MASK;
4223 i915_vma_destroy(vma);
4225 GEM_BUG_ON(!list_empty(&obj->vma.list));
4226 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4228 /* This serializes freeing with the shrinker. Since the free
4229 * is delayed, first by RCU then by the workqueue, we want the
4230 * shrinker to be able to free pages of unreferenced objects,
		 * or else we may oom whilst there are plenty of deferred
		 * frees.
		 */
4234 if (i915_gem_object_has_pages(obj)) {
4235 spin_lock(&i915->mm.obj_lock);
4236 list_del_init(&obj->mm.link);
4237 spin_unlock(&i915->mm.obj_lock);
4240 mutex_unlock(&i915->drm.struct_mutex);
4242 GEM_BUG_ON(obj->bind_count);
4243 GEM_BUG_ON(obj->userfault_count);
4244 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4245 GEM_BUG_ON(!list_empty(&obj->lut_list));
4247 if (obj->ops->release)
4248 obj->ops->release(obj);
4250 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4251 atomic_set(&obj->mm.pages_pin_count, 0);
4252 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4253 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4255 if (obj->base.import_attach)
4256 drm_prime_gem_destroy(&obj->base, NULL);
4258 reservation_object_fini(&obj->__builtin_resv);
4259 drm_gem_object_release(&obj->base);
4260 i915_gem_info_remove_obj(i915, obj->base.size);
4262 bitmap_free(obj->bit_17);
4263 i915_gem_object_free(obj);
4265 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4266 atomic_dec(&i915->mm.free_count);
4271 intel_runtime_pm_put(i915, wakeref);
4274 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4276 struct llist_node *freed;
4278 /* Free the oldest, most stale object to keep the free_list short */
4280 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4281 /* Only one consumer of llist_del_first() allowed */
4282 spin_lock(&i915->mm.free_lock);
4283 freed = llist_del_first(&i915->mm.free_list);
4284 spin_unlock(&i915->mm.free_lock);
	}
	if (unlikely(freed)) {
		freed->next = NULL;
		__i915_gem_free_objects(i915, freed);
	}
}
4292 static void __i915_gem_free_work(struct work_struct *work)
4294 struct drm_i915_private *i915 =
4295 container_of(work, struct drm_i915_private, mm.free_work);
4296 struct llist_node *freed;
	/*
	 * All file-owned VMA should have been released by this point through
4300 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4301 * However, the object may also be bound into the global GTT (e.g.
4302 * older GPUs without per-process support, or for direct access through
	 * the GTT either for the user or for scanout). Those VMA still need
	 * to be unbound now.
	 */
4307 spin_lock(&i915->mm.free_lock);
4308 while ((freed = llist_del_all(&i915->mm.free_list))) {
4309 spin_unlock(&i915->mm.free_lock);
		__i915_gem_free_objects(i915, freed);
		if (need_resched())
			return;
4315 spin_lock(&i915->mm.free_lock);
4317 spin_unlock(&i915->mm.free_lock);
4320 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4322 struct drm_i915_gem_object *obj =
4323 container_of(head, typeof(*obj), rcu);
4324 struct drm_i915_private *i915 = to_i915(obj->base.dev);
	/*
	 * We reuse obj->rcu for the freed list, so we had better not treat
4328 * it like a rcu_head from this point forwards. And we expect all
	 * objects to be freed via this path.
	 */
4331 destroy_rcu_head(&obj->rcu);
	/*
	 * Since we require blocking on struct_mutex to unbind the freed
4335 * object from the GPU before releasing resources back to the
4336 * system, we can not do that directly from the RCU callback (which may
4337 * be a softirq context), but must instead then defer that work onto a
4338 * kthread. We use the RCU callback rather than move the freed object
4339 * directly onto the work queue so that we can mix between using the
4340 * worker and performing frees directly from subsequent allocations for
	 * crude but effective memory throttling.
	 */
4343 if (llist_add(&obj->freed, &i915->mm.free_list))
		queue_work(i915->wq, &i915->mm.free_work);
}
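/*
 * Note that llist_add() returns true only when it adds the first node to
 * an empty list, so the worker is queued exactly once per batch of freed
 * objects; subsequent objects piggyback onto the already-pending work.
 */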
4347 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4349 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4351 if (obj->mm.quirked)
4352 __i915_gem_object_unpin_pages(obj);
4354 if (discard_backing_storage(obj))
4355 obj->mm.madv = I915_MADV_DONTNEED;
	/*
	 * Before we free the object, make sure any pure RCU-only
4359 * read-side critical sections are complete, e.g.
4360 * i915_gem_busy_ioctl(). For the corresponding synchronized
	 * lookup see i915_gem_object_lookup_rcu().
	 */
4363 atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4364 call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4367 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4369 lockdep_assert_held(&obj->base.dev->struct_mutex);
4371 if (!i915_gem_object_has_active_reference(obj) &&
4372 i915_gem_object_is_active(obj))
4373 i915_gem_object_set_active_reference(obj);
	else
		i915_gem_object_put(obj);
}
4378 void i915_gem_sanitize(struct drm_i915_private *i915)
4380 intel_wakeref_t wakeref;
4384 wakeref = intel_runtime_pm_get(i915);
4385 intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
	/*
	 * As we have just resumed the machine and woken the device up from
4389 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4390 * back to defaults, recovering from whatever wedged state we left it
	 * in and so worth trying to use the device once more.
	 */
4393 if (i915_terminally_wedged(i915))
4394 i915_gem_unset_wedged(i915);
	/*
	 * If we inherit context state from the BIOS or earlier occupants
4398 * of the GPU, the GPU may be in an inconsistent state when we
4399 * try to take over. The only way to remove the earlier state
4400 * is by resetting. However, resetting on earlier gen is tricky as
4401 * it may impact the display and we are uncertain about the stability
	 * of the reset, so this could be applied to even earlier gen.
	 */
4404 intel_engines_sanitize(i915, false);
4406 intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4407 intel_runtime_pm_put(i915, wakeref);
4409 mutex_lock(&i915->drm.struct_mutex);
4410 i915_gem_contexts_lost(i915);
4411 mutex_unlock(&i915->drm.struct_mutex);
4414 void i915_gem_suspend(struct drm_i915_private *i915)
4416 intel_wakeref_t wakeref;
4420 wakeref = intel_runtime_pm_get(i915);
4422 flush_workqueue(i915->wq);
4424 mutex_lock(&i915->drm.struct_mutex);
	/*
	 * We have to flush all the executing contexts to main memory so
	 * that they can be saved in the hibernation image. To ensure the last
4429 * context image is coherent, we have to switch away from it. That
4430 * leaves the i915->kernel_context still active when
4431 * we actually suspend, and its image in memory may not match the GPU
4432 * state. Fortunately, the kernel_context is disposable and we do
	 * not rely on its state.
	 */
4435 switch_to_kernel_context_sync(i915, i915->gt.active_engines);
4437 mutex_unlock(&i915->drm.struct_mutex);
4438 i915_reset_flush(i915);
4440 drain_delayed_work(&i915->gt.retire_work);
	/*
	 * As the idle_work is rearming if it detects a race, play safe and
	 * repeat the flush until it is definitely idle.
	 */
4446 drain_delayed_work(&i915->gt.idle_work);
	/*
	 * Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
4452 GEM_BUG_ON(i915->gt.awake);
4454 intel_uc_suspend(i915);
4456 intel_runtime_pm_put(i915, wakeref);
4459 void i915_gem_suspend_late(struct drm_i915_private *i915)
4461 struct drm_i915_gem_object *obj;
4462 struct list_head *phases[] = {
4463 &i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;
	/*
	 * Neither the BIOS, ourselves or any other kernel
4470 * expects the system to be in execlists mode on startup,
4471 * so we need to reset the GPU back to legacy mode. And the only
4472 * known way to disable logical contexts is through a GPU reset.
4474 * So in order to leave the system in a known default configuration,
4475 * always reset the GPU upon unload and suspend. Afterwards we then
4476 * clean up the GEM state tracking, flushing off the requests and
4477 * leaving the system in a known idle state.
	 * Note that it is of the utmost importance that the GPU is idle and
4480 * all stray writes are flushed *before* we dismantle the backing
4481 * storage for the pinned objects.
4483 * However, since we are uncertain that resetting the GPU on older
4484 * machines is a good idea, we don't - just in case it leaves the
	 * machine in an unusable condition.
	 */
4488 mutex_lock(&i915->drm.struct_mutex);
4489 for (phase = phases; *phase; phase++) {
4490 list_for_each_entry(obj, *phase, mm.link)
4491 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
4493 mutex_unlock(&i915->drm.struct_mutex);
4495 intel_uc_sanitize(i915);
4496 i915_gem_sanitize(i915);
4499 void i915_gem_resume(struct drm_i915_private *i915)
4503 WARN_ON(i915->gt.awake);
4505 mutex_lock(&i915->drm.struct_mutex);
4506 intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4508 i915_gem_restore_gtt_mappings(i915);
4509 i915_gem_restore_fences(i915);
	/*
	 * As we didn't flush the kernel context before suspend, we cannot
4513 * guarantee that the context image is complete. So let's just reset
	 * it and start again.
	 */
4516 intel_gt_resume(i915);
	if (i915_gem_init_hw(i915))
		goto err_wedged;
4521 intel_uc_resume(i915);
4523 /* Always reload a context for powersaving. */
	if (!load_power_context(i915))
		goto err_wedged;
out_unlock:
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	mutex_unlock(&i915->drm.struct_mutex);
	return;
err_wedged:
	if (!i915_reset_failed(i915)) {
4534 dev_err(i915->drm.dev,
4535 "Failed to re-initialize GPU, declaring it wedged!\n");
		i915_gem_set_wedged(i915);
	}
	goto out_unlock;
}
4541 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
	if (INTEL_GEN(dev_priv) < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;
4547 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4548 DISP_TILE_SURFACE_SWIZZLING);
	if (IS_GEN(dev_priv, 5))
		return;
4553 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4554 if (IS_GEN(dev_priv, 6))
4555 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4556 else if (IS_GEN(dev_priv, 7))
4557 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4558 else if (IS_GEN(dev_priv, 8))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}
4564 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4566 I915_WRITE(RING_CTL(base), 0);
4567 I915_WRITE(RING_HEAD(base), 0);
4568 I915_WRITE(RING_TAIL(base), 0);
4569 I915_WRITE(RING_START(base), 0);
static void init_unused_rings(struct drm_i915_private *dev_priv)
{
	if (IS_I830(dev_priv)) {
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
		init_unused_ring(dev_priv, SRB2_BASE);
		init_unused_ring(dev_priv, SRB3_BASE);
	} else if (IS_GEN(dev_priv, 2)) {
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
	} else if (IS_GEN(dev_priv, 3)) {
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, PRB2_BASE);
	}
}

static int __i915_gem_restart_engines(void *data)
{
	struct drm_i915_private *i915 = data;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	for_each_engine(engine, i915, id) {
		err = engine->init_hw(engine);
		if (err) {
			DRM_ERROR("Failed to restart %s (%d)\n",
				  engine->name, err);
			return err;
		}
	}

	intel_engines_set_scheduler_caps(i915);

	return 0;
}

int i915_gem_init_hw(struct drm_i915_private *dev_priv)
{
	int ret;

	dev_priv->gt.last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev_priv))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(dev_priv);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(dev_priv, "init");

	i915_gem_init_swizzling(dev_priv);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev_priv);

	BUG_ON(!dev_priv->kernel_context);
	ret = i915_terminally_wedged(dev_priv);
	if (ret)
		goto out;

	ret = i915_ppgtt_init_hw(dev_priv);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
	if (ret) {
		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(dev_priv);
	if (ret) {
		DRM_ERROR("Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init_l3cc_table(dev_priv);

	/* Only when the HW is re-initialised can we replay the requests */
	ret = __i915_gem_restart_engines(dev_priv);
	if (ret)
		goto cleanup_uc;

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);

	return 0;

cleanup_uc:
	intel_uc_fini_hw(dev_priv);
out:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);

	return ret;
}

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the GPU during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	ctx = i915_gem_context_create_kernel(i915, 0);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_alloc(engine, ctx);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		err = 0;
		if (engine->init_context)
			err = engine->init_context(rq);

		i915_request_add(rq);
		if (err)
			goto err_active;
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (!load_power_context(i915)) {
		err = -EIO;
		goto err_active;
	}

	for_each_engine(engine, i915, id) {
		struct intel_context *ce;
		struct i915_vma *state;
		void *vaddr;

		ce = intel_context_lookup(ctx, engine);
		if (!ce)
			continue;

		state = ce->state;
		if (!state)
			continue;

		GEM_BUG_ON(intel_context_is_pinned(ce));

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto err_active;

		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		if (err)
			goto err_active;

		engine->default_state = i915_gem_object_get(state->obj);
		i915_gem_object_set_cache_coherency(engine->default_state,
						    I915_CACHE_LLC);

		/* Check we can acquire the image of the context state */
		vaddr = i915_gem_object_pin_map(engine->default_state,
						I915_MAP_FORCE_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_active;
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
		unsigned int found = intel_engines_has_context_isolation(i915);

		/*
		 * Make sure that classes with multiple engine instances all
		 * share the same basic configuration.
		 */
		for_each_engine(engine, i915, id) {
			unsigned int bit = BIT(engine->uabi_class);
			unsigned int expected = engine->default_state ? bit : 0;

			if ((found & bit) != expected) {
				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
					  engine->uabi_class, engine->name);
			}
		}
	}

out_ctx:
	i915_gem_context_set_closed(ctx);
	i915_gem_context_put(ctx);
	return err;

err_active:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	i915_gem_set_wedged(i915);
	goto out_ctx;
}

static int
i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_stolen(i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	i915->gt.scratch = vma;
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_fini_scratch(struct drm_i915_private *i915)
{
	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
	else
		dev_priv->gt.cleanup_engine = intel_engine_cleanup;

	i915_timelines_init(dev_priv);

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	ret = intel_uc_init_misc(dev_priv);
	if (ret)
		return ret;

	ret = intel_wopcm_init(&dev_priv->wopcm);
	if (ret)
		goto err_uc_misc;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = i915_gem_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_init_scratch(dev_priv,
				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_scratch;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	ret = intel_uc_init(dev_priv);
	if (ret)
		goto err_pm;

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/*
	 * Despite its name, intel_init_clock_gating applies both display
	 * clock gating workarounds and GT mmio workarounds, plus the
	 * occasional GT power context workaround. Worse, sometimes it
	 * includes a context register workaround which we need to apply
	 * before we record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_init_hw;

	if (i915_inject_load_failure()) {
		ret = -ENODEV;
		goto err_init_hw;
	}

	if (i915_inject_load_failure()) {
		ret = -EIO;
		goto err_init_hw;
	}

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that
	 * the driver doesn't explode during runtime.
	 */
err_init_hw:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_suspend(dev_priv);
	i915_gem_suspend_late(dev_priv);

	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
err_uc_init:
	intel_uc_fini(dev_priv);
err_pm:
	if (ret != -EIO) {
		intel_cleanup_gt_powersave(dev_priv);
		i915_gem_cleanup_engines(dev_priv);
	}
err_context:
	if (ret != -EIO)
		i915_gem_contexts_fini(dev_priv);
err_scratch:
	i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_uc_misc:
	intel_uc_fini_misc(dev_priv);

	if (ret != -EIO) {
		i915_gem_cleanup_userptr(dev_priv);
		i915_timelines_fini(dev_priv);
	}

	if (ret == -EIO) {
		mutex_lock(&dev_priv->drm.struct_mutex);

		/*
		 * Allow engine initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		if (!i915_reset_failed(dev_priv)) {
			i915_load_error(dev_priv,
					"Failed to initialize GPU, declaring it wedged!\n");
			i915_gem_set_wedged(dev_priv);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_gem_restore_gtt_mappings(dev_priv);
		i915_gem_restore_fences(dev_priv);
		intel_init_clock_gating(dev_priv);

		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_fini(struct drm_i915_private *dev_priv)
{
	i915_gem_suspend_late(dev_priv);
	intel_disable_gt_powersave(dev_priv);

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
	intel_uc_fini(dev_priv);
	i915_gem_cleanup_engines(dev_priv);
	i915_gem_contexts_fini(dev_priv);
	i915_gem_fini_scratch(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_cleanup_gt_powersave(dev_priv);

	intel_uc_fini_misc(dev_priv);
	i915_gem_cleanup_userptr(dev_priv);
	i915_timelines_fini(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->contexts.list));
}

void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}

void
i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id)
		dev_priv->gt.cleanup_engine(engine);
}

void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
	int i;

	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
	    !IS_CHERRYVIEW(dev_priv))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_GEN(dev_priv) >= 4 ||
		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev_priv))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/* Initialize fence registers to zero */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		fence->i915 = dev_priv;
		fence->id = i;
		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
	}
	i915_gem_restore_fences(dev_priv);

	i915_gem_detect_bit_6_swizzle(dev_priv);
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.object_stat_lock);
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);
	INIT_LIST_HEAD(&i915->mm.userfault_list);

	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err;

	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
	mutex_init(&dev_priv->gpu_error.wedge_mutex);
	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

	return 0;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);

	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

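/*
 * Illustrative sketch (deliberately compiled out): i915_gem_freeze() and
 * i915_gem_freeze_late() are hibernation hooks, invoked from the driver's
 * PM callbacks which slot into dev_pm_ops roughly as below. The wrapper
 * names here are hypothetical; the real driver routes these through its
 * own PM entry points.
 */
#if 0
static const struct dev_pm_ops sketch_pm_ops = {
	.freeze = sketch_pm_freeze,		/* calls i915_gem_freeze() */
	.freeze_late = sketch_pm_freeze_late,	/* calls i915_gem_freeze_late() */
};
#endif
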
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

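/*
 * Illustrative sketch (deliberately compiled out): a plane update hands the
 * frontbuffer bits over from the outgoing object to the incoming one.
 * Either object may be NULL, e.g. when enabling or disabling the plane.
 * The helper name and parameters are hypothetical.
 */
#if 0
static void sketch_flip_frontbuffer(struct drm_i915_gem_object *old_obj,
				    struct drm_i915_gem_object *new_obj,
				    unsigned int frontbuffer_bits)
{
	/* Clears the bits on old_obj and sets them on new_obj, atomically. */
	i915_gem_track_fb(old_obj, new_obj, frontbuffer_bits);
}
#endif
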
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

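/*
 * Illustrative sketch (deliberately compiled out): wrapping a small blob,
 * e.g. firmware, in a GEM object. The helper name and the blob parameters
 * are hypothetical.
 */
#if 0
static int sketch_wrap_blob(struct drm_i915_private *i915,
			    const void *blob, size_t blob_size)
{
	struct drm_i915_gem_object *obj;

	/* Size is rounded up to a whole page; contents go via the pagecache. */
	obj = i915_gem_object_create_from_data(i915, blob, blob_size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... use obj, then drop the reference ... */
	i915_gem_object_put(obj);
	return 0;
}
#endif
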
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookup of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}

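/*
 * Illustrative sketch (deliberately compiled out): walking an object's
 * pages in ascending order hits the amortised-O(1) fast path of
 * i915_gem_object_get_sg() above, as each lookup resumes from the cached
 * iterator position. The helper name is hypothetical, and the object's
 * pages are assumed to be pinned by the caller.
 */
#if 0
static void sketch_walk_pages(struct drm_i915_gem_object *obj)
{
	unsigned int n, count = obj->base.size >> PAGE_SHIFT;

	for (n = 0; n < count; n++) {
		struct page *page = i915_gem_object_get_page(obj, n);

		/* ... read or prefault page ... */
	}
}
#endif
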
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

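/*
 * Illustrative sketch (deliberately compiled out): callers such as legacy
 * cursor support on very old hardware need physically contiguous backing
 * store, so they convert the freshly created shmem object before first
 * use. The helper name and alignment are hypothetical.
 */
#if 0
static int sketch_make_phys(struct drm_i915_gem_object *obj)
{
	/* Must be done before the object is pinned, mapped or quirked. */
	return i915_gem_object_attach_phys(obj, PAGE_SIZE);
}
#endif
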
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif