From 4c2be3c5ebfd98fc588f6bc5f53fa2ef516c02ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Jul 2019 12:27:23 +0100 Subject: [PATCH] drm/i915/gtt: Recursive ppgtt clear for gen8 With an explicit level, we can refactor the separate clear functions as a simple recursive function. The additional knowledge of the level allows us to spot when we can free an entire subtree at once. Signed-off-by: Chris Wilson Reviewed-by: Abdiel Janulgue Link: https://patchwork.freedesktop.org/patch/msgid/20190712112725.2892-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 14 ++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 154 +++++++++++++++++++++--------------- 2 files changed, 104 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 8d922bb..4cdc018 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -94,6 +94,20 @@ config DRM_I915_TRACE_GEM If in doubt, say "N". +config DRM_I915_TRACE_GTT + bool "Insert extra ftrace output from the GTT internals" + depends on DRM_I915_DEBUG_GEM + select TRACING + default n + help + Enable additional and verbose debugging output that will spam + ordinary tests, but may be vital for post-mortem debugging when + used with /proc/sys/kernel/ftrace_dump_on_oops + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SW_FENCE_DEBUG_OBJECTS bool "Enable additional driver debugging for fence objects" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3a156f0..6da564e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -46,6 +46,12 @@ #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) +#define DBG(...) trace_printk(__VA_ARGS__) +#else +#define DBG(...) +#endif + /** * DOC: Global GTT views * @@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd, { bool free = false; + if (atomic_add_unless(&pt->used, -1, 1)) + return false; + spin_lock(&pd->lock); if (atomic_dec_and_test(&pt->used)) { clear_pd_entry(pd, idx, scratch); @@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) free_scratch(vm); } -/* Removes entries from a single page table, releasing it if it's empty. - * Caller can use the return value to update higher-level entries. - */ -static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm, - struct i915_page_table *pt, - u64 start, u64 length) +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 start, const u64 end, int lvl) { - const unsigned int num_entries = gen8_pte_count(start, length); - gen8_pte_t *vaddr; + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + unsigned int idx, len; - vaddr = kmap_atomic_px(pt); - memset64(vaddr + gen8_pte_index(start), - vm->scratch[0].encode, - num_entries); - kunmap_atomic(vaddr); + len = gen8_pd_range(start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); - GEM_BUG_ON(num_entries > atomic_read(&pt->used)); + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && + gen8_pd_contains(start, end, lvl)) { + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); + clear_pd_entry(pd, idx, scratch); + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); + start += (u64)I915_PDES << gen8_pd_shift(lvl); + continue; + } - atomic_sub(num_entries, &pt->used); -} + if (lvl) { + start = __gen8_ppgtt_clear(vm, as_pd(pt), + start, end, lvl); + } else { + unsigned int count; + u64 *vaddr; -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - struct i915_page_table *pt; - u32 pde; + count = gen8_pt_count(start, end); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); - gen8_for_each_pde(pt, pd, start, length, pde) { - atomic_inc(&pt->used); - gen8_ppgtt_clear_pt(vm, pt, start, length); - if (release_pd_entry(pd, pde, pt, &vm->scratch[1])) + vaddr = kmap_atomic_px(pt); + memset64(vaddr + gen8_pd_index(start, 0), + vm->scratch[0].encode, + count); + kunmap_atomic(vaddr); + + atomic_sub(count, &pt->used); + start += count; + } + + if (release_pd_entry(pd, idx, pt, scratch)) free_px(vm, pt); - } + } while (idx++, --len); + + return start; } -/* Removes entries from a single page dir pointer, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, - struct i915_page_directory * const pdp, - u64 start, u64 length) +static void gen8_ppgtt_clear(struct i915_address_space *vm, + u64 start, u64 length) { - struct i915_page_directory *pd; - unsigned int pdpe; + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - atomic_inc(px_used(pd)); - gen8_ppgtt_clear_pd(vm, pd, start, length); - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) - free_px(vm, pd); - } + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + start, start + length, vm->top); } -static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + + __gen8_ppgtt_clear(vm, pd, start, start + length, 1); } -/* Removes entries from a single pml4. - * This is the top-level structure in 4-level page tables used on gen8+. - * Empty entries are always scratch pml4e. - */ -static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, + struct i915_page_directory * const pdp, + u64 start, u64 length) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory *pdp; - unsigned int pml4e; + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - atomic_inc(px_used(pdp)); - gen8_ppgtt_clear_pdp(vm, pdp, start, length); - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) - free_px(vm, pdp); - } + __gen8_ppgtt_clear(vm, pdp, start, start + length, 2); } static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, @@ -1171,7 +1195,7 @@ unwind_pdp: if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) free_px(vm, pdp); unwind: - gen8_ppgtt_clear_4lvl(vm, from, start - from); + gen8_ppgtt_clear(vm, from, start - from); out: if (alloc) free_px(vm, alloc); @@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) fill_px(pd, vm->scratch[1].encode); set_pd_entry(pdp, pdpe, pd); + atomic_inc(px_used(pd)); /* keep pinned */ } return 0; @@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) } fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + atomic_inc(px_used(pd)); /* mark as pinned */ return pd; } @@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) if (i915_vm_is_4lvl(&ppgtt->vm)) { ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; } else { if (intel_vgpu_active(i915)) { err = gen8_preallocate_top_level_pdp(ppgtt); @@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; } + ppgtt->vm.clear_range = gen8_ppgtt_clear; + if (intel_vgpu_active(i915)) gen8_ppgtt_notify_vgt(ppgtt, true); -- 2.7.4