From 57e8853181198065bfd96b3690f6dee68d744745 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:48:57 +0100 Subject: [PATCH] drm/i915: Use VMA for ringbuffer tracking Use the GGTT VMA as the primary cookie for handling ring objects as the most common action upon the ring is mapping and unmapping which act upon the VMA itself. By restructuring the code to work with the ring VMA, we can shrink the code and remove a few cycles from context pinning. v2: Move the flush of the object back to before the first pin. We use the am-I-bound? query to only have to check the flush on the first bind and so avoid stalling on active rings. Lots of little renames and small hoops. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-18-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 16 +- drivers/gpu/drm/i915/intel_lrc.c | 17 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 243 ++++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 14 +- 6 files changed, 139 insertions(+), 157 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index eb8753f..6e7cfba 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -356,7 +356,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) if (ctx->engine[n].state) per_file_stats(0, ctx->engine[n].state->obj, data); if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->obj, data); + per_file_stats(0, ctx->engine[n].ring->vma->obj, data); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 61708fa..27f973f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1128,12 +1128,12 @@ static void 
i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_tail = ring->tail; ee->ringbuffer = i915_error_ggtt_object_create(dev_priv, - ring->obj); + ring->vma->obj); } ee->hws_page = i915_error_ggtt_object_create(dev_priv, - engine->status_page.obj); + engine->status_page.vma->obj); ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, engine->wa_ctx.obj); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 4f0f173..c40b92e2 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -343,7 +343,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc, struct intel_context *ce = &ctx->engine[engine->id]; uint32_t guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; - struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -358,17 +357,14 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - gfx_addr = ce->state->node.start; - lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lcra = + ce->state->node.start + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - obj = ce->ring->obj; - gfx_addr = i915_gem_obj_ggtt_offset(obj); - - lrc->ring_begin = gfx_addr; - lrc->ring_end = gfx_addr + obj->base.size - 1; - lrc->ring_next_free_location = gfx_addr; + lrc->ring_begin = ce->ring->vma->node.start; + lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; + lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; desc.engines_used |= (1 << guc_engine_id); @@ -943,7 +939,7 @@ static void guc_create_ads(struct intel_guc *guc) * to find it. 
*/ engine = &dev_priv->engine[RCS]; - ads->golden_context_lrca = engine->status_page.gfx_addr; + ads->golden_context_lrca = engine->status_page.ggtt_offset; for_each_engine(engine, dev_priv) ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5538e5c..73dd2f9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1273,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE(RING_HWS_PGA(engine->mmio_base), - (u32)engine->status_page.gfx_addr); + engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -1695,9 +1695,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - if (engine->status_page.obj) { - i915_gem_object_unpin_map(engine->status_page.obj); - engine->status_page.obj = NULL; + if (engine->status_page.vma) { + i915_gem_object_unpin_map(engine->status_page.vma->obj); + engine->status_page.vma = NULL; } intel_lr_context_unpin(dev_priv->kernel_context, engine); @@ -1744,16 +1744,17 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) static int lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { + const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; void *hws; /* The HWSP is part of the default context object in LRC mode. 
*/ - engine->status_page.gfx_addr = - vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); - engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = vma->obj; + + engine->status_page.page_addr = hws + hws_offset; + engine->status_page.ggtt_offset = vma->node.start + hws_offset; + engine->status_page.vma = vma; return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4a614e5..bdb1ab9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -466,7 +466,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) mmio = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); + I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); /* @@ -531,7 +531,6 @@ static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; - struct drm_i915_gem_object *obj = ring->obj; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -571,7 +570,7 @@ static int init_ring_common(struct intel_engine_cs *engine) * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring * register values. 
*/ - I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); + I915_WRITE_START(engine, ring->vma->node.start); /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) @@ -586,16 +585,16 @@ static int init_ring_common(struct intel_engine_cs *engine) /* If the head is still not zero, the ring is dead */ if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && - I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && + I915_READ_START(engine) == ring->vma->node.start && (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { DRM_ERROR("%s initialization failed " - "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n", + "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08llx]\n", engine->name, I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID, I915_READ_HEAD(engine), I915_READ_TAIL(engine), I915_READ_START(engine), - (unsigned long)i915_gem_obj_ggtt_offset(obj)); + ring->vma->node.start); ret = -EIO; goto out; } @@ -1853,79 +1852,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = engine->status_page.obj; - if (obj == NULL) + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) return; - kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_ggtt_unpin(obj); - i915_gem_object_put(obj); - engine->status_page.obj = NULL; + i915_vma_unpin(vma); + i915_gem_object_unpin_map(vma->obj); + i915_vma_put(vma); } static int init_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj = engine->status_page.obj; - - if (obj == NULL) { - unsigned flags; - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags; + int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } + obj = 
i915_gem_object_create(&engine->i915->drm, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err_unref; - - flags = 0; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actualy map it). - */ - flags |= PIN_MAPPABLE; - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags); - if (ret) { -err_unref: - i915_gem_object_put(obj); - return ret; - } + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err; - engine->status_page.obj = obj; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; } - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); - engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); - memset(engine->status_page.page_addr, 0, PAGE_SIZE); + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actualy map it). 
+ */ + flags |= PIN_MAPPABLE; + ret = i915_vma_pin(vma, 0, 4096, flags); + if (ret) + goto err; - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, engine->status_page.gfx_addr); + engine->status_page.vma = vma; + engine->status_page.ggtt_offset = vma->node.start; + engine->status_page.page_addr = + i915_gem_object_pin_map(obj, I915_MAP_WB); + DRM_DEBUG_DRIVER("%s hws offset: 0x%08llx\n", + engine->name, vma->node.start); return 0; + +err: + i915_gem_object_put(obj); + return ret; } static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!dev_priv->status_page_dmah) { - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) - return -ENOMEM; - } + dev_priv->status_page_dmah = + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); + if (!dev_priv->status_page_dmah) + return -ENOMEM; engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; memset(engine->status_page.page_addr, 0, PAGE_SIZE); @@ -1935,52 +1934,43 @@ static int init_phys_status_page(struct intel_engine_cs *engine) int intel_ring_pin(struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ring->engine->i915; - struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ - unsigned flags = PIN_OFFSET_BIAS | 4096; + unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + struct i915_vma *vma = ring->vma; void *addr; int ret; - if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags); - if (ret) - return ret; + GEM_BUG_ON(ring->vaddr); - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; + if (ring->needs_iomap) + flags |= PIN_MAPPABLE; - addr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } else { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, - flags | PIN_MAPPABLE); - if (ret) + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + if (flags & PIN_MAPPABLE) + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); + else + ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); + if (unlikely(ret)) return ret; + } - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto err_unpin; + ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); + if (unlikely(ret)) + return ret; - addr = (void __force *) - i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } + if (flags & PIN_MAPPABLE) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(addr)) + goto err; ring->vaddr = addr; - ring->vma = i915_gem_obj_to_ggtt(obj); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(obj); - return ret; +err: + i915_vma_unpin(vma); + return PTR_ERR(addr); } void intel_ring_unpin(struct intel_ring *ring) @@ -1988,60 +1978,56 @@ void intel_ring_unpin(struct intel_ring *ring) GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); - if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) - i915_gem_object_unpin_map(ring->obj); - else + if (ring->needs_iomap) i915_vma_unpin_iomap(ring->vma); + else + i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; - 
i915_gem_object_ggtt_unpin(ring->obj); - ring->vma = NULL; -} - -static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) -{ - i915_gem_object_put(ring->obj); - ring->obj = NULL; + i915_vma_unpin(ring->vma); } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ring *ring) +static struct i915_vma * +intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = NULL; - if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ring->size); - if (obj == NULL) - obj = i915_gem_object_create(dev, ring->size); + obj = ERR_PTR(-ENODEV); + if (!HAS_LLC(dev_priv)) + obj = i915_gem_object_create_stolen(&dev_priv->drm, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = i915_gem_object_create(&dev_priv->drm, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ring->obj = obj; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; - return 0; + return vma; + +err: + i915_gem_object_put(obj); + return vma; } struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size) { struct intel_ring *ring; - int ret; + struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (ring == NULL) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", - engine->name); + if (!ring) return ERR_PTR(-ENOMEM); - } ring->engine = engine; - list_add(&ring->link, &engine->buffers); INIT_LIST_HEAD(&ring->request_list); @@ -2057,22 +2043,23 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) ring->last_retired_head = -1; intel_ring_update_space(ring); - ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); - if (ret) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", - engine->name, ret); - list_del(&ring->link); + vma = intel_ring_create_vma(engine->i915, size); + if 
(IS_ERR(vma)) { kfree(ring); - return ERR_PTR(ret); + return ERR_CAST(vma); } + ring->vma = vma; + if (!HAS_LLC(engine->i915) || vma->obj->stolen) + ring->needs_iomap = true; + list_add(&ring->link, &engine->buffers); return ring; } void intel_ring_free(struct intel_ring *ring) { - intel_destroy_ringbuffer_obj(ring); + i915_vma_put(ring->vma); list_del(&ring->link); kfree(ring); } @@ -2166,7 +2153,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = PTR_ERR(ring); goto error; } - engine->buffer = ring; if (I915_NEED_GFX_HWS(dev_priv)) { ret = init_status_page(engine); @@ -2181,11 +2167,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = intel_ring_pin(ring); if (ret) { - DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", - engine->name, ret); - intel_destroy_ringbuffer_obj(ring); + intel_ring_free(ring); goto error; } + engine->buffer = ring; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ea27351..bc692d5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -26,10 +26,10 @@ */ #define I915_RING_FREE_SPACE 64 -struct intel_hw_status_page { - u32 *page_addr; - unsigned int gfx_addr; - struct drm_i915_gem_object *obj; +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *page_addr; + u32 ggtt_offset; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -83,9 +83,8 @@ struct intel_engine_hangcheck { }; struct intel_ring { - struct drm_i915_gem_object *obj; - void *vaddr; struct i915_vma *vma; + void *vaddr; struct intel_engine_cs *engine; struct list_head link; @@ -97,6 +96,7 @@ struct intel_ring { int space; int size; int effective_size; + bool needs_iomap; /** We track the position of the requests in the ring buffer, and * when each is retired we increment last_retired_head as the GPU @@ -516,7 +516,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine); static 
inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { - return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR; + return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -- 2.7.4