From 809e9447b92ffe1346b2d6ec390e212d5307f61c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 9 Apr 2014 16:19:30 +0200 Subject: [PATCH] drm/nouveau: use shared fences for readable objects nouveau keeps track in userspace whether a buffer is being written to or being read, but it doesn't use that information. Change this to allow multiple readers on the same bo. Signed-off-by: Maarten Lankhorst Acked-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++--- drivers/gpu/drm/nouveau/nouveau_bo.h | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 6 ++-- drivers/gpu/drm/nouveau/nouveau_fence.c | 55 +++++++++++++++++-------------- drivers/gpu/drm/nouveau/nouveau_fence.h | 4 +-- drivers/gpu/drm/nouveau/nouveau_gem.c | 37 ++++++++++++++------- 6 files changed, 68 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6cf7db0..eea74b1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -94,7 +94,7 @@ nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile, if (tile) { spin_lock(&drm->tile.lock); - tile->fence = nouveau_fence_ref((struct nouveau_fence *)fence); + tile->fence = (struct nouveau_fence *)fence_get(fence); tile->used = false; spin_unlock(&drm->tile.lock); } @@ -970,7 +970,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, } mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); - ret = nouveau_fence_sync(nouveau_bo(bo), chan); + ret = nouveau_fence_sync(nouveau_bo(bo), chan, true); if (ret == 0) { ret = drm->ttm.move(chan, bo, &bo->mem, new_mem); if (ret == 0) { @@ -1458,11 +1458,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) } void -nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) +nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool exclusive) { struct reservation_object *resv = nvbo->bo.resv; - reservation_object_add_excl_fence(resv, &fence->base); + if (exclusive) + reservation_object_add_excl_fence(resv, &fence->base); + else if (fence) + reservation_object_add_shared_fence(resv, &fence->base); } struct ttm_bo_driver nouveau_bo_driver = { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 4ef88e8..ae95b2d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -78,7 +78,7 @@ u16 nouveau_bo_rd16(struct nouveau_bo *, unsigned index); void nouveau_bo_wr16(struct nouveau_bo *, unsigned index, u16 val); u32 nouveau_bo_rd32(struct nouveau_bo *, unsigned index); void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val); -void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *); +void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *, bool exclusive); int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, bool no_wait_gpu); diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index ec1960a..a9ec525 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -658,7 +658,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, spin_unlock_irqrestore(&dev->event_lock, flags); /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan); + ret = nouveau_fence_sync(old_bo, chan, false); if (ret) goto fail; @@ -722,7 +722,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan); + ret = nouveau_fence_sync(new_bo, chan, false); if (ret) { ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; @@ -780,7 +780,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* Update the crtc struct and cleanup */ crtc->primary->fb = fb; - nouveau_bo_fence(old_bo, fence); + nouveau_bo_fence(old_bo, fence, false); ttm_bo_unreserve(&old_bo->bo); if (old_bo != new_bo) nouveau_bo_unpin(old_bo); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 5e7fa68..decfe6c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -342,41 +342,56 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) } int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive) { struct nouveau_fence_chan *fctx = chan->fence; - struct fence *fence = NULL; + struct fence *fence; struct reservation_object *resv = nvbo->bo.resv; struct reservation_object_list *fobj; + struct nouveau_fence *f; int ret = 0, i; + if (!exclusive) { + ret = reservation_object_reserve_shared(resv); + + if (ret) + return ret; + } + + fobj = reservation_object_get_list(resv); fence = reservation_object_get_excl(resv); - if (fence && !fence_is_signaled(fence)) { - struct nouveau_fence *f = from_fence(fence); - struct nouveau_channel *prev = f->channel; + if (fence && (!exclusive || !fobj || !fobj->shared_count)) { + struct nouveau_channel *prev = NULL; - if (prev != chan) { - ret = fctx->sync(f, prev, chan); - if (unlikely(ret)) - ret = nouveau_fence_wait(f, true, true); - } - } + f = nouveau_local_fence(fence, chan->drm); + if (f) + prev = f->channel; + + if (!prev || (prev != chan && (ret = fctx->sync(f, prev, chan)))) + ret = fence_wait(fence, true); - if (ret) return ret; + } - fobj = reservation_object_get_list(resv); - if (!fobj) + if (!exclusive || !fobj) return ret; for (i = 0; i < fobj->shared_count && !ret; ++i) { + struct nouveau_channel *prev = NULL; + fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(resv)); - /* should always be true, for now */ - if (!nouveau_local_fence(fence, chan->drm)) + f = nouveau_local_fence(fence, chan->drm); + if (f) + prev = f->channel; + + if (!prev || (ret = fctx->sync(f, prev, chan))) ret = fence_wait(fence, true); + + if (ret) + break; } return ret; @@ -390,14 +405,6 @@ nouveau_fence_unref(struct nouveau_fence **pfence) *pfence = NULL; } -struct nouveau_fence * -nouveau_fence_ref(struct nouveau_fence *fence) -{ - if (fence) - fence_get(&fence->base); - return fence; -} - int nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, struct nouveau_fence **pfence) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 0282e88..986c813 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -20,15 +20,13 @@ struct nouveau_fence { int nouveau_fence_new(struct nouveau_channel *, bool sysmem, struct nouveau_fence **); -struct nouveau_fence * -nouveau_fence_ref(struct nouveau_fence *); void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); -int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *); +int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive); struct nouveau_fence_chan { spinlock_t lock; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 1ba5f9e..b7dbd16 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -98,14 +98,23 @@ static void nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) { const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM; + struct reservation_object *resv = nvbo->bo.resv; + struct reservation_object_list *fobj; struct fence *fence = NULL; + fobj = reservation_object_get_list(resv); + list_del(&vma->head); - if (mapped) + if (fobj && fobj->shared_count > 1) + ttm_bo_wait(&nvbo->bo, true, false, false); + else if (fobj && fobj->shared_count == 1) + fence = rcu_dereference_protected(fobj->shared[0], + reservation_object_held(resv)); + else fence = reservation_object_get_excl(nvbo->bo.resv); - if (fence) { + if (fence && mapped) { nouveau_fence_work(fence, nouveau_gem_object_delete, vma); } else { if (mapped) @@ -289,15 +298,18 @@ struct validate_op { }; static void -validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence) +validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, + struct drm_nouveau_gem_pushbuf_bo *pbbo) { struct nouveau_bo *nvbo; + struct drm_nouveau_gem_pushbuf_bo *b; while (!list_empty(&op->list)) { nvbo = list_entry(op->list.next, struct nouveau_bo, entry); + b = &pbbo[nvbo->pbbo_index]; if (likely(fence)) - nouveau_bo_fence(nvbo, fence); + nouveau_bo_fence(nvbo, fence, !!b->write_domains); if (unlikely(nvbo->validate_mapped)) { ttm_bo_kunmap(&nvbo->kmap); @@ -312,9 +324,10 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence) } static void -validate_fini(struct validate_op *op, struct nouveau_fence *fence) +validate_fini(struct validate_op *op, struct nouveau_fence *fence, + struct drm_nouveau_gem_pushbuf_bo *pbbo) { - validate_fini_no_ticket(op, fence); + validate_fini_no_ticket(op, fence, pbbo); ww_acquire_fini(&op->ticket); } @@ -370,7 +383,7 @@ retry: list_splice_tail_init(&vram_list, &op->list); list_splice_tail_init(&gart_list, &op->list); list_splice_tail_init(&both_list, &op->list); - validate_fini_no_ticket(op, NULL); + validate_fini_no_ticket(op, NULL, NULL); if (unlikely(ret == -EDEADLK)) { ret = ttm_bo_reserve_slowpath(&nvbo->bo, true, &op->ticket); @@ -412,7 +425,7 @@ retry: list_splice_tail(&gart_list, &op->list); list_splice_tail(&both_list, &op->list); if (ret) - validate_fini(op, NULL); + validate_fini(op, NULL, NULL); return ret; } @@ -446,7 +459,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, return ret; } - ret = nouveau_fence_sync(nvbo, chan); + ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains); if (unlikely(ret)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "fail post-validate sync\n"); @@ -504,7 +517,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, if (unlikely(ret < 0)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "validating bo list\n"); - validate_fini(op, NULL); + validate_fini(op, NULL, NULL); return ret; } *apply_relocs = ret; @@ -610,7 +623,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, data |= r->vor; } - ret = ttm_bo_wait(&nvbo->bo, false, false, false); + ret = ttm_bo_wait(&nvbo->bo, true, false, false); if (ret) { NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret); break; @@ -788,7 +801,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } out: - validate_fini(&op, fence); + validate_fini(&op, fence, bo); nouveau_fence_unref(&fence); out_prevalid: -- 2.7.4