From e4349027f6842563555992a39add4d0b2283fbbb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 27 Dec 2010 13:57:46 +0100 Subject: [PATCH] nvc0: implement VRAM buffer transfers with bounce buffers --- src/gallium/drivers/nvc0/nvc0_buffer.c | 302 ++++++++++++++++++++++++------- src/gallium/drivers/nvc0/nvc0_context.c | 35 ++-- src/gallium/drivers/nvc0/nvc0_context.h | 6 +- src/gallium/drivers/nvc0/nvc0_fence.c | 11 ++ src/gallium/drivers/nvc0/nvc0_fence.h | 1 + src/gallium/drivers/nvc0/nvc0_push.c | 10 +- src/gallium/drivers/nvc0/nvc0_resource.h | 58 ++++-- src/gallium/drivers/nvc0/nvc0_screen.c | 3 +- src/gallium/drivers/nvc0/nvc0_screen.h | 18 +- src/gallium/drivers/nvc0/nvc0_state.c | 4 + src/gallium/drivers/nvc0/nvc0_tex.c | 2 - src/gallium/drivers/nvc0/nvc0_winsys.h | 2 + 12 files changed, 336 insertions(+), 116 deletions(-) diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index 93d7f5d..8021e43 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -11,7 +11,15 @@ #include "nvc0_context.h" #include "nvc0_resource.h" -#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff +struct nvc0_transfer { + struct pipe_transfer base; +}; + +static INLINE struct nvc0_transfer * +nvc0_transfer(struct pipe_transfer *transfer) +{ + return (struct nvc0_transfer *)transfer; +} static INLINE boolean nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, @@ -28,12 +36,13 @@ nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, &buf->offset); if (!buf->bo) return FALSE; - } else { - assert(!domain); - if (!buf->data) + } + if (domain != NOUVEAU_BO_GART) { + if (!buf->data) { buf->data = MALLOC(buf->base.width0); - if (!buf->data) - return FALSE; + if (!buf->data) + return FALSE; + } } buf->domain = domain; return TRUE; @@ -59,68 +68,199 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, if (res->mm) release_allocation(&res->mm, screen->fence.current); - if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data) + if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); FREE(res); } -static INLINE uint32_t -nouveau_buffer_rw_flags(unsigned pipe) +/* Maybe just migrate to GART right away if we actually need to do this. */ +boolean +nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) +{ + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + assert(buf->domain == NOUVEAU_BO_VRAM); + + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART, + buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + size); + + if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data + start, bounce->map, size); + nouveau_bo_unmap(bounce); + + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + + nouveau_bo_ref(NULL, &bounce); + if (mm) + nvc0_mm_free(mm); + return TRUE; +} + +static boolean +nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) { - uint32_t flags = 0; + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; - if (pipe & PIPE_TRANSFER_READ) - flags = NOUVEAU_BO_RD; - if (pipe & PIPE_TRANSFER_WRITE) - flags |= NOUVEAU_BO_WR; + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nouveau_bo_map_range(bounce, offset, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + memcpy(bounce->map, buf->data + start, size); + nouveau_bo_unmap(bounce); + + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + bounce, offset, NOUVEAU_BO_GART, size); + + nouveau_bo_ref(NULL, &bounce); + if (mm) + release_allocation(&mm, nvc0->screen->fence.current); + + if (start == 0 && size == buf->base.width0) + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + return TRUE; +} + +static struct pipe_transfer * +nvc0_buffer_transfer_get(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct nvc0_resource *buf = nvc0_resource(resource); + struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer); + if (!xfr) + return NULL; + + xfr->base.resource = resource; + xfr->base.box.x = box->x; + xfr->base.box.width = box->width; + xfr->base.usage = usage; + + if (buf->domain == NOUVEAU_BO_VRAM) { + if (usage & PIPE_TRANSFER_READ) { + if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); + } + } + + return &xfr->base; +} + +static void +nvc0_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + + if (xfr->base.usage & PIPE_TRANSFER_WRITE) { + /* writing is worse */ + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000); + + if (buf->domain == NOUVEAU_BO_VRAM) { + nvc0_buffer_upload(nvc0_context(pipe), buf, + transfer->box.x, transfer->box.width); + } + + if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER))) + nvc0_context(pipe)->vbo_dirty = TRUE; + } - return flags; + FREE(xfr); +} + +static INLINE boolean +nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) { + if (!buf->fence_wr) + return TRUE; + if (!nvc0_fence_wait(buf->fence_wr)) + return FALSE; + } else { + if (!buf->fence) + return TRUE; + if (!nvc0_fence_wait(buf->fence)) + return FALSE; + + nvc0_fence_reference(&buf->fence, NULL); + } + nvc0_fence_reference(&buf->fence_wr, NULL); + + return TRUE; +} + +static INLINE boolean +nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) + return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr)); + else + return (buf->fence && !nvc0_fence_signalled(buf->fence)); } static void * nvc0_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct nvc0_resource *res = nvc0_resource(transfer->resource); - struct nvc0_fence *fence; + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = buf->bo; uint8_t *map; int ret; - uint32_t flags = nouveau_buffer_rw_flags(transfer->usage); + uint32_t offset = xfr->base.box.x; + uint32_t flags; - if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) && - (flags & NOUVEAU_BO_WR)) - nvc0_context(pipe)->vbo_dirty = TRUE; + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250); - if (res->domain == 0) - return res->data + transfer->box.x; + if (buf->domain != NOUVEAU_BO_GART) + return buf->data + offset; - if (res->domain == NOUVEAU_BO_VRAM) { - NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n"); - /* if this happens, migrate back to GART */ - return NULL; - } + if (buf->mm) + flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR; + else + flags = nouveau_screen_transfer_flags(xfr->base.usage); - if (res->score > -1024) - --res->score; + offset += buf->offset; - ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC); + ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags); if (ret) return NULL; - map = res->bo->map; - nouveau_bo_unmap(res->bo); - - fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence; - - if (fence) { - if (nvc0_fence_wait(fence) == FALSE) - NOUVEAU_ERR("failed to fence buffer\n"); - - nvc0_fence_reference(&res->fence, NULL); - nvc0_fence_reference(&res->fence_wr, NULL); + map = bo->map; + + /* Unmap right now. Since multiple buffers can share a single nouveau_bo, + * not doing so might make future maps fail or trigger "reloc while mapped" + * errors. For now, mappings to userspace are guaranteed to be persistent. + */ + nouveau_bo_unmap(bo); + + if (buf->mm) { + if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { + if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) + return NULL; + } else + if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); + } } - - return map + transfer->box.x + res->offset; + return map; } @@ -131,26 +271,23 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe, const struct pipe_box *box) { struct nvc0_resource *res = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = res->bo; + unsigned offset = res->offset + transfer->box.x + box->x; - if (!res->bo) + /* not using non-snoop system memory yet, no need for cflush */ + if (1) return; - nouveau_screen_bo_map_flush_range(pipe->screen, - res->bo, - res->offset + transfer->box.x + box->x, - box->width); + /* XXX: maybe need to upload for VRAM buffers here */ + + nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); } static void nvc0_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct nvc0_resource *res = nvc0_resource(transfer->resource); - - if (res->data) - return; - - /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */ + /* we've called nouveau_bo_unmap right after map */ } const struct u_resource_vtbl nvc0_buffer_vtbl = @@ -158,8 +295,8 @@ const struct u_resource_vtbl nvc0_buffer_vtbl = u_default_resource_get_handle, /* get_handle */ nvc0_buffer_destroy, /* resource_destroy */ NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + nvc0_buffer_transfer_get, /* get_transfer */ + nvc0_buffer_transfer_destroy, /* transfer_destroy */ nvc0_buffer_transfer_map, /* transfer_map */ nvc0_buffer_transfer_flush_region, /* transfer_flush_region */ nvc0_buffer_transfer_unmap, /* transfer_unmap */ @@ -227,6 +364,23 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen, return &buffer->base; } +static INLINE boolean +nvc0_buffer_fetch_data(struct nvc0_resource *buf, + struct nouveau_bo *bo, unsigned offset, unsigned size) +{ + if (!buf->data) { + buf->data = MALLOC(size); + if (!buf->data) + return FALSE; + } + if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data, bo->map, size); + nouveau_bo_unmap(bo); + + return TRUE; +} + /* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */ boolean nvc0_buffer_migrate(struct nvc0_context *nvc0, @@ -235,38 +389,52 @@ nvc0_buffer_migrate(struct nvc0_context *nvc0, struct nvc0_screen *screen = nvc0_screen(buf->base.screen); struct nouveau_bo *bo; unsigned size = buf->base.width0; + unsigned offset; int ret; + assert(domain != buf->domain); + if (domain == NOUVEAU_BO_GART && buf->domain == 0) { if (!nvc0_buffer_allocate(screen, buf, domain)) return FALSE; - ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR | + NOUVEAU_BO_NOSYNC); if (ret) return ret; - memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size); + memcpy(buf->bo->map, buf->data, size); nouveau_bo_unmap(buf->bo); + FREE(buf->data); } else - if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) { + if (domain != 0 && buf->domain != 0) { struct nvc0_mm_allocation *mm = buf->mm; + if (domain == NOUVEAU_BO_VRAM) { + /* keep a system memory copy of our data in case we hit a fallback */ + if (!nvc0_buffer_fetch_data(buf, buf->bo, buf->offset, size)) + return FALSE; + debug_printf("migrating %u KiB to VRAM\n", size / 1024); + } + + offset = buf->offset; bo = buf->bo; buf->bo = NULL; buf->mm = NULL; nvc0_buffer_allocate(screen, buf, domain); - nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM, - bo, 0, NOUVEAU_BO_GART, buf->base.width0); + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, domain, + bo, offset, buf->domain, buf->base.width0); - release_allocation(&mm, screen->fence.current); nouveau_bo_ref(NULL, &bo); + if (mm) + release_allocation(&mm, screen->fence.current); } else if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) { - /* should use a scratch buffer instead here */ - if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART)) + if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM)) + return FALSE; + if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0)) return FALSE; - return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM); } else - return -1; + return FALSE; buf->domain = domain; diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index d41ee29..b2b4fd6 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -104,7 +104,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) } struct resident { - struct nouveau_bo *bo; + struct nvc0_resource *res; uint32_t flags; }; @@ -112,12 +112,14 @@ void nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, struct nvc0_resource *resource, uint32_t flags) { - struct resident rsd = { NULL, flags }; + struct resident rsd = { resource, flags }; if (!resource->bo) return; - nouveau_bo_ref(resource->bo, &rsd.bo); + /* We don't need to reference the resource here, it will be referenced + * in the context/state, and bufctx will be reset when state changes. + */ util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); } @@ -125,35 +127,24 @@ void nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, struct nvc0_resource *resource) { - struct resident *rsd, rem; + struct resident *rsd, *top; unsigned i; for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); - if (rsd->bo == resource->bo) { - rem = util_dynarray_pop(&nvc0->residents[ctx], struct resident); - nouveau_bo_ref(NULL, &rem.bo); + if (rsd->res == resource) { + top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); + if (rsd != top) + *rsd = *top; break; } } } void -nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) -{ - unsigned i; - - for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) - nouveau_bo_ref(NULL, &util_dynarray_element(&nvc0->residents[ctx], - struct resident, i)->bo); - util_dynarray_resize(&nvc0->residents[ctx], 0); -} - -void nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { - struct nouveau_channel *chan = nvc0->screen->base.channel; struct resident *rsd; struct util_dynarray *array; unsigned ctx, i; @@ -164,11 +155,9 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) for (i = 0; i < array->size / sizeof(struct resident); ++i) { rsd = util_dynarray_element(array, struct resident, i); - nouveau_bo_validate(chan, rsd->bo, rsd->flags); + nvc0_resource_validate(rsd->res, rsd->flags); } } - nouveau_bo_validate(chan, nvc0->screen->text, NOUVEAU_BO_RD); - nouveau_bo_validate(chan, nvc0->screen->uniforms, NOUVEAU_BO_RD); - nouveau_bo_validate(chan, nvc0->screen->txc, NOUVEAU_BO_RD); + nvc0_screen_make_buffers_resident(nvc0->screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 962a2c0..83aff0a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -147,11 +147,15 @@ nvc0_surface(struct pipe_surface *ps) struct pipe_context *nvc0_create(struct pipe_screen *, void *); void nvc0_bufctx_emit_relocs(struct nvc0_context *); -void nvc0_bufctx_reset(struct nvc0_context *, int ctx); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, struct nvc0_resource *); +static INLINE void +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) +{ + util_dynarray_resize(&nvc0->residents[ctx], 0); +} /* nvc0_draw.c */ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index dc2abe4..0387c59 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -139,6 +139,17 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) #define NVC0_FENCE_MAX_SPINS (1 << 17) boolean +nvc0_fence_signalled(struct nvc0_fence *fence) +{ + struct nvc0_screen *screen = fence->screen; + + if (fence->state == NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen); + + return fence->state == NVC0_FENCE_STATE_SIGNALLED; +} + +boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index 7b31f28..e63c164 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -24,6 +24,7 @@ void nvc0_fence_emit(struct nvc0_fence *); void nvc0_fence_del(struct nvc0_fence *); boolean nvc0_fence_wait(struct nvc0_fence *); +boolean nvc0_fence_signalled(struct nvc0_fence *); static INLINE void nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence) diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 4bf259c..779a477 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -215,7 +215,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; struct nvc0_resource *res = nvc0_resource(vb->buffer); - data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD); + data = nvc0_resource_map_offset(nvc0, res, + vb->buffer_offset, NOUVEAU_BO_RD); if (info->indexed) data += info->index_bias * vb->stride; @@ -223,12 +224,11 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) } if (info->indexed) { - ctx.idxbuf = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer, - PIPE_TRANSFER_READ, &transfer); + ctx.idxbuf = nvc0_resource_map_offset(nvc0, + nvc0_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); if (!ctx.idxbuf) return; - ctx.idxbuf = (uint8_t *)ctx.idxbuf + nvc0->idxbuf.offset; - index_size = nvc0->idxbuf.index_size; ctx.primitive_restart = info->primitive_restart; ctx.restart_index = info->restart_index; diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 9384f19..0ffb9e8 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -12,6 +12,14 @@ struct pipe_resource; struct nouveau_bo; +struct nvc0_context; + +#define NVC0_BUFFER_SCORE_MIN -25000 +#define NVC0_BUFFER_SCORE_MAX 25000 +#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000 + +#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into the GPU's address space, are guaranteed to * have constant virtual addresses. @@ -21,7 +29,6 @@ struct nouveau_bo; struct nvc0_resource { struct pipe_resource base; const struct u_resource_vtbl *vtbl; - uint64_t address; uint8_t *data; struct nouveau_bo *bo; @@ -38,22 +45,55 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +boolean +nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, + unsigned start, unsigned size); + +boolean +nvc0_buffer_migrate(struct nvc0_context *, + struct nvc0_resource *, unsigned domain); + +static INLINE void +nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res, + int16_t score) +{ + if (score < 0) { + if (res->score > NVC0_BUFFER_SCORE_MIN) + res->score += score; + } else + if (score > 0){ + if (res->score < NVC0_BUFFER_SCORE_MAX) + res->score += score; + if (res->domain == NOUVEAU_BO_GART && + res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD) + nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM); + } +} + /* XXX: wait for fence (atm only using this for vertex push) */ static INLINE void * -nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset, +nvc0_resource_map_offset(struct nvc0_context *nvc0, + struct nvc0_resource *res, uint32_t offset, uint32_t flags) { void *map; - if (res->domain == 0) + nvc0_buffer_adjust_score(nvc0, res, -250); + + if ((res->domain == NOUVEAU_BO_VRAM) && + (res->status & NVC0_BUFFER_STATUS_DIRTY)) + nvc0_buffer_download(nvc0, res, 0, res->base.width0); + + if (res->domain != NOUVEAU_BO_GART) return res->data + offset; + if (res->mm) + flags |= NOUVEAU_BO_NOSYNC; + if (nouveau_bo_map_range(res->bo, res->offset + offset, - res->base.width0, flags | NOUVEAU_BO_NOSYNC)) + res->base.width0, flags)) return NULL; - /* With suballocation, the same bo can be mapped several times, so unmap - * immediately. Maps are guaranteed to persist. */ map = res->bo->map; nouveau_bo_unmap(res->bo); return map; @@ -149,12 +189,6 @@ nvc0_miptree_surface_new(struct pipe_context *, void nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); -struct nvc0_context; - -boolean -nvc0_buffer_migrate(struct nvc0_context *, - struct nvc0_resource *, unsigned domain); - boolean nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 4ec73b0..0e80e28 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -248,7 +248,7 @@ nvc0_screen_fence_signalled(struct pipe_screen *pscreen, struct pipe_fence_handle *fence, unsigned flags) { - return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED); + return !(nvc0_fence_signalled(nvc0_fence(fence))); } static int @@ -622,6 +622,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; nouveau_bo_validate(chan, screen->text, flags); + nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 5b1b623..efa5ff6 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -101,18 +101,26 @@ int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); static INLINE void -nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - assert(res->mm); + if (res->mm) { + nvc0_fence_reference(&res->fence, screen->fence.current); - nvc0_fence_reference(&res->fence, screen->fence.current); + if (flags & NOUVEAU_BO_WR) + nvc0_fence_reference(&res->fence_wr, screen->fence.current); + } +} - if (flags & NOUVEAU_BO_WR) - nvc0_fence_reference(&res->fence_wr, screen->fence.current); +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); nouveau_bo_validate(screen->base.channel, res->bo, flags); + + nvc0_resource_fence(res, flags); } diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 62abaa7..e77e956 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -539,6 +539,8 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0->num_textures[s] = nr; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); + nvc0->dirty |= NVC0_NEW_TEXTURES; } @@ -773,6 +775,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); nvc0->num_vtxbufs = count; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + nvc0->dirty |= NVC0_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index c9f929b..b219f82 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -218,8 +218,6 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) { boolean need_flush; - nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); - need_flush = nvc0_validate_tic(nvc0, 0); need_flush |= nvc0_validate_tic(nvc0, 4); diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index af77110..1544fb7 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -94,6 +94,8 @@ static INLINE int OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_DIRTY; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } -- 2.7.4