From 7395432f2e792f8719b03bb39b0cf5cba8ba583b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 28 Jul 2014 09:28:05 +0800 Subject: [PATCH] ilo: try unblocking a transfer with a staging bo When mapping a busy resource with PIPE_TRANSFER_DISCARD_RANGE or PIPE_TRANSFER_FLUSH_EXPLICIT, we can avoid blocking by allocating and mapping a staging bo, and emit pipelined copies at proper places. Since the staging bo is never bound to GPU, we give it packed layout to save space. --- src/gallium/drivers/ilo/ilo_resource.c | 58 ++++++++++++- src/gallium/drivers/ilo/ilo_screen.c | 3 +- src/gallium/drivers/ilo/ilo_transfer.c | 152 ++++++++++++++++++++++++++++++--- src/gallium/drivers/ilo/ilo_transfer.h | 15 +++- 4 files changed, 209 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index c812c19..812ccaf 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -1279,15 +1279,14 @@ tex_apply_layout(struct ilo_texture *tex, const struct winsys_handle *handle) { tex->bo_format = layout->format; + tex->block_width = layout->block_width; + tex->block_height = layout->block_height; + tex->block_size = layout->block_size; tex->tiling = layout->tiling; tex->bo_stride = layout->bo_stride; tex->bo_height = layout->bo_height; - tex->block_width = layout->block_width; - tex->block_height = layout->block_height; - tex->block_size = layout->block_size; - tex->halign_8 = (layout->align_i == 8); tex->valign_4 = (layout->align_j == 4); tex->array_spacing_full = layout->array_spacing_full; @@ -1315,6 +1314,44 @@ tex_apply_layout(struct ilo_texture *tex, return true; } +/** + * The texture is for transfer only. We can define our own layout to save + * space. + */ +static bool +tex_apply_transfer_layout(struct ilo_texture *tex) +{ + const struct pipe_resource *templ = &tex->base; + const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ? 
+ templ->depth0 : templ->array_size; + unsigned slice_width, slice_height, i; + + assert(templ->last_level == 0); + + tex->bo_format = templ->format; + tex->block_width = util_format_get_blockwidth(templ->format); + tex->block_height = util_format_get_blockheight(templ->format); + tex->block_size = util_format_get_blocksize(templ->format); + + assert(util_is_power_of_two(tex->block_width) && + util_is_power_of_two(tex->block_height)); + + /* use packed layout */ + slice_width = align(templ->width0, tex->block_width); + slice_height = align(templ->height0, tex->block_height); + for (i = 0; i < num_slices; i++) { + tex->slices[0][i].x = 0; + tex->slices[0][i].y = slice_height * i; + } + + tex->tiling = INTEL_TILING_NONE; + tex->bo_stride = (slice_width / tex->block_width) * tex->block_size; + tex->bo_stride = align(tex->bo_stride, 64); + tex->bo_height = (slice_height / tex->block_height) * num_slices; + + return tex_create_bo(tex); +} + static void tex_destroy(struct ilo_texture *tex) { @@ -1338,6 +1375,7 @@ tex_create(struct pipe_screen *screen, { struct tex_layout layout; struct ilo_texture *tex; + bool transfer_only; tex = CALLOC_STRUCT(ilo_texture); if (!tex) @@ -1354,6 +1392,18 @@ tex_create(struct pipe_screen *screen, tex->imported = (handle != NULL); + /* use transfer layout when the texture is never bound to GPU */ + transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_TRANSFER_READ)); + if (transfer_only && templ->last_level == 0) { + if (!tex_apply_transfer_layout(tex)) { + tex_destroy(tex); + return NULL; + } + + return &tex->base; + } + if (!tex_layout_init(&layout, screen, templ, tex->slices)) { tex_destroy(tex); return NULL; diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 09980dd..d226911 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -34,6 +34,7 @@ #include "ilo_context.h" #include "ilo_format.h" #include "ilo_resource.h" +#include 
"ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */ #include "ilo_public.h" #include "ilo_screen.h" @@ -397,7 +398,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MULTISAMPLE: return false; /* TODO */ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: - return 64; + return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT; case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return true; diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c index 0f1347b..7acb4ac5c 100644 --- a/src/gallium/drivers/ilo/ilo_transfer.c +++ b/src/gallium/drivers/ilo/ilo_transfer.c @@ -30,6 +30,7 @@ #include "util/u_format_etc.h" #include "ilo_blit.h" +#include "ilo_blitter.h" #include "ilo_cp.h" #include "ilo_context.h" #include "ilo_resource.h" @@ -164,6 +165,52 @@ usage_allows_staging_bo(unsigned usage) } /** + * Allocate the staging resource. It is always linear and its size matches + * the transfer box, with proper paddings. + */ +static bool +xfer_alloc_staging_res(struct ilo_transfer *xfer) +{ + const struct pipe_resource *res = xfer->base.resource; + const struct pipe_box *box = &xfer->base.box; + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + + templ.format = res->format; + + if (res->target == PIPE_BUFFER) { + templ.target = PIPE_BUFFER; + templ.width0 = + (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width; + } + else { + /* use 2D array for any texture target */ + templ.target = PIPE_TEXTURE_2D_ARRAY; + templ.width0 = box->width; + } + + templ.height0 = box->height; + templ.depth0 = 1; + templ.array_size = box->depth; + templ.nr_samples = 1; + templ.usage = PIPE_USAGE_STAGING; + templ.bind = PIPE_BIND_TRANSFER_WRITE; + + if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { + templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT | + PIPE_RESOURCE_FLAG_MAP_COHERENT; + } + + xfer->staging.res = res->screen->resource_create(res->screen, &templ); + + if (xfer->staging.res && 
xfer->staging.res->target != PIPE_BUFFER) + assert(ilo_texture(xfer->staging.res)->tiling == INTEL_TILING_NONE); + + return (xfer->staging.res != NULL); +} + +/** * Use an alternative transfer method or rename the resource to unblock an * otherwise blocking transfer. */ @@ -185,11 +232,14 @@ xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed) renamed = true; unblocked = true; } - else if (usage_allows_staging_bo(xfer->base.usage)) { - /* TODO */ + else if (usage_allows_staging_bo(xfer->base.usage) && + xfer_alloc_staging_res(xfer)) { + xfer->method = ILO_TRANSFER_MAP_STAGING; + unblocked = true; } break; case ILO_TRANSFER_MAP_GTT_UNSYNC: + case ILO_TRANSFER_MAP_STAGING: unblocked = true; break; default: @@ -218,10 +268,10 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer) xfer->base.layer_stride = util_format_get_2d_size(format, xfer->base.stride, box->height); - xfer->staging_sys = + xfer->staging.sys = align_malloc(xfer->base.layer_stride * box->depth, alignment); - return (xfer->staging_sys != NULL); + return (xfer->staging.sys != NULL); } /** @@ -244,9 +294,29 @@ xfer_map(struct ilo_transfer *xfer) case ILO_TRANSFER_MAP_GTT_UNSYNC: ptr = intel_bo_map_unsynchronized(resource_get_bo(xfer->base.resource)); break; + case ILO_TRANSFER_MAP_STAGING: + { + const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen); + struct intel_bo *bo = resource_get_bo(xfer->staging.res); + + /* + * We want a writable, optionally persistent and coherent, mapping + * for a linear bo. We can call resource_get_transfer_method(), but + * this turns out to be fairly simple. 
+ */ + if (is->dev.has_llc) + ptr = intel_bo_map(bo, true); + else + ptr = intel_bo_map_gtt(bo); + + if (ptr && xfer->staging.res->target == PIPE_BUFFER) + ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT); + + } + break; case ILO_TRANSFER_MAP_SW_CONVERT: case ILO_TRANSFER_MAP_SW_ZS: - ptr = xfer->staging_sys; + ptr = xfer->staging.sys; break; default: assert(!"unknown mapping method"); @@ -269,6 +339,9 @@ xfer_unmap(struct ilo_transfer *xfer) case ILO_TRANSFER_MAP_GTT_UNSYNC: intel_bo_unmap(resource_get_bo(xfer->base.resource)); break; + case ILO_TRANSFER_MAP_STAGING: + intel_bo_unmap(resource_get_bo(xfer->staging.res)); + break; default: break; } @@ -583,7 +656,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, tex_get_box_origin(s8_tex, xfer->base.level, slice, box, &s8_mem_x, &s8_mem_y); - dst = xfer->staging_sys + xfer->base.layer_stride * slice; + dst = xfer->staging.sys + xfer->base.layer_stride * slice; for (i = 0; i < box->height; i++) { unsigned x = mem_x, s8_x = s8_mem_x; @@ -622,7 +695,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, tex_get_box_origin(tex, xfer->base.level, slice, box, &mem_x, &mem_y); - dst = xfer->staging_sys + xfer->base.layer_stride * slice; + dst = xfer->staging.sys + xfer->base.layer_stride * slice; for (i = 0; i < box->height; i++) { unsigned x = mem_x; @@ -710,7 +783,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, tex_get_box_origin(s8_tex, xfer->base.level, slice, box, &s8_mem_x, &s8_mem_y); - src = xfer->staging_sys + xfer->base.layer_stride * slice; + src = xfer->staging.sys + xfer->base.layer_stride * slice; for (i = 0; i < box->height; i++) { unsigned x = mem_x, s8_x = s8_mem_x; @@ -749,7 +822,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, tex_get_box_origin(tex, xfer->base.level, slice, box, &mem_x, &mem_y); - src = xfer->staging_sys + xfer->base.layer_stride * slice; + src = xfer->staging.sys + xfer->base.layer_stride * slice; for (i = 0; i < box->height; i++) { unsigned x = mem_x; @@ 
-800,7 +873,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex, if (unlikely(tex->bo_format == tex->base.format)) { util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth, - xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride, + xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride, 0, 0, 0); tex_staging_sys_unmap_bo(tex); @@ -814,7 +887,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex, for (slice = 0; slice < box->depth; slice++) { const void *src = - xfer->staging_sys + xfer->base.layer_stride * slice; + xfer->staging.sys + xfer->base.layer_stride * slice; util_format_etc1_rgb8_unpack_rgba_8unorm(dst, tex->bo_stride, src, xfer->base.stride, @@ -919,6 +992,14 @@ tex_map(struct ilo_transfer *xfer) tex_get_slice_stride(tex, xfer->base.level) : 0; } break; + case ILO_TRANSFER_MAP_STAGING: + ptr = xfer_map(xfer); + if (ptr) { + const struct ilo_texture *staging = ilo_texture(xfer->staging.res); + xfer->base.stride = staging->bo_stride; + xfer->base.layer_stride = tex_get_slice_stride(staging, 0); + } + break; case ILO_TRANSFER_MAP_SW_CONVERT: case ILO_TRANSFER_MAP_SW_ZS: if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer)) @@ -944,7 +1025,9 @@ buf_map(struct ilo_transfer *xfer) if (!ptr) return NULL; - ptr += xfer->base.box.x; + if (xfer->method != ILO_TRANSFER_MAP_STAGING) + ptr += xfer->base.box.x; + xfer->base.stride = 0; xfer->base.layer_stride = 0; @@ -957,6 +1040,34 @@ buf_map(struct ilo_transfer *xfer) return ptr; } +static void +copy_staging_resource(struct ilo_context *ilo, + struct ilo_transfer *xfer, + const struct pipe_box *box) +{ + const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ? 
+ xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0; + struct pipe_box modified_box; + + assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res); + + if (!box) { + u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height, + xfer->base.box.depth, &modified_box); + box = &modified_box; + } + else if (pad_x) { + modified_box = *box; + modified_box.x += pad_x; + box = &modified_box; + } + + ilo_blitter_blt_copy_resource(ilo->blitter, + xfer->base.resource, xfer->base.level, + xfer->base.box.x, xfer->base.box.y, xfer->base.box.z, + xfer->staging.res, 0, box); +} + static bool is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush) { @@ -1042,6 +1153,16 @@ ilo_transfer_flush_region(struct pipe_context *pipe, struct pipe_transfer *transfer, const struct pipe_box *box) { + struct ilo_context *ilo = ilo_context(pipe); + struct ilo_transfer *xfer = ilo_transfer(transfer); + + /* + * The staging resource is mapped persistently and coherently. We can copy + * without unmapping. 
+ */ + if (xfer->method == ILO_TRANSFER_MAP_STAGING && + (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + copy_staging_resource(ilo, xfer, box); } static void @@ -1054,10 +1175,15 @@ ilo_transfer_unmap(struct pipe_context *pipe, xfer_unmap(xfer); switch (xfer->method) { + case ILO_TRANSFER_MAP_STAGING: + if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + copy_staging_resource(ilo, xfer, NULL); + pipe_resource_reference(&xfer->staging.res, NULL); + break; case ILO_TRANSFER_MAP_SW_CONVERT: case ILO_TRANSFER_MAP_SW_ZS: tex_staging_sys_writeback(xfer); - align_free(xfer->staging_sys); + align_free(xfer->staging.sys); break; default: break; diff --git a/src/gallium/drivers/ilo/ilo_transfer.h b/src/gallium/drivers/ilo/ilo_transfer.h index b346f45..d7f4838 100644 --- a/src/gallium/drivers/ilo/ilo_transfer.h +++ b/src/gallium/drivers/ilo/ilo_transfer.h @@ -32,12 +32,21 @@ #include "ilo_common.h" +/* + * Direct mappings are always page aligned, but ILO_TRANSFER_MAP_STAGING is + * not. + */ +#define ILO_TRANSFER_MAP_BUFFER_ALIGNMENT 64 + enum ilo_transfer_map_method { /* map() / map_gtt() / map_unsynchronized() */ ILO_TRANSFER_MAP_CPU, ILO_TRANSFER_MAP_GTT, ILO_TRANSFER_MAP_GTT_UNSYNC, + /* use staging resource */ + ILO_TRANSFER_MAP_STAGING, + /* use staging system buffer */ ILO_TRANSFER_MAP_SW_CONVERT, ILO_TRANSFER_MAP_SW_ZS, @@ -47,7 +56,11 @@ struct ilo_transfer { struct pipe_transfer base; enum ilo_transfer_map_method method; - void *staging_sys; + /* pipe_resource, system memory, or garbage depending on the method */ + union { + struct pipe_resource *res; + void *sys; + } staging; }; struct ilo_context; -- 2.7.4