From 09fc5d6e262aeb1b21faf6d952c204588602ef97 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 22 Sep 2013 21:47:35 +0200 Subject: [PATCH] radeonsi: implement clear_buffer using CP DMA, initialize CMASK with it More work needs to be done for this to be entirely shared with r600g. I'm just trying to share r600_texture.c now. The reason I put the implementation to si_descriptors.c is that the emit function had already been there. --- src/gallium/drivers/radeonsi/r600_texture.c | 5 +- src/gallium/drivers/radeonsi/radeonsi_pipe.c | 32 +++++----- src/gallium/drivers/radeonsi/si_descriptors.c | 85 ++++++++++++++++++++++++++- src/gallium/drivers/radeonsi/si_state_draw.c | 5 +- 4 files changed, 108 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c index aa307fb..53452c8 100644 --- a/src/gallium/drivers/radeonsi/r600_texture.c +++ b/src/gallium/drivers/radeonsi/r600_texture.c @@ -462,8 +462,9 @@ r600_texture_create_object(struct pipe_screen *screen, if (rtex->cmask.size) { /* Initialize the cmask to 0xCC (= compressed state). 
*/ - char *map = rscreen->b.ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE); - memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size); + r600_screen_clear_buffer(&rscreen->b, &resource->b.b, + rtex->cmask.offset, rtex->cmask.size, + 0xCCCCCCCC); } if (rscreen->b.debug_flags & DBG_TEX_DEPTH && rtex->is_depth) { diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index 5528e19..8ed5d26 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -653,6 +653,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) if (!radeon_winsys_unref(rscreen->b.ws)) return; + r600_common_screen_cleanup(&rscreen->b); + if (rscreen->fences.bo) { struct r600_fence_block *entry, *tmp; @@ -823,18 +825,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) return NULL; } - r600_common_screen_init(&rscreen->b, ws); - - if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE)) - rscreen->b.debug_flags |= DBG_TEX_DEPTH; - if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) - rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; - - if (r600_init_tiling(rscreen)) { - FREE(rscreen); - return NULL; - } - + /* Set functions first. 
*/ + rscreen->b.b.context_create = r600_create_context; rscreen->b.b.destroy = r600_destroy_screen; rscreen->b.b.get_name = r600_get_name; rscreen->b.b.get_vendor = r600_get_vendor; @@ -844,12 +836,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) rscreen->b.b.get_compute_param = r600_get_compute_param; rscreen->b.b.get_timestamp = r600_get_timestamp; rscreen->b.b.is_format_supported = si_is_format_supported; - rscreen->b.b.context_create = r600_create_context; rscreen->b.b.fence_reference = r600_fence_reference; rscreen->b.b.fence_signalled = r600_fence_signalled; rscreen->b.b.fence_finish = r600_fence_finish; - r600_init_screen_resource_functions(&rscreen->b.b); - if (rscreen->b.info.has_uvd) { rscreen->b.b.get_video_param = ruvd_get_video_param; rscreen->b.b.is_video_format_supported = ruvd_is_format_supported; @@ -857,6 +846,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) rscreen->b.b.get_video_param = r600_get_video_param; rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported; } + r600_init_screen_resource_functions(&rscreen->b.b); + + r600_common_screen_init(&rscreen->b, ws); + + if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE)) + rscreen->b.debug_flags |= DBG_TEX_DEPTH; + if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) + rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; + + if (r600_init_tiling(rscreen)) { + FREE(rscreen); + return NULL; + } util_format_s3tc_init(); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a8f8781..93d3684 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -44,7 +44,7 @@ static uint32_t null_desc[8]; /* zeros */ #define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */ /* Emit a CP DMA packet to do a copy from one buffer to another. - * The size must fit in bits [20:0]. 
Notes:
+ * The size must fit in bits [20:0].
  */
 static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
 				       uint64_t dst_va, uint64_t src_va,
@@ -517,6 +517,99 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	si_update_descriptors(rctx, &buffers->desc);
 }
 
+/* CP DMA */
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/* Fill "size" bytes of "dst", starting at byte "offset", with the 32-bit
+ * pattern "value".
+ *
+ * Dword-aligned clears are emitted as CP DMA packets into the gfx ring;
+ * anything else is written by the CPU through a mapping of the buffer.
+ */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			    unsigned offset, unsigned size, unsigned value)
+{
+	struct r600_context *rctx = (struct r600_context*)ctx;
+
+	if (!size)
+		return;
+
+	/* Record the cleared range up front: "size" is consumed by the
+	 * CP DMA loop below, so it must be used before it is modified. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+		       offset + size);
+
+	/* Fallback for unaligned clears. */
+	if (offset % 4 != 0 || size % 4 != 0) {
+		uint8_t *map = rctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      rctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+
+		/* Write byte by byte, starting at the requested offset, and
+		 * pick the byte of "value" that matches the position within
+		 * the dword (little-endian layout, same as a dword store). */
+		map += offset;
+		for (unsigned i = 0; i < size; i++) {
+			unsigned byte_within_dword = (offset + i) % 4;
+			*map++ = (value >> (byte_within_dword * 8)) & 0xff;
+		}
+		return;
+	}
+
+	uint64_t va = r600_resource_va(&rctx->screen->b.b, dst) + offset;
+
+	/* Flush the caches where the resource is bound. */
+	/* XXX only flush the caches where the buffer is bound. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META;
+	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+
+	while (size) {
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = 0;
+
+		si_need_cs_space(rctx, 7 + (rctx->b.flags ? rctx->cache_flush.num_dw : 0),
+				 FALSE);
+
+		/* This must be done after need_cs_space. */
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				      (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+		/* Flush the caches for the first copy only.
+		 * Also wait for the previous CP DMA operations. */
+		if (rctx->b.flags) {
+			si_emit_cache_flush(&rctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma_clear_buffer(rctx, va, byte_count, value, dma_flags);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	/* Flush the caches again in case the 3D engine has been prefetching
+	 * the resource. */
+	/* XXX only flush the caches where the buffer is bound. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META;
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
@@ -541,6 +634,7 @@ void si_init_all_descriptors(struct r600_context *rctx)
 	/* Set pipe_context functions. 
*/ rctx->b.b.set_constant_buffer = si_set_constant_buffer; rctx->b.b.set_stream_output_targets = si_set_streamout_targets; + rctx->b.clear_buffer = si_clear_buffer; } void si_release_all_descriptors(struct r600_context *rctx) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index cb5055a..0213523 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -677,7 +677,10 @@ void si_emit_cache_flush(struct r600_common_context *rctx, struct r600_atom *ato radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); } - if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { + if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); + } else if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { /* Needed if streamout buffers are going to be used as a source. */ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -- 2.7.4