From 4eea710b0d050275b532dbc117da97f569e5fb1e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 8 Jun 2016 21:00:22 +0200
Subject: [PATCH] radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

We could also do MSAA resolve in a compute shader like Vulkan and remove
these workarounds.

v2: comment the magic numbers
v3: compare surface.bpe in bytes (it was compared against bit counts), so
    that SI selects the tile mode index matching the real element size

Reviewed-by: Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_texture.c     | 87 +++++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_blit.c        | 20 +++++-
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index edfae95..57fa9e3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_texture {
 	uint64_t			dcc_offset; /* 0 = disabled */
 	unsigned			cb_color_info; /* fast clear enable bit */
 	unsigned			color_clear_value[2];
+	unsigned			last_msaa_resolve_target_micro_mode;
 
 	/* Depth buffer compression and fast clear. */
 	struct r600_htile_info		htile;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index a1c314e..32347f2 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1012,6 +1012,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 	 * This must be done after r600_setup_surface.
 	 * Applies to R600-Cayman. */
 	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+	/* Applies to GCN. */
+	rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
 
 	if (rtex->is_depth) {
 		if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@@ -1808,6 +1810,83 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
 			   clear_value, R600_COHERENCY_CB_META);
 }
 
+/* Set the same micro tile mode as the destination of the last MSAA resolve.
+ * This allows hitting the MSAA resolve fast path, which requires that both
+ * src and dst micro tile modes match.
+ */
+static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
+					   struct r600_texture *rtex)
+{
+	if (rtex->resource.is_shared ||
+	    rtex->surface.nsamples <= 1 ||
+	    rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
+		return;
+
+	assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
+	assert(rtex->surface.last_level == 0);
+
+	/* These magic numbers were copied from addrlib. It doesn't use any
+	 * definitions for them either. They are all 2D_TILED_THIN1 modes with
+	 * different bpp and micro tile mode. Note that surface.bpe is in bytes.
+	 */
+	if (rscreen->chip_class >= CIK) {
+		switch (rtex->last_msaa_resolve_target_micro_mode) {
+		case 0: /* displayable */
+			rtex->surface.tiling_index[0] = 10;
+			break;
+		case 1: /* thin */
+			rtex->surface.tiling_index[0] = 14;
+			break;
+		case 3: /* rotated */
+			rtex->surface.tiling_index[0] = 28;
+			break;
+		default: /* depth, thick */
+			assert(!"unexpected micro mode");
+			return;
+		}
+	} else { /* SI */
+		switch (rtex->last_msaa_resolve_target_micro_mode) {
+		case 0: /* displayable */
+			switch (rtex->surface.bpe) {
+			case 1:
+				rtex->surface.tiling_index[0] = 10;
+				break;
+			case 2:
+				rtex->surface.tiling_index[0] = 11;
+				break;
+			default: /* 4, 8 */
+				rtex->surface.tiling_index[0] = 12;
+				break;
+			}
+			break;
+		case 1: /* thin */
+			switch (rtex->surface.bpe) {
+			case 1:
+				rtex->surface.tiling_index[0] = 14;
+				break;
+			case 2:
+				rtex->surface.tiling_index[0] = 15;
+				break;
+			case 4:
+				rtex->surface.tiling_index[0] = 16;
+				break;
+			default: /* 8, 16 */
+				rtex->surface.tiling_index[0] = 17;
+				break;
+			}
+			break;
+		default: /* depth, thick */
+			assert(!"unexpected micro mode");
+			return;
+		}
+	}
+
+	rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
+
+	p_atomic_inc(&rscreen->dirty_fb_counter);
+	p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+}
+
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
@@ -1881,6 +1960,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 			if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
 				continue;
 
+			/* We can change the micro tile mode before a full clear. */
+			if (rctx->screen->chip_class >= SI)
+				si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
 			vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
 			vi_dcc_clear_level(rctx, tex, 0, reset_value);
 
@@ -1897,6 +1980,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				continue;
 			}
 
+			/* We can change the micro tile mode before a full clear. */
+			if (rctx->screen->chip_class >= SI)
+				si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
 			/* Do the fast clear. */
 			rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
 					   tex->cmask.offset, tex->cmask.size, 0,
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 9de2c75..754b478 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -22,6 +22,7 @@
  */
 
 #include "si_pipe.h"
+#include "sid.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"
 
@@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 	    info->src.box.height == dst_height &&
 	    info->src.box.depth == 1 &&
 	    dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
-	    src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
 	    (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
+		/* Check the last constraint. */
+		if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
+			/* The next fast clear will switch to this mode to
+			 * get direct hw resolve next time if the mode is
+			 * different now.
+			 */
+			src->last_msaa_resolve_target_micro_mode =
+				dst->surface.micro_tile_mode;
+			goto resolve_to_temp;
+		}
+
 		/* Resolving into a surface with DCC is unsupported. Since
 		 * it's being overwritten anyway, clear it to uncompressed.
 		 * This is still the fastest codepath even with this clear.
@@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 		return true;
 	}
 
+resolve_to_temp:
 	/* Shader-based resolve is VERY SLOW. Instead, resolve into
 	 * a temporary texture and blit.
 	 */
@@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
 		      R600_RESOURCE_FLAG_DISABLE_DCC;
 
+	/* The src and dst microtile modes must be the same. */
+	if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
+		templ.bind = PIPE_BIND_SCANOUT;
+	else
+		templ.bind = 0;
+
 	tmp = ctx->screen->resource_create(ctx->screen, &templ);
 	if (!tmp)
 		return false;
-- 
2.7.4