From: Marek Olšák Date: Fri, 7 May 2021 00:57:52 +0000 (-0400) Subject: radeonsi/gfx11: enable NGG-only draw paths X-Git-Tag: upstream/22.3.5~9309 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7c423a7ad03a55031e8045f63b157de769e0bd74;p=platform%2Fupstream%2Fmesa.git radeonsi/gfx11: enable NGG-only draw paths Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 1801501..f0270c0 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -103,7 +103,7 @@ radeonsi_include_dirs = [inc_src, inc_include, inc_gallium, inc_gallium_aux, inc radeonsi_deps = [dep_llvm, dep_clock, dep_libdrm_radeon, idep_nir_headers, idep_amdgfxregs_h, idep_mesautil] radeonsi_gfx_libs = [] -foreach ver : ['6', '7', '8', '9', '10', '103'] +foreach ver : ['6', '7', '8', '9', '10', '103', '11'] radeonsi_gfx_libs += static_library( 'radeonsi_gfx@0@'.format(ver), ['si_state_draw.cpp'], diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 002ca09..1952e3a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -632,6 +632,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign case GFX10_3: si_init_draw_functions_GFX10_3(sctx); break; + case GFX11: + si_init_draw_functions_GFX11(sctx); + break; default: unreachable("unhandled chip class"); } @@ -1270,15 +1273,23 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->has_out_of_order_rast = sscreen->info.has_out_of_order_rast && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); - sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) && - sscreen->info.chip_class >= GFX10 && - (sscreen->info.family != CHIP_NAVI14 || - sscreen->info.is_pro_graphics); - sscreen->use_ngg_culling = sscreen->use_ngg && - sscreen->info.max_render_backends >= 2 && - !((sscreen->debug_flags & DBG(NO_NGG_CULLING)) || - LLVM_VERSION_MAJOR <= 11 /* hangs on 11, see #4874 */); - sscreen->use_ngg_streamout = false; + if (sscreen->info.chip_class >= GFX11) { + sscreen->use_ngg = true; + sscreen->use_ngg_streamout = true; + /* TODO: Disable for now. Investigate if it helps. */ + sscreen->use_ngg_culling = (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL)) && + !(sscreen->debug_flags & DBG(NO_NGG_CULLING)); + } else { + sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) && + sscreen->info.chip_class >= GFX10 && + (sscreen->info.family != CHIP_NAVI14 || + sscreen->info.is_pro_graphics); + sscreen->use_ngg_streamout = false; + sscreen->use_ngg_culling = sscreen->use_ngg && + sscreen->info.max_render_backends >= 2 && + !(sscreen->debug_flags & DBG(NO_NGG_CULLING)) && + LLVM_VERSION_MAJOR >= 12; /* hangs on 11, see #4874 */ + } /* Only set this for the cases that are known to work, which are: * - GFX9 if bpp >= 4 (in bytes) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8888ca7..977d880 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -601,6 +601,7 @@ void si_init_draw_functions_GFX8(struct si_context *sctx); void si_init_draw_functions_GFX9(struct si_context *sctx); void si_init_draw_functions_GFX10(struct si_context *sctx); void si_init_draw_functions_GFX10_3(struct si_context *sctx); +void si_init_draw_functions_GFX11(struct si_context *sctx); void si_init_spi_map_functions(struct si_context *sctx); /* si_state_msaa.c */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 014e20f..9b76577 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -42,6 +42,8 @@ #define GFX(name) name##GFX10 #elif (GFX_VER == 103) #define GFX(name) name##GFX10_3 +#elif (GFX_VER == 11) +#define GFX(name) name##GFX11 #else #error "Unknown gfx version" #endif @@ -425,8 +427,26 @@ static void si_prefetch_shaders(struct si_context *sctx) return; /* Prefetch shaders and VBO descriptors to TC L2. */ - if (GFX_VERSION >= GFX9) { - /* Choose the right spot for the VBO prefetch. */ + if (GFX_VERSION >= GFX11) { + if (HAS_TESS) { + if (mode != PREFETCH_AFTER_DRAW) { + if (mask & SI_PREFETCH_HS) + si_prefetch_shader_async(sctx, sctx->queued.named.hs); + + if (mode == PREFETCH_BEFORE_DRAW) + return; + } + + if (mask & SI_PREFETCH_GS) + si_prefetch_shader_async(sctx, sctx->queued.named.gs); + } else if (mode != PREFETCH_AFTER_DRAW) { + if (mask & SI_PREFETCH_GS) + si_prefetch_shader_async(sctx, sctx->queued.named.gs); + + if (mode == PREFETCH_BEFORE_DRAW) + return; + } + } else if (GFX_VERSION >= GFX9) { if (HAS_TESS) { if (mode != PREFETCH_AFTER_DRAW) { if (mask & SI_PREFETCH_HS) @@ -1735,6 +1755,9 @@ void si_set_vertex_buffer_descriptor(struct si_screen *sscreen, struct si_vertex case GFX10_3: si_set_vb_descriptor(velems, vb, element_index, out); break; + case GFX11: + si_set_vb_descriptor(velems, vb, element_index, out); + break; default: unreachable("unhandled chip class"); } @@ -2572,6 +2595,9 @@ static void si_init_draw_vbo(struct si_context *sctx) if (NGG && GFX_VERSION < GFX10) return; + if (!NGG && GFX_VERSION >= GFX11) + return; + sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = si_draw_vbo; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index d6ca87b..765c574 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -999,6 +999,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) unsigned max_stream = util_last_bit(sel->info.base.gs.active_stream_mask); unsigned offset; + assert(sscreen->info.chip_class < GFX11); /* gfx11 doesn't have the legacy pipeline */ + pm4 = si_get_shader_pm4_state(shader); if (!pm4) return; @@ -3069,6 +3071,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, * - LDS usage is too high */ sel->tess_turns_off_ngg = sscreen->info.chip_class >= GFX10 && + sscreen->info.chip_class <= GFX10_3 && (sel->info.base.gs.invocations * sel->info.base.gs.vertices_out > 256 || sel->info.base.gs.invocations * sel->info.base.gs.vertices_out * (sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */);