From 612489bd5df5dc46a95e05c8882e28a24eb9bae9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 16 Nov 2017 17:00:50 +0100 Subject: [PATCH] radeonsi/gfx10: generate VS and TES as NGG merged ESGS shaders This does not support geometry shading yet. Also missing are streamout and NGG-specific optimizations. Acked-by: Bas Nieuwenhuizen --- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 265 +++++++++++++++++++++ src/gallium/drivers/radeonsi/meson.build | 1 + src/gallium/drivers/radeonsi/si_shader.c | 127 ++++++++-- src/gallium/drivers/radeonsi/si_shader_internal.h | 11 + .../drivers/radeonsi/si_shader_tgsi_setup.c | 2 +- 6 files changed, 382 insertions(+), 25 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index ab0ba93..83cca39 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -6,6 +6,7 @@ C_SOURCES := \ $(GENERATED_SOURCES) \ cik_sdma.c \ driinfo_radeonsi.h \ + gfx10_shader_ngg.c \ si_blit.c \ si_buffer.c \ si_build_pm4.h \ diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c new file mode 100644 index 0000000..f5774b2 --- /dev/null +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -0,0 +1,265 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "si_pipe.h" +#include "si_shader_internal.h" + +#include "sid.h" + +#include "util/u_memory.h" + +static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx) +{ + return si_unpack_param(ctx, ctx->param_merged_wave_info, 24, 4); +} + +static LLVMValueRef ngg_get_vtx_cnt(struct si_shader_context *ctx) +{ + return ac_build_bfe(&ctx->ac, ctx->gs_tg_info, + LLVMConstInt(ctx->ac.i32, 12, false), + LLVMConstInt(ctx->ac.i32, 9, false), + false); +} + +static LLVMValueRef ngg_get_prim_cnt(struct si_shader_context *ctx) +{ + return ac_build_bfe(&ctx->ac, ctx->gs_tg_info, + LLVMConstInt(ctx->ac.i32, 22, false), + LLVMConstInt(ctx->ac.i32, 9, false), + false); +} + +/* Send GS Alloc Req message from the first wave of the group to SPI. + * Message payload is: + * - bits 0..10: vertices in group + * - bits 12..22: primitives in group + */ +static void build_sendmsg_gs_alloc_req(struct si_shader_context *ctx, + LLVMValueRef vtx_cnt, + LLVMValueRef prim_cnt) +{ + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef tmp; + + tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, ""); + ac_build_ifcc(&ctx->ac, tmp, 5020); + + tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),""); + tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp); + + ac_build_endif(&ctx->ac, 5020); +} + +struct ngg_prim { + unsigned num_vertices; + LLVMValueRef isnull; + LLVMValueRef index[3]; + LLVMValueRef edgeflag[3]; +}; + +static void build_export_prim(struct si_shader_context *ctx, + const struct ngg_prim *prim) +{ + LLVMBuilderRef builder = ctx->ac.builder; + struct ac_export_args args; + LLVMValueRef tmp; + + tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, ""); + args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), ""); + + for (unsigned i = 0; i < prim->num_vertices; ++i) { + tmp = LLVMBuildShl(builder, prim->index[i], + LLVMConstInt(ctx->ac.i32, 10 * i, false), ""); + args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); + tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, ""); + tmp = LLVMBuildShl(builder, tmp, + LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), ""); + args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); + } + + args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, ""); + args.out[1] = LLVMGetUndef(ctx->ac.f32); + args.out[2] = LLVMGetUndef(ctx->ac.f32); + args.out[3] = LLVMGetUndef(ctx->ac.f32); + + args.target = V_008DFC_SQ_EXP_PRIM; + args.enabled_channels = 1; + args.done = true; + args.valid_mask = false; + args.compr = false; + + ac_build_export(&ctx->ac, &args); +} + +/** + * Emit the epilogue of an API VS or TES shader compiled as ESGS shader. + */ +void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, + unsigned max_outputs, + LLVMValueRef *addrs) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct tgsi_shader_info *info = &ctx->shader->selector->info; + struct si_shader_output_values *outputs = NULL; + LLVMBuilderRef builder = ctx->ac.builder; + struct lp_build_if_state if_state; + LLVMValueRef tmp; + + assert(!ctx->shader->is_gs_copy_shader); + assert(info->num_outputs <= max_outputs); + + outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0])); + + for (unsigned i = 0; i < info->num_outputs; i++) { + outputs[i].semantic_name = info->output_semantic_name[i]; + outputs[i].semantic_index = info->output_semantic_index[i]; + + /* This is used only by streamout. */ + for (unsigned j = 0; j < 4; j++) { + outputs[i].values[j] = + LLVMBuildLoad(builder, + addrs[4 * i + j], + ""); + outputs[i].vertex_stream[j] = + (info->output_streams[i] >> (2 * j)) & 3; + } + } + + lp_build_endif(&ctx->merged_wrap_if_state); + + LLVMValueRef prims_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); + LLVMValueRef vtx_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8); + LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT, + ac_get_thread_id(&ctx->ac), prims_in_wave, ""); + LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT, + ac_get_thread_id(&ctx->ac), vtx_in_wave, ""); + LLVMValueRef vtxindex[] = { + si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 0, 16), + si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 16, 16), + si_unpack_param(ctx, ctx->param_gs_vtx23_offset, 0, 16), + }; + + /* Determine the number of vertices per primitive. */ + unsigned num_vertices; + LLVMValueRef num_vertices_val; + + if (ctx->type == PIPE_SHADER_VERTEX) { + if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) { + /* Blits always use axis-aligned rectangles with 3 vertices. */ + num_vertices = 3; + num_vertices_val = LLVMConstInt(ctx->i32, 3, 0); + } else { + /* Extract OUTPRIM field. */ + tmp = si_unpack_param(ctx, ctx->param_vs_state_bits, 2, 2); + num_vertices_val = LLVMBuildAdd(builder, tmp, ctx->i32_1, ""); + num_vertices = 3; /* TODO: optimize for points & lines */ + } + } else { + assert(ctx->type == PIPE_SHADER_TESS_EVAL); + + if (info->properties[TGSI_PROPERTY_TES_POINT_MODE]) + num_vertices = 1; + else if (info->properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES) + num_vertices = 2; + else + num_vertices = 3; + + num_vertices_val = LLVMConstInt(ctx->i32, num_vertices, false); + } + + /* TODO: streamout */ + + /* TODO: primitive culling */ + + build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); + + /* Export primitive data to the index buffer. Format is: + * - bits 0..8: index 0 + * - bit 9: edge flag 0 + * - bits 10..18: index 1 + * - bit 19: edge flag 1 + * - bits 20..28: index 2 + * - bit 29: edge flag 2 + * - bit 31: null primitive (skip) + * + * For the first version, we will always build up all three indices + * independent of the primitive type. The additional garbage data + * shouldn't hurt. + * + * TODO: culling depends on the primitive type, so can have some + * interaction here. + */ + lp_build_if(&if_state, &ctx->gallivm, is_gs_thread); + { + struct ngg_prim prim = {}; + + prim.num_vertices = num_vertices; + prim.isnull = ctx->ac.i1false; + memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3); + + for (unsigned i = 0; i < num_vertices; ++i) { + tmp = LLVMBuildLShr(builder, ctx->abi.gs_invocation_id, + LLVMConstInt(ctx->ac.i32, 8 + i, false), ""); + prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); + } + + build_export_prim(ctx, &prim); + } + lp_build_endif(&if_state); + + /* Export per-vertex data (positions and parameters). */ + lp_build_if(&if_state, &ctx->gallivm, is_es_thread); + { + unsigned i; + + /* Unconditionally (re-)load the values for proper SSA form. */ + for (i = 0; i < info->num_outputs; i++) { + for (unsigned j = 0; j < 4; j++) { + outputs[i].values[j] = + LLVMBuildLoad(builder, + addrs[4 * i + j], + ""); + } + } + + /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */ + if (ctx->type == PIPE_SHADER_TESS_EVAL && + ctx->shader->key.mono.u.vs_export_prim_id) { + outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID; + outputs[i].semantic_index = 0; + outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0)); + for (unsigned j = 1; j < 4; j++) + outputs[i].values[j] = LLVMGetUndef(ctx->f32); + + memset(outputs[i].vertex_stream, 0, + sizeof(outputs[i].vertex_stream)); + i++; + } + + si_llvm_export_vs(ctx, outputs, i); + } + lp_build_endif(&if_state); + + FREE(outputs); +} diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index d733452..0ca065f 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -21,6 +21,7 @@ files_libradeonsi = files( 'cik_sdma.c', 'driinfo_radeonsi.h', + 'gfx10_shader_ngg.c', 'si_blit.c', 'si_buffer.c', 'si_build_pm4.h', diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b6410c6..2ab1833 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4412,6 +4412,10 @@ static void declare_streamout_params(struct si_shader_context *ctx, static unsigned si_get_max_workgroup_size(const struct si_shader *shader) { switch (shader->selector->type) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: + return shader->key.as_ngg ? 128 : 0; + case PIPE_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier * instructions on chips where we use s_barrier. */ @@ -4582,7 +4586,7 @@ static void create_function(struct si_shader_context *ctx) if (ctx->screen->info.chip_class >= GFX9) { if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL) type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */ - else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY) + else if (shader->key.as_es || shader->key.as_ngg || type == PIPE_SHADER_GEOMETRY) type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY; } @@ -4708,7 +4712,12 @@ static void create_function(struct si_shader_context *ctx) /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_GEOMETRY); - ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + + if (ctx->shader->key.as_ngg) + add_arg_assign(&fninfo, ARG_SGPR, ctx->i32, &ctx->gs_tg_info); + else + ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4716,11 +4725,17 @@ static void create_function(struct si_shader_context *ctx) add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ declare_global_desc_pointers(ctx, &fninfo); - declare_per_stage_desc_pointers(ctx, &fninfo, - (ctx->type == PIPE_SHADER_VERTEX || - ctx->type == PIPE_SHADER_TESS_EVAL)); + if (ctx->type != PIPE_SHADER_VERTEX || !vs_blit_property) { + declare_per_stage_desc_pointers(ctx, &fninfo, + (ctx->type == PIPE_SHADER_VERTEX || + ctx->type == PIPE_SHADER_TESS_EVAL)); + } + if (ctx->type == PIPE_SHADER_VERTEX) { - declare_vs_specific_input_sgprs(ctx, &fninfo); + if (vs_blit_property) + declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property); + else + declare_vs_specific_input_sgprs(ctx, &fninfo); } else { ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4747,8 +4762,9 @@ static void create_function(struct si_shader_context *ctx) declare_tes_input_vgprs(ctx, &fninfo); } - if (ctx->type == PIPE_SHADER_VERTEX || - ctx->type == PIPE_SHADER_TESS_EVAL) { + if (ctx->shader->key.as_es && + (ctx->type == PIPE_SHADER_VERTEX || + ctx->type == PIPE_SHADER_TESS_EVAL)) { unsigned num_user_sgprs; if (ctx->type == PIPE_SHADER_VERTEX) @@ -5925,6 +5941,8 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; else if (shader->key.opt.vs_as_prim_discard_cs) ctx->abi.emit_outputs = si_llvm_emit_prim_discard_cs_epilogue; + else if (shader->key.as_ngg) + ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; else ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; bld_base->emit_epilogue = si_tgsi_emit_epilogue; @@ -5948,8 +5966,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; if (shader->key.as_es) ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; - else - ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; + else { + if (shader->key.as_ngg) + ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; + else + ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; + } bld_base->emit_epilogue = si_tgsi_emit_epilogue; break; case PIPE_SHADER_GEOMETRY: @@ -5994,6 +6016,10 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) * * For monolithic merged shaders, the first shader is wrapped in an * if-block together with its prolog in si_build_wrapper_function. + * + * NGG vertex and tess eval shaders running as the last + * vertex/geometry stage handle execution explicitly using + * if-statements. */ if (ctx->screen->info.chip_class >= GFX9) { if (!shader->is_monolithic && @@ -6005,28 +6031,50 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) si_init_exec_from_input(ctx, ctx->param_merged_wave_info, 0); } else if (ctx->type == PIPE_SHADER_TESS_CTRL || - ctx->type == PIPE_SHADER_GEOMETRY) { + ctx->type == PIPE_SHADER_GEOMETRY || + shader->key.as_ngg) { + LLVMValueRef num_threads; + bool nested_barrier; + if (!shader->is_monolithic) ac_init_exec_full_mask(&ctx->ac); - LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); + if (ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_GEOMETRY) { + /* Number of patches / primitives */ + num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); + nested_barrier = true; + } else { + /* Number of vertices */ + num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8); + nested_barrier = false; + } + LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); - /* The barrier must execute for all shaders in a - * threadgroup. - * - * Execute the barrier inside the conditional block, - * so that empty waves can jump directly to s_endpgm, - * which will also signal the barrier. - * - * If the shader is TCS and the TCS epilog is present - * and contains a barrier, it will wait there and then - * reach s_endpgm. - */ - si_llvm_emit_barrier(NULL, bld_base, NULL); + if (nested_barrier) { + /* Execute a barrier before the second shader in + * a merged shader. + * + * Execute the barrier inside the conditional block, + * so that empty waves can jump directly to s_endpgm, + * which will also signal the barrier. + * + * This is possible in gfx9, because an empty wave + * for the second shader does not participate in + * the epilogue. With NGG, empty waves may still + * be required to export data (e.g. GS output vertices), + * so we cannot let them exit early. + * + * If the shader is TCS and the TCS epilog is present + * and contains a barrier, it will wait there and then + * reach s_endpgm. + */ + si_llvm_emit_barrier(NULL, bld_base, NULL); + } } } @@ -6099,6 +6147,8 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) { key->vs_prolog.as_es = 1; key->vs_prolog.num_merged_next_stage_vgprs = 5; + } else if (shader_out->key.as_ngg) { + key->vs_prolog.num_merged_next_stage_vgprs = 5; } /* Enable loading the InstanceID VGPR. */ @@ -7227,6 +7277,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, key->vs_prolog.num_input_sgprs + i, ""); } + struct lp_build_if_state wrap_if_state; + LLVMValueRef original_ret = ret; + bool wrapped = false; + + if (key->vs_prolog.is_monolithic && key->vs_prolog.as_ngg) { + LLVMValueRef num_threads; + LLVMValueRef ena; + + num_threads = si_unpack_param(ctx, 3, 0, 8); + ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, + ac_get_thread_id(&ctx->ac), num_threads, ""); + lp_build_if(&wrap_if_state, &ctx->gallivm, ena); + wrapped = true; + } + /* Compute vertex load indices from instance divisors. */ LLVMValueRef instance_divisor_constbuf = NULL; @@ -7282,6 +7347,20 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, fninfo.num_params + i, ""); } + if (wrapped) { + lp_build_endif(&wrap_if_state); + + LLVMValueRef values[2] = { + ret, + original_ret + }; + LLVMBasicBlockRef bbs[2] = { + wrap_if_state.true_block, + wrap_if_state.entry_block + }; + ret = ac_build_phi(&ctx->ac, LLVMTypeOf(ret), 2, values, bbs); + } + si_llvm_build_ret(ctx, ret); } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index f758e99..5419a73 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -186,6 +186,13 @@ struct si_shader_context { int param_tes_rel_patch_id; /* HW ES */ int param_es2gs_offset; + /* HW GS */ + /* On gfx10: + * - bits 0..10: ordered_wave_id + * - bits 12..20: number of vertices in group + * - bits 22..30: number of primitives in group + */ + LLVMValueRef gs_tg_info; /* API GS */ int param_gs2vs_offset; int param_gs_wave_id; /* GFX6 */ @@ -372,4 +379,8 @@ LLVMValueRef si_unpack_param(struct si_shader_context *ctx, unsigned param, unsigned rshift, unsigned bitwidth); +void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, + unsigned max_outputs, + LLVMValueRef *addrs); + #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 33b4068..a9946d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1128,7 +1128,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, if (ctx->screen->info.chip_class >= GFX9) { if (ctx->shader->key.as_ls) real_shader_type = PIPE_SHADER_TESS_CTRL; - else if (ctx->shader->key.as_es) + else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg) real_shader_type = PIPE_SHADER_GEOMETRY; } -- 2.7.4