From 1e2d4b32b0cb505ad589915353e19c46151be732 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 2 Jun 2023 16:25:47 +0800 Subject: [PATCH] ac/llvm,radeonsi: lower nir_load_ring_gsvs_amd in abi MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 1 - src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 84 +++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader_internal.h | 2 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 14 ---- src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 80 --------------------- 5 files changed, 84 insertions(+), 97 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index c0bf744..0082164 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3085,7 +3085,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_load_first_vertex: case nir_intrinsic_load_tess_rel_patch_id_amd: case nir_intrinsic_load_ring_attr_amd: - case nir_intrinsic_load_ring_gsvs_amd: case nir_intrinsic_load_lds_ngg_scratch_base_amd: case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd: result = ctx->abi->intrinsic_load(ctx->abi, instr); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 65832d5..f991ebc 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -19,6 +19,7 @@ struct lower_abi_state { nir_ssa_def *esgs_ring; nir_ssa_def *tess_offchip_ring; + nir_ssa_def *gsvs_ring[4]; }; #define GET_FIELD_NIR(field) \ @@ -224,6 +225,80 @@ static nir_ssa_def *build_esgs_ring_desc(nir_builder *b, enum amd_gfx_level gfx_ return nir_vec(b, vec, 4); } +static void build_gsvs_ring_desc(nir_builder *b, struct lower_abi_state *s) +{ + const struct si_shader_selector *sel = s->shader->selector; + const union 
si_shader_key *key = &s->shader->key; + + if (s->shader->is_gs_copy_shader) { + s->gsvs_ring[0] = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 4); + } else if (sel->stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) { + nir_ssa_def *base_addr = si_nir_load_internal_binding(b, s->args, SI_RING_GSVS, 2); + base_addr = nir_pack_64_2x32(b, base_addr); + + /* The conceptual layout of the GSVS ring is + * v0c0 .. vLc0 v0c1 .. vLc1 .. + * but the real memory layout is swizzled across + * threads: + * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL + * t16v0c0 .. + * Override the buffer descriptor accordingly. + */ + + for (unsigned stream = 0; stream < 4; stream++) { + unsigned num_components = sel->info.num_stream_output_components[stream]; + if (!num_components) + continue; + + nir_ssa_def *desc[4]; + desc[0] = nir_unpack_64_2x32_split_x(b, base_addr); + desc[1] = nir_unpack_64_2x32_split_y(b, base_addr); + + unsigned stride = 4 * num_components * sel->info.base.gs.vertices_out; + /* Limit on the stride field for <= GFX7. */ + assert(stride < (1 << 14)); + + desc[1] = nir_ior_imm( + b, desc[1], S_008F04_STRIDE(stride) | S_008F04_SWIZZLE_ENABLE_GFX6(1)); + + unsigned num_records = s->shader->wave_size; + desc[2] = nir_imm_int(b, num_records); + + uint32_t rsrc3 = + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ + S_008F0C_ADD_TID_ENABLE(1); + + if (sel->screen->info.gfx_level >= GFX10) { + rsrc3 |= + S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | + S_008F0C_RESOURCE_LEVEL(1); + } else { + /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ + unsigned data_format = + sel->screen->info.gfx_level == GFX8 || sel->screen->info.gfx_level == GFX9 ? 
+ 0 : V_008F0C_BUF_DATA_FORMAT_32; + + rsrc3 |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(data_format) | + S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */ + } + + desc[3] = nir_imm_int(b, rsrc3); + + s->gsvs_ring[stream] = nir_vec(b, desc, 4); + + /* next stream's desc addr */ + base_addr = nir_iadd_imm(b, base_addr, stride * num_records); + } + } +} + static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s) { const struct si_shader_selector *sel = s->shader->selector; @@ -238,6 +313,8 @@ static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s if (sel->stage == MESA_SHADER_TESS_CTRL || sel->stage == MESA_SHADER_TESS_EVAL) s->tess_offchip_ring = build_tess_ring_desc(b, sel->screen, s->args); + + build_gsvs_ring_desc(b, s); } static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_state *s) @@ -624,6 +701,13 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s assert(s->tess_offchip_ring); replacement = s->tess_offchip_ring; break; + case nir_intrinsic_load_ring_gsvs_amd: { + unsigned stream_id = nir_intrinsic_stream_id(intrin); + /* Unused nir_load_ring_gsvs_amd may not be eliminated yet. */ + replacement = s->gsvs_ring[stream_id] ? + s->gsvs_ring[stream_id] : nir_ssa_undef(b, 4, 32); + break; + } default: return false; } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index e2b4932..3914c89 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -97,7 +97,6 @@ struct si_shader_context { struct ac_llvm_compiler *compiler; /* Preloaded descriptors. 
*/ - LLVMValueRef gsvs_ring[4]; LLVMValueRef instance_divisor_constbuf; LLVMValueRef gs_ngg_emit; @@ -194,7 +193,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * LLVMValueRef si_is_es_thread(struct si_shader_context *ctx); LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx); void si_llvm_es_build_end(struct si_shader_context *ctx); -void si_preload_gs_rings(struct si_shader_context *ctx); void si_llvm_gs_build_end(struct si_shader_context *ctx); /* si_shader_llvm_tess.c */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index e5371fb..b9b6e89 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -674,9 +674,6 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin case nir_intrinsic_load_tess_rel_patch_id_amd: return si_get_rel_patch_id(ctx); - case nir_intrinsic_load_ring_gsvs_amd: - return ctx->gsvs_ring[nir_intrinsic_stream_id(intrin)]; - case nir_intrinsic_load_lds_ngg_scratch_base_amd: return LLVMBuildPtrToInt(ctx->ac.builder, ctx->gs_ngg_scratch.value, ctx->ac.i32, ""); @@ -763,15 +760,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ac_build_load_to_sgpr( &ctx->ac, buf, LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0)); } - - /* preload GSVS ring for GS copy shader */ - if (shader->is_gs_copy_shader) { - ctx->gsvs_ring[0] = - ac_build_load_to_sgpr( - &ctx->ac, - ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), - LLVMConstInt(ctx->ac.i32, SI_RING_GSVS, 0)); - } break; case MESA_SHADER_TESS_CTRL: @@ -792,8 +780,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS); LLVMSetLinkage(ctx->gs_ngg_emit, LLVMExternalLinkage); LLVMSetAlignment(ctx->gs_ngg_emit, 4); - } else { - 
si_preload_gs_rings(ctx); } break; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 2dd04d4..d13e373 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -75,83 +75,3 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx) if (ctx->screen->info.gfx_level >= GFX9) ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); } - -void si_preload_gs_rings(struct si_shader_context *ctx) -{ - if (ctx->ac.gfx_level >= GFX11) - return; - - const struct si_shader_selector *sel = ctx->shader->selector; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_GSVS, 0); - LLVMValueRef base_ring = ac_build_load_to_sgpr(&ctx->ac, - ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), offset); - - /* The conceptual layout of the GSVS ring is - * v0c0 .. vLv0 v0c1 .. vLc1 .. - * but the real memory layout is swizzled across - * threads: - * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL - * t16v0c0 .. - * Override the buffer descriptor accordingly. - */ - LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2); - uint64_t stream_offset = 0; - - for (unsigned stream = 0; stream < 4; ++stream) { - unsigned num_components; - unsigned stride; - unsigned num_records; - LLVMValueRef ring, tmp; - - num_components = sel->info.num_stream_output_components[stream]; - if (!num_components) - continue; - - stride = 4 * num_components * sel->info.base.gs.vertices_out; - - /* Limit on the stride field for <= GFX7. 
*/ - assert(stride < (1 << 14)); - - num_records = ctx->ac.wave_size; - - ring = LLVMBuildBitCast(builder, base_ring, v2i64, ""); - tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_0, ""); - tmp = LLVMBuildAdd(builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), ""); - stream_offset += stride * ctx->ac.wave_size; - - ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_0, ""); - ring = LLVMBuildBitCast(builder, ring, ctx->ac.v4i32, ""); - tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_1, ""); - tmp = LLVMBuildOr( - builder, tmp, - LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride) | S_008F04_SWIZZLE_ENABLE_GFX6(1), 0), ""); - ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_1, ""); - ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, num_records, 0), - LLVMConstInt(ctx->ac.i32, 2, 0), ""); - - uint32_t rsrc3 = - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ - S_008F0C_ADD_TID_ENABLE(1); - - if (ctx->ac.gfx_level >= GFX10) { - rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); - } else { - /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ - unsigned data_format = ctx->ac.gfx_level == GFX8 || ctx->ac.gfx_level == GFX9 ? - 0 : V_008F0C_BUF_DATA_FORMAT_32; - - rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(data_format) | - S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */ - } - - ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, rsrc3, false), - LLVMConstInt(ctx->ac.i32, 3, 0), ""); - - ctx->gsvs_ring[stream] = ring; - } -} -- 2.7.4