From 716ac4a55dca2765bb8e1634f02e0b8ab2fcee7f Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Sun, 6 Nov 2022 18:55:46 -0500
Subject: [PATCH] nir: replace IS_SWIZZLED flag with ACCESS_IS_SWIZZLED_AMD
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Reviewed-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/common/ac_nir.c                         | 10 +++++-----
 src/amd/common/ac_nir_lower_esgs_io_to_mem.c    |  6 ++++--
 src/amd/common/ac_nir_lower_ngg.c               |  4 ++--
 src/amd/compiler/aco_instruction_selection.cpp  |  4 ++--
 src/amd/llvm/ac_nir_to_llvm.c                   |  4 ++--
 src/compiler/nir/nir_intrinsics.py              |  7 ++-----
 src/compiler/shader_enums.h                     |  7 +++++++
 7 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index 6762f7c..e54e656 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -220,7 +220,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
 
          outputs[location][j] =
             nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
-                                .base = offset, .is_swizzled = false,
+                                .base = offset,
                                 .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
 
          offset += gs_nir->info.gs.vertices_out * 16 * 4;
@@ -528,8 +528,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
       nir_ssa_def *data = nir_u2uN(b, output, 32);
 
       nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                           .is_swizzled = true,
-                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY,
+                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
+                                     ACCESS_IS_SWIZZLED_AMD,
                            /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
                            .memory_modes = nir_var_shader_out);
    }
@@ -571,8 +571,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
 
          nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
                               gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                              .is_swizzled = true,
-                              .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY,
+                              .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
+                                        ACCESS_IS_SWIZZLED_AMD,
                               /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
                               .memory_modes = nir_var_shader_out);
       }
diff --git a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
index 401b5ad..969448d 100644
--- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
+++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
@@ -110,9 +110,11 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
       store_bytes = MIN2(store_bytes, 2);
 
       nir_ssa_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
-      nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero, .is_swizzled = swizzled,
+      nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
                            .base = start_byte, .memory_modes = nir_var_shader_out,
-                           .access = ACCESS_COHERENT | (slc ? ACCESS_STREAM_CACHE_POLICY : 0));
+                           .access = ACCESS_COHERENT |
+                                     (slc ? ACCESS_STREAM_CACHE_POLICY : 0) |
+                                     (swizzled ? ACCESS_IS_SWIZZLED_AMD : 0));
 
       start_byte += store_bytes;
       bytes -= store_bytes;
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index ef90879..7a845f7 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -2076,8 +2076,8 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
       for (unsigned j = 0; j < 4; j++)
         comp[j] = outputs[i].chan[j] ? outputs[i].chan[j] : undef;
      nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, soffset, vindex,
-                          .is_swizzled = true, .memory_modes = nir_var_shader_out,
-                          .access = ACCESS_COHERENT);
+                          .memory_modes = nir_var_shader_out,
+                          .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);
    }
 
    nir_pop_if(b, NULL);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index a03836f..cea9935 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7270,7 +7270,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
 
-   bool swizzled = nir_intrinsic_is_swizzled(intrin);
+   bool swizzled = nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD;
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
    bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
 
@@ -7299,7 +7299,7 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[3].ssa));
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp();
 
-   bool swizzled = nir_intrinsic_is_swizzled(intrin);
+   bool swizzled = nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD;
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
    bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
 
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 3d7cf88..bdc979e 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -4180,7 +4180,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       LLVMValueRef vidx = idxen ? get_src(ctx, instr->src[3]) : NULL;
       unsigned num_components = instr->dest.ssa.num_components;
       unsigned const_offset = nir_intrinsic_base(instr);
-      bool swizzled = nir_intrinsic_is_swizzled(instr);
+      bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
       bool reorder = nir_intrinsic_can_reorder(instr);
       bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
       bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
@@ -4224,7 +4224,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
      LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]);
      LLVMValueRef vidx = idxen ? get_src(ctx, instr->src[4]) : NULL;
      unsigned const_offset = nir_intrinsic_base(instr);
-     bool swizzled = nir_intrinsic_is_swizzled(instr);
+     bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
      bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
      bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 42f9eab..c5b239b 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -220,9 +220,6 @@ index("nir_alu_type", "dest_type")
 # The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
 index("unsigned", "swizzle_mask")
 
-# Whether the load_buffer_amd/store_buffer_amd is swizzled
-index("bool", "is_swizzled")
-
 # Offsets for load_shared2_amd/store_shared2_amd
 index("uint8_t", "offset0")
 index("uint8_t", "offset1")
@@ -1317,9 +1314,9 @@ intrinsic("optimization_barrier_vgpr_amd", dest_comp=0, src_comp=[0],
 # src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
 # The index offset is multiplied by the stride in the descriptor. The vertex/scalar byte offsets
 # are in bytes.
-intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, IS_SWIZZLED, MEMORY_MODES, ACCESS], flags=[CAN_ELIMINATE])
+intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS], flags=[CAN_ELIMINATE])
 # src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset }
-intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, MEMORY_MODES, ACCESS])
+intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, MEMORY_MODES, ACCESS])
 
 # src[] = { address, unsigned 32-bit offset }.
 load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 30b8d43..03b2eaf 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -1000,6 +1000,13 @@ enum gl_access_qualifier
 
    /** Execute instruction also in helpers. */
    ACCESS_INCLUDE_HELPERS = (1 << 8),
+
+   /**
+    * Whether the address bits are swizzled by the hw. This practically means
+    * that loads can't be vectorized and must be exactly 32 bits on some chips.
+    * The swizzle amount is determined by the descriptor.
+    */
+   ACCESS_IS_SWIZZLED_AMD = (1 << 9),
 };
 
 /**
-- 
2.7.4
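
For code on other branches that still passes the removed IS_SWIZZLED index, the
migration follows the pattern in the hunks above: fold the swizzle bit into the
ACCESS index on the producer side and mask it back out in the backend. Below is
a minimal sketch in C, assuming the in-tree nir_builder helpers; the wrapper
names and parameters are illustrative placeholders, not Mesa API.

/* Sketch only: emit_swizzled_store() and buffer_op_is_swizzled() are made-up
 * wrappers. Only nir_store_buffer_amd(), nir_imm_int(), nir_intrinsic_access()
 * and the gl_access_qualifier flags come from the tree.
 */
#include "nir.h"
#include "nir_builder.h"

static void
emit_swizzled_store(nir_builder *b, nir_ssa_def *data, nir_ssa_def *desc,
                    nir_ssa_def *voffset, nir_ssa_def *soffset, bool swizzled)
{
   /* Producer side: the swizzle bit now rides in the ACCESS index instead of
    * the removed IS_SWIZZLED index.
    */
   nir_store_buffer_amd(b, data, desc, voffset, soffset, nir_imm_int(b, 0),
                        .access = ACCESS_COHERENT |
                                  (swizzled ? ACCESS_IS_SWIZZLED_AMD : 0),
                        .memory_modes = nir_var_shader_out);
}

static bool
buffer_op_is_swizzled(const nir_intrinsic_instr *intrin)
{
   /* Backend side: query the same bit when selecting instructions. */
   return nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD;
}

As in the hunks above, additional bits such as ACCESS_STREAM_CACHE_POLICY can
be OR'ed into the same .access value.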