From 02bb506c54f998cfbc907758282a5748755c67ea Mon Sep 17 00:00:00 2001
From: Gert Wollny
Date: Mon, 5 Sep 2022 09:21:38 +0200
Subject: [PATCH] r600/sfn: Lower tex,txl,txb and txf to backend

This cleans up the texture code a bit and also gives more
opportunities for optimization in NIR.

Signed-off-by: Gert Wollny
Part-of:
---
 src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp | 330 ++++++++++++++++++-------
 src/gallium/drivers/r600/sfn/sfn_instr_tex.h   |  17 +-
 src/gallium/drivers/r600/sfn/sfn_nir.cpp       |   3 +
 3 files changed, 260 insertions(+), 90 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
index 53bdfad..1262622 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp
@@ -27,8 +27,11 @@
 #include "sfn_instr_tex.h"
 #include "sfn_instr_alu.h"
 #include "sfn_instr_fetch.h"
+#include "sfn_nir.h"
 #include "sfn_debug.h"
 
+#include "nir_builder.h"
+
 namespace r600 {
 
 using std::string;
@@ -322,6 +325,10 @@ bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
 {
    Inputs src(*tex, shader.value_factory());
 
+   if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) != -1)
+      return emit_lowered_tex(tex, src, shader);
+
+
    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
       switch (tex->op) {
       case nir_texop_txs:
@@ -333,13 +340,6 @@ bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
       }
    } else {
       switch (tex->op) {
-      case nir_texop_tex:
-         return emit_tex_tex(tex, src, shader);
-      case nir_texop_txf:
-         return emit_tex_txf(tex, src, shader);
-      case nir_texop_txb:
-      case nir_texop_txl:
-         return emit_tex_txl_txb(tex, src, shader);
       case nir_texop_txs:
          return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
       case nir_texop_lod:
@@ -381,11 +381,12 @@ get_sampler_id(int sampler_id, const nir_variable *deref)
    return result;
 }
 
-
-bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
+bool TexInstr::emit_lowered_tex(nir_tex_instr* tex, Inputs& src, Shader& shader)
 {
-   auto& vf = shader.value_factory();
+   assert(src.backend1);
+   assert(src.backend2);
 
+   auto& vf = shader.value_factory();
    sfn_log << SfnLog::instr << "emit '"
            << *reinterpret_cast<nir_instr*>(tex)
            << "' (" << __func__ << ")\n";
@@ -393,99 +394,33 @@ bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
    assert(!sampler.indirect);
 
-   auto src_coord = prepare_source(tex, src, shader);
    auto dst = vf.dest_vec4(tex->dest, pin_group);
 
-   auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
-                           sampler.id + R600_MAX_CONST_BUFFERS,
-                           src.sampler_offset);
-   if (tex->is_array)
-      irt->set_tex_flag(TexInstr::z_unnormalized);
-
-   irt->set_rect_coordinate_flags(tex);
-   irt->set_coord_offsets(src.offset);
-
-   shader.emit_instruction(irt);
-   return true;
-}
+   auto params = nir_src_as_const_value(*src.backend2);
+   int32_t coord_mask = params[0].i32; /* bit i set: channel i of backend1 is used */
+   int32_t flags = params[1].i32; /* bit f set: TexInstr flag f gets set below */
 
-bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader)
-{
-   auto& vf = shader.value_factory();
-
-   auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
-   assert(!sampler.indirect && "Indirect sampler selection not yet supported");
-
-   auto src_coord = prepare_source(tex, src, shader);
+   RegisterVec4::Swizzle src_swizzle = {0};
+   for (int i = 0; i < 4; ++i)
+      src_swizzle[i] = (coord_mask & (1 << i)) ? i : 7; /* channels outside the mask get 7 */
 
-   auto dst = vf.dest_vec4(tex->dest, pin_group);
+   auto src_coord = vf.src_vec4(*src.backend1, pin_group, src_swizzle);
 
    auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
                            sampler.id + R600_MAX_CONST_BUFFERS,
                            src.sampler_offset);
 
-   if (tex->is_array)
-      irt->set_tex_flag(TexInstr::z_unnormalized);
+   for (const auto f : TexFlags) {
+      if (flags & (1 << f))
+         irt->set_tex_flag(f);
+   }
 
-   irt->set_rect_coordinate_flags(tex);
    irt->set_coord_offsets(src.offset);
 
    shader.emit_instruction(irt);
    return true;
 }
 
-
-bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
-{
-   auto& vf = shader.value_factory();
-
-   int sampler = tex->sampler_index;
-
-   auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
-   swizzle[3] = 3;
-
-   if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
-      swizzle[2] = 1;
-      swizzle[1] = 7;
-   }
-
-   auto src_coord = vf.temp_vec4(pin_group, swizzle);
-
-   for (unsigned i = 0; i < tex->coord_components; i++) {
-      unsigned k = i;
-      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
-         k = 2;
-
-
-      if (src.offset) {
-         shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i],
-                                              vf.src(src.offset[i], i),
-                                              AluInstr::write));
-      } else {
-         shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write));
-      }
-   }
-
-   shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write));
-
-   auto dst = vf.dest_vec4(tex->dest, pin_group);
-
-   auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord,
-                              sampler,
-                              sampler + R600_MAX_CONST_BUFFERS,
-                              src.sampler_offset);
-
-   if (tex->is_array)
-      tex_ir->set_tex_flag(z_unnormalized);
-
-   tex_ir->set_rect_coordinate_flags(tex);
-   tex_ir->set_sampler_offset(src.sampler_offset);
-
-   shader.emit_instruction(tex_ir);
-
-   return true;
-}
-
 bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
 {
    auto& vf = shader.value_factory();
@@ -956,6 +891,8 @@ TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
    ms_index(nullptr),
    sampler_offset(nullptr),
    texture_offset(nullptr),
+   backend1(nullptr),
+   backend2(nullptr),
    opcode(ld)
 {
    //sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
@@ -1004,6 +941,12 @@ TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
       case nir_tex_src_sampler_offset:
          sampler_offset = vf.src(instr.src[i], 0);
          break;
+      case nir_tex_src_backend1:
+         backend1 = &instr.src[i].src;
+         break;
+      case nir_tex_src_backend2:
+         backend2 = &instr.src[i].src;
+         break;
       case nir_tex_src_plane:
       case nir_tex_src_projector:
       case nir_tex_src_min_lod:
@@ -1109,5 +1052,218 @@ void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr)
    }
 }
 
+class LowerTexToBackend : public NirLowerInstruction {
+public:
+   LowerTexToBackend(amd_gfx_level chip_class);
+private:
+   bool filter(const nir_instr *instr) const override;
+   nir_ssa_def *lower(nir_instr *instr) override;
+
+   nir_ssa_def *lower_tex(nir_tex_instr *tex);
+   nir_ssa_def *lower_txf(nir_tex_instr *tex);
+
+   nir_ssa_def *prepare_coord(nir_tex_instr *tex,
+                              int &unnormalized_mask,
+                              int &used_coord_mask);
+   int get_src_coords(nir_tex_instr *tex, std::array<nir_ssa_def *, 4> &coord,
+                      bool round_array_index);
+   nir_ssa_def *prep_src(std::array<nir_ssa_def *, 4> &coord,
+                         int &used_coord_mask);
+   nir_ssa_def *finalize(nir_tex_instr *tex,
+                         nir_ssa_def *backend1, nir_ssa_def *backend2);
+
+   amd_gfx_level m_chip_class;
+};
+
+bool r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class)
+{
+   return LowerTexToBackend(chip_class).run(shader);
+}
+
+LowerTexToBackend::LowerTexToBackend(amd_gfx_level chip_class):
+   m_chip_class(chip_class)
+{
+
+}
+
+bool LowerTexToBackend::filter(const nir_instr *instr) const
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   auto tex = nir_instr_as_tex(instr);
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+      return false;
+   switch (tex->op) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txf:
+      break;
+   default:
+      return false;
+   }
+
+   return nir_tex_instr_src_index(tex, nir_tex_src_backend1) == -1; /* skip already lowered */
+}
+
+nir_ssa_def *LowerTexToBackend::lower(nir_instr *instr)
+{
+   b->cursor = nir_before_instr(instr);
+
+   auto tex = nir_instr_as_tex(instr);
+   switch (tex->op) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+      return lower_tex(tex);
+   case nir_texop_txf:
+      return lower_txf(tex);
+
+   default:
+      return nullptr;
+   }
+}
+
+nir_ssa_def *LowerTexToBackend::lower_tex(nir_tex_instr *tex)
+{
+   int unnormalized_mask = 0;
+   int used_coord_mask = 0;
+
+   nir_ssa_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
+
+   nir_ssa_def *backend2 =
+         nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0);
+
+   return finalize(tex, backend1, backend2);
+}
+
+nir_ssa_def *LowerTexToBackend::lower_txf(nir_tex_instr *tex)
+{
+   std::array<nir_ssa_def *, 4> new_coord = {
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr
+   };
+
+   get_src_coords(tex, new_coord, false);
+   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+   assert(lod_idx != -1);
+   new_coord[3] = tex->src[lod_idx].src.ssa;
+
+   int used_coord_mask = 0;
+   nir_ssa_def *backend1 = prep_src(new_coord, used_coord_mask);
+   nir_ssa_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
+
+   return finalize(tex, backend1, backend2);
+}
+
+nir_ssa_def *LowerTexToBackend::finalize(nir_tex_instr *tex, nir_ssa_def *backend1,
+                                         nir_ssa_def *backend2)
+{
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(backend1));
+   nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(backend2));
+   /* remove_src() takes a source index, so each type is looked up first */
+
+   static const nir_tex_src_type cleanup[] = {
+      nir_tex_src_coord,
+      nir_tex_src_lod,
+      nir_tex_src_bias,
+      nir_tex_src_comparator
+   };
+
+   for (const auto type : cleanup) {
+      int pos = nir_tex_instr_src_index(tex, type);
+      if (pos >= 0)
+         nir_tex_instr_remove_src(tex, pos);
+   }
+   return NIR_LOWER_INSTR_PROGRESS;
+}
+
+nir_ssa_def *LowerTexToBackend::prep_src(std::array<nir_ssa_def *, 4> &coord,
+                                         int &used_coord_mask)
+{
+   for (int i = 0; i < 4; ++i) {
+      if (coord[i])
+         used_coord_mask |= 1 << i;
+      else
+         coord[i] = nir_ssa_undef(b, 1, 32); /* filler so that nir_vec gets four values */
+   }
+
+   return nir_vec(b, coord.data(), 4);
+}
+
+nir_ssa_def *LowerTexToBackend::prepare_coord(nir_tex_instr *tex,
+                                              int &unnormalized_mask,
+                                              int &used_coord_mask)
+{
+   std::array<nir_ssa_def *, 4> new_coord = {
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr
+   };
+
+   unnormalized_mask = get_src_coords(tex, new_coord, true);
+   used_coord_mask = 0;
+
+   int comp_idx = tex->is_shadow ?
+                     nir_tex_instr_src_index(tex, nir_tex_src_comparator):
+                     -1;
+
+   if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
+      int idx = tex->op == nir_texop_txl ?
+                   nir_tex_instr_src_index(tex, nir_tex_src_lod) :
+                   nir_tex_instr_src_index(tex, nir_tex_src_bias);
+      assert(idx != -1);
+      new_coord[3] = tex->src[idx].src.ssa;
+
+      if (comp_idx >= 0)
+         new_coord[2] = tex->src[comp_idx].src.ssa;
+   } else if (comp_idx >= 0) {
+      new_coord[3] = tex->src[comp_idx].src.ssa;
+   }
+   return prep_src(new_coord, used_coord_mask);
+}
+
+int LowerTexToBackend::get_src_coords(nir_tex_instr *tex,
+                                      std::array<nir_ssa_def *, 4>& coord,
+                                      bool round_array_index)
+{
+   int unnormalized_mask = 0;
+   auto coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+   assert(coord_idx != -1);
+   auto old_coord = tex->src[coord_idx];
+
+   coord = {
+      nir_channel(b, old_coord.src.ssa, 0),
+      nullptr,
+      nullptr,
+      nullptr
+   };
+
+   if (tex->coord_components > 1) {
+      if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
+         coord[2] = nir_channel(b, old_coord.src.ssa, 1);
+      else
+         coord[1] = nir_channel(b, old_coord.src.ssa, 1);
+   }
+
+   if (tex->coord_components > 2) {
+      coord[2] = nir_channel(b, old_coord.src.ssa, 2);
+   }
+   if (tex->is_array) {
+      unnormalized_mask |= 0x4; /* bit 2: the removed code set z_unnormalized for arrays */
+      if (round_array_index)
+         coord[2] = nir_fround_even(b, coord[2]);
+   }
+
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+      unnormalized_mask |= 0x3; /* bits 0 and 1: x and y */
+   }
+
+   return unnormalized_mask;
+}
 
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
index 056e414..9caafd7 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h
@@ -76,6 +76,15 @@ public:
       num_tex_flag
    };
 
+   static constexpr Flags TexFlags[] = {
+      x_unnormalized,
+      y_unnormalized,
+      z_unnormalized,
+      w_unnormalized,
+      grad_fine,
+      /* num_tex_flag (last enumerator, presumably the count) must not be iterated as a flag */
+   };
+
    struct Inputs {
       Inputs(const nir_tex_instr& instr, ValueFactory &vf);
       const nir_variable *sampler_deref;
@@ -91,6 +100,8 @@ public:
       PVirtualValue ms_index;
       PVirtualValue sampler_offset;
       PVirtualValue texture_offset;
+      nir_src *backend1;
+      nir_src *backend2;
 
       RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
 
@@ -158,17 +169,15 @@ private:
    static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
 
    static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
-   static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
    static bool emit_tex_tex_ms_direct(nir_tex_instr *tex, Inputs& src, Shader& shader);
    static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
-   static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
-   static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
    static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
                             RegisterVec4::Swizzle dest_swz, Shader& shader);
    static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
    static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
    static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
    static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
+   static bool emit_lowered_tex(nir_tex_instr* instr, Inputs& src, Shader& shader);
 
    void set_coord_offsets(nir_src *offset);
    void set_rect_coordinate_flags(nir_tex_instr* instr);
@@ -188,6 +197,8 @@ private:
    std::list<TexInstr *, Allocator<TexInstr *>> m_prepare_instr;
 };
 
+bool r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class);
+
 }
 
 #endif // INSTR_TEX_H
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index 35757fd..fc09aed 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -40,6 +40,7 @@
 #include "sfn_nir_lower_fs_out_to_vector.h"
 #include "sfn_nir_lower_alu.h"
 #include "sfn_nir_lower_tex.h"
+#include "sfn_instr_tex.h"
 #include "sfn_optimizer.h"
 #include "sfn_ra.h"
 #include "sfn_scheduler.h"
@@ -754,6 +755,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
    NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
    NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
    NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sh, r600::r600_nir_lower_tex_to_backend, rctx->b.gfx_level);
+
    NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
    NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
 
-- 
2.7.4