From 09785e5e1b8475b3333292961b2d8d84c01d1c2d Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 6 Apr 2023 11:43:29 +0100 Subject: [PATCH] radv,radeonsi: use ac_nir_lower_tex fossil-db (navi21): Totals from 17279 (12.74% of 135636) affected shaders: MaxWaves: 270015 -> 269991 (-0.01%) Instrs: 24847385 -> 24843807 (-0.01%); split: -0.02%, +0.00% CodeSize: 133215364 -> 133198744 (-0.01%); split: -0.02%, +0.01% VGPRs: 1217632 -> 1217872 (+0.02%); split: -0.00%, +0.02% Latency: 405347021 -> 404971784 (-0.09%); split: -0.09%, +0.00% InvThroughput: 75386590 -> 75350344 (-0.05%); split: -0.07%, +0.03% VClause: 426986 -> 426821 (-0.04%); split: -0.04%, +0.01% SClause: 966751 -> 966971 (+0.02%); split: -0.01%, +0.03% Copies: 1738510 -> 1737970 (-0.03%); split: -0.08%, +0.05% PreSGPRs: 1169070 -> 1169120 (+0.00%); split: -0.00%, +0.00% PreVGPRs: 1136102 -> 1136183 (+0.01%); split: -0.00%, +0.01% Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 143 +------------- src/amd/llvm/ac_llvm_build.c | 184 ------------------ src/amd/llvm/ac_llvm_build.h | 3 - src/amd/llvm/ac_nir_to_llvm.c | 56 +----- src/amd/vulkan/radv_pipeline.c | 7 + src/amd/vulkan/radv_shader.c | 1 - src/gallium/drivers/radeonsi/si_shader.c | 7 + src/gallium/drivers/radeonsi/si_shader_nir.c | 1 - 8 files changed, 22 insertions(+), 380 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index b0f83eb9fec..0f1c3afebe0 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9059,119 +9059,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } } -void -build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc, - Temp* out_tc) -{ - Builder bld(ctx->program, ctx->block); - - Temp deriv_x = emit_extract_vector(ctx, deriv, 0, v1); - Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1); - Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1); - - Operand neg_one = Operand::c32(0xbf800000u); - Operand one = Operand::c32(0x3f800000u); - Operand two = Operand::c32(0x40000000u); - Operand four = Operand::c32(0x40800000u); - - Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma); - Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive); - Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma); - - Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id); - Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id); - is_ma_y = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), is_ma_y, is_ma_z); - Temp is_not_ma_x = bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), is_ma_z, is_ma_y); - - /* select sc */ - Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x); - Temp sgn = bld.vop2_e64( - aco_opcode::v_cndmask_b32, bld.def(v1), - bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_sgn_ma, sgn_ma, is_ma_z), one, is_ma_y); - *out_sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn); - - /* select tc */ - tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_y, deriv_z, is_ma_y); - sgn = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, sgn_ma, is_ma_y); - *out_tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn); - - /* select ma */ - tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y), - deriv_z, is_ma_z); - tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp); - *out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp); -} - -void -prepare_cube_coords(isel_context* ctx, std::vector& coords, Temp* ddx, Temp* ddy, - bool is_deriv, bool is_array) -{ - Builder bld(ctx->program, ctx->block); - Temp ma, tc, sc, id; - aco_opcode madak = - ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_madak_f32; - aco_opcode madmk = - ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmamk_f32 : aco_opcode::v_madmk_f32; - - /* see comment in ac_prepare_cube_coords() */ - if (is_array && ctx->options->gfx_level <= GFX8) - coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]); - - ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]); - - aco_ptr vop3a{ - create_instruction(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)}; - vop3a->operands[0] = Operand(ma); - vop3a->abs[0] = true; - Temp invma = bld.tmp(v1); - vop3a->definitions[0] = Definition(invma); - ctx->block->instructions.emplace_back(std::move(vop3a)); - - sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]); - if (!is_deriv) - sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/)); - - tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]); - if (!is_deriv) - tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/)); - - id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]); - - if (is_deriv) { - sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma); - tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, invma); - - for (unsigned i = 0; i < 2; i++) { - /* see comment in ac_prepare_cube_coords() */ - Temp deriv_ma; - Temp deriv_sc, deriv_tc; - build_cube_select(ctx, ma, id, i ? *ddy : *ddx, &deriv_ma, &deriv_sc, &deriv_tc); - - deriv_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, invma); - - Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_sc, invma), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, sc)); - Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_tc, invma), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, tc)); - *(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y); - } - - sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc); - tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc); - } - - if (is_array) { - id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/)); - coords.erase(coords.begin() + 3); - } - coords[0] = sc; - coords[1] = tc; - coords[2] = id; -} - void get_const_vec(nir_ssa_def* vec, nir_const_value* cv[4]) { @@ -9363,25 +9250,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) } std::vector unpacked_coord; - if (ctx->options->gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->coord_components) { - RegClass rc = a16 ? v2b : v1; - for (unsigned i = 0; i < coord.bytes() / rc.bytes(); i++) - unpacked_coord.emplace_back(emit_extract_vector(ctx, coord, i, rc)); - - assert(unpacked_coord.size() > 0 && unpacked_coord.size() < 3); - - Operand coord2d; - /* 0.5 for floating point coords, 0 for integer. */ - if (a16) - coord2d = instr->op == nir_texop_txf ? Operand::c16(0) : Operand::c16(0x3800); - else - coord2d = instr->op == nir_texop_txf ? Operand::c32(0) : Operand::c32(0x3f000000); - unpacked_coord.insert(std::next(unpacked_coord.begin()), bld.copy(bld.def(rc), coord2d)); - } else if (coord != Temp()) { + if (coord != Temp()) unpacked_coord.push_back(coord); - } - if (has_sample_index) unpacked_coord.push_back(sample_index); if (has_lod) @@ -9391,25 +9261,14 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) coords = emit_pack_v1(ctx, unpacked_coord); - assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE || !a16); - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->coord_components) - prepare_cube_coords(ctx, coords, &ddx, &ddy, instr->op == nir_texop_txd, - instr->is_array && instr->op != nir_texop_lod); - /* pack derivatives */ if (has_ddx || has_ddy) { - RegClass rc = g16 ? v2b : v1; assert(a16 == g16 || ctx->options->gfx_level >= GFX10); std::array ddxddy = {ddx, ddy}; for (Temp tmp : ddxddy) { if (tmp == Temp()) continue; std::vector unpacked = {tmp}; - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->gfx_level == GFX9) { - assert(has_ddx && has_ddy); - Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes())); - unpacked.push_back(zero); - } for (Temp derv : emit_pack_v1(ctx, unpacked)) derivs.push_back(derv); } diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 3ae68e87dd3..4f8abe56632 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -754,190 +754,6 @@ LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMV return LLVMBuildLShr(builder, num, post_shift, ""); } -/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 - * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is - * already multiplied by two. id is the cube face number. - */ -struct cube_selection_coords { - LLVMValueRef stc[2]; - LLVMValueRef ma; - LLVMValueRef id; -}; - -static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3], - struct cube_selection_coords *out) -{ - LLVMTypeRef f32 = ctx->f32; - - out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, 0); - out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, 0); - out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, 0); - out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, 0); -} - -/** - * Build a manual selection sequence for cube face sc/tc coordinates and - * major axis vector (multiplied by 2 for consistency) for the given - * vec3 \p coords, for the face implied by \p selcoords. - * - * For the major axis, we always adjust the sign to be in the direction of - * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards - * the selcoords major axis. - */ -static void build_cube_select(struct ac_llvm_context *ctx, - const struct cube_selection_coords *selcoords, - const LLVMValueRef *coords, LLVMValueRef *out_st, - LLVMValueRef *out_ma) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMTypeRef f32 = LLVMTypeOf(coords[0]); - LLVMValueRef is_ma_positive; - LLVMValueRef sgn_ma; - LLVMValueRef is_ma_z, is_not_ma_z; - LLVMValueRef is_ma_y; - LLVMValueRef is_ma_x; - LLVMValueRef sgn; - LLVMValueRef tmp; - - is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), ""); - sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0), - LLVMConstReal(f32, -1.0), ""); - - is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); - is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); - is_ma_y = LLVMBuildAnd( - builder, is_not_ma_z, - LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); - is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); - - /* Select sc */ - tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], ""); - sgn = LLVMBuildSelect( - builder, is_ma_y, LLVMConstReal(f32, 1.0), - LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); - out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); - - /* Select tc */ - tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); - sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), ""); - out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); - - /* Select ma */ - tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], - LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); - tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, 0); - *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); -} - -void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, - LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) -{ - - LLVMBuilderRef builder = ctx->builder; - struct cube_selection_coords selcoords; - LLVMValueRef coords[3]; - LLVMValueRef invma; - - if (is_array && !is_lod) { - LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]); - - /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says: - * - * "For Array forms, the array layer used will be - * - * max(0, min(d−1, floor(layer+0.5))) - * - * where d is the depth of the texture array and layer - * comes from the component indicated in the tables below. - * Workaround for an issue where the layer is taken from a - * helper invocation which happens to fall on a different - * layer due to extrapolation." - * - * GFX8 and earlier attempt to implement this in hardware by - * clamping the value of coords[2] = (8 * layer) + face. - * Unfortunately, this means that the we end up with the wrong - * face when clamping occurs. - * - * Clamp the layer earlier to work around the issue. - */ - if (ctx->gfx_level <= GFX8) { - LLVMValueRef ge0; - ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); - tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); - } - - coords_arg[3] = tmp; - } - - build_cube_intrinsic(ctx, coords_arg, &selcoords); - - invma = - ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, 0); - invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); - - for (int i = 0; i < 2; ++i) - coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); - - coords[2] = selcoords.id; - - if (is_deriv && derivs_arg) { - LLVMValueRef derivs[4]; - int axis; - - /* Convert cube derivatives to 2D derivatives. */ - for (axis = 0; axis < 2; axis++) { - LLVMValueRef deriv_st[2]; - LLVMValueRef deriv_ma; - - /* Transform the derivative alongside the texture - * coordinate. Mathematically, the correct formula is - * as follows. Assume we're projecting onto the +Z face - * and denote by dx/dh the derivative of the (original) - * X texture coordinate with respect to horizontal - * window coordinates. The projection onto the +Z face - * plane is: - * - * f(x,z) = x/z - * - * Then df/dh = df/dx * dx/dh + df/dz * dz/dh - * = 1/z * dx/dh - x/z * 1/z * dz/dh. - * - * This motivatives the implementation below. - * - * Whether this actually gives the expected results for - * apps that might feed in derivatives obtained via - * finite differences is anyone's guess. The OpenGL spec - * seems awfully quiet about how textureGrad for cube - * maps should be handled. - */ - build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma); - - deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); - - for (int i = 0; i < 2; ++i) - derivs[axis * 2 + i] = - LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""), - LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); - } - - memcpy(derivs_arg, derivs, sizeof(derivs)); - } - - /* Shift the texture coordinate. This must be applied after the - * derivative calculation. - */ - for (int i = 0; i < 2; ++i) - coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); - - if (is_array) { - /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ - /* coords_arg.w component - array_index for cube arrays */ - coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]); - } - - memcpy(coords_arg, coords, sizeof(coords)); -} - LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, LLVMValueRef j) diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 42a55492e0b..2cb2a922a9d 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -224,9 +224,6 @@ LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef nu LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef multiplier, LLVMValueRef post_shift); -void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, - LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); - LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, LLVMValueRef j); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 3cb5a0da655..2410ecfdfe4 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1540,13 +1540,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te return lower_gather4_integer(&ctx->ac, args, instr); } - /* Fixup for GFX9 which allocates 1D textures as 2D. */ - if (instr->op == nir_texop_lod && ctx->ac.gfx_level == GFX9) { - if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) { - args->coords[1] = ctx->ac.i32_0; - } - } - args->attributes = AC_ATTR_INVARIANT_LOAD; bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE; @@ -4242,61 +4235,26 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) /* pack derivatives */ if (ddx || ddy) { - int num_src_deriv_channels, num_dest_deriv_channels; + int num_deriv_channels; switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - num_src_deriv_channels = 3; - num_dest_deriv_channels = 3; + num_deriv_channels = 3; break; case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_CUBE: default: - num_src_deriv_channels = 2; - num_dest_deriv_channels = 2; + num_deriv_channels = 2; break; case GLSL_SAMPLER_DIM_1D: - num_src_deriv_channels = 1; - if (ctx->ac.gfx_level == GFX9) { - num_dest_deriv_channels = 2; - } else { - num_dest_deriv_channels = 1; - } + num_deriv_channels = ctx->ac.gfx_level == GFX9 ? 2 : 1; break; } - for (unsigned i = 0; i < num_src_deriv_channels; i++) { + for (unsigned i = 0; i < num_deriv_channels; i++) { args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); - args.derivs[num_dest_deriv_channels + i] = + args.derivs[num_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); } - for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0; - args.derivs[i] = zero; - args.derivs[num_dest_deriv_channels + i] = zero; - } - } - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { - for (unsigned chan = 0; chan < instr->coord_components; chan++) - args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); - if (instr->coord_components == 3) - args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32); - ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, args.coords, args.derivs); - } - - /* Texture coordinates fixups */ - if (ctx->ac.gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->op != nir_texop_lod) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0; - else - filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5); - - if (instr->is_array) - args.coords[2] = args.coords[1]; - args.coords[1] = filler; } /* Pack sample index */ diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index a73dd887327..275531f6b97 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -530,6 +530,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo if (progress) nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir)); + NIR_PASS( + _, stage->nir, ac_nir_lower_tex, + &(ac_nir_lower_tex_options){ + .gfx_level = gfx_level, + .lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord, + }); + if (stage->nir->info.uses_resource_info_query) NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index efcd2c7cd23..8dacbae139b 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -613,7 +613,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ .lower_to_fragment_fetch_amd = device->physical_device->use_fmask, .lower_lod_zero_width = true, .lower_invalid_implicit_lod = true, - .lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord, }; NIR_PASS(_, nir, nir_lower_tex, &tex_options); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 596ed927041..0eeb9622c5f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2063,6 +2063,13 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, if (sel->stage <= MESA_SHADER_GEOMETRY) NIR_PASS(progress, nir, si_nir_kill_outputs, key); + NIR_PASS( + _, nir, ac_nir_lower_tex, + &(ac_nir_lower_tex_options){ + .gfx_level = sel->screen->info.gfx_level, + .lower_array_layer_round_even = !sel->screen->info.conformant_trunc_coord, + }); + if (nir->info.uses_resource_info_query) NIR_PASS(progress, nir, ac_nir_lower_resinfo, sel->screen->info.gfx_level); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index de74a359752..0086a54e41e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -277,7 +277,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) .lower_invalid_implicit_lod = true, .lower_tg4_offsets = true, .lower_to_fragment_fetch_amd = sscreen->info.gfx_level < GFX11, - .lower_array_layer_round_even = !sscreen->info.conformant_trunc_coord, }; NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); -- 2.34.1