}
void
-build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
- Temp* out_tc)
-{
- Builder bld(ctx->program, ctx->block);
-
- Temp deriv_x = emit_extract_vector(ctx, deriv, 0, v1);
- Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1);
- Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1);
-
- Operand neg_one = Operand::c32(0xbf800000u); /* -1.0 */
- Operand one = Operand::c32(0x3f800000u); /* 1.0 */
- Operand two = Operand::c32(0x40000000u); /* 2.0 */
- Operand four = Operand::c32(0x40800000u); /* 4.0 */
-
- Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
- Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
- Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
-
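- /* v_cubeid_f32 returns the face index, two faces per axis in X/Y/Z order
- * (0..5), so id >= 4 means a Z major axis and 2 <= id < 4 means Y. */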
- Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
- Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
- is_ma_y = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), is_ma_y, is_ma_z);
- Temp is_not_ma_x = bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), is_ma_z, is_ma_y);
-
- /* select sc */
- Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
- Temp sgn = bld.vop2_e64(
- aco_opcode::v_cndmask_b32, bld.def(v1),
- bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_sgn_ma, sgn_ma, is_ma_z), one, is_ma_y);
- *out_sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
-
- /* select tc */
- tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_y, deriv_z, is_ma_y);
- sgn = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, sgn_ma, is_ma_y);
- *out_tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
-
- /* select ma */
- tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y),
- deriv_z, is_ma_z);
- tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp);
- *out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp);
-}
-
-void
-prepare_cube_coords(isel_context* ctx, std::vector<Temp>& coords, Temp* ddx, Temp* ddy,
- bool is_deriv, bool is_array)
-{
- Builder bld(ctx->program, ctx->block);
- Temp ma, tc, sc, id;
- aco_opcode madak =
- ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_madak_f32;
- aco_opcode madmk =
- ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmamk_f32 : aco_opcode::v_madmk_f32;
-
- /* see comment in ac_prepare_cube_coords() */
- if (is_array && ctx->options->gfx_level <= GFX8)
- coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]);
-
- ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
-
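- /* invma = v_rcp_f32(|ma|); the abs input modifier on the VOP3 form saves
- * a separate instruction to clear the sign bit. */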
- aco_ptr<VALU_instruction> vop3a{
- create_instruction<VALU_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
- vop3a->operands[0] = Operand(ma);
- vop3a->abs[0] = true;
- Temp invma = bld.tmp(v1);
- vop3a->definitions[0] = Definition(invma);
- ctx->block->instructions.emplace_back(std::move(vop3a));
-
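- /* v_cubema_f32 returns twice the major axis, so sc * invma falls in
- * [-0.5, 0.5]; the 1.5 bias below moves the face coordinate into the
- * [1.0, 2.0] range the sampler expects. */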
- sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
- if (!is_deriv)
- sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/));
-
- tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
- if (!is_deriv)
- tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/));
-
- id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
-
- if (is_deriv) {
- sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma);
- tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, invma);
-
- for (unsigned i = 0; i < 2; i++) {
- /* see comment in ac_prepare_cube_coords() */
- Temp deriv_ma;
- Temp deriv_sc, deriv_tc;
- build_cube_select(ctx, ma, id, i ? *ddy : *ddx, &deriv_ma, &deriv_sc, &deriv_tc);
-
- deriv_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, invma);
-
- Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_sc, invma),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, sc));
- Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_tc, invma),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, tc));
- *(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y);
- }
-
- sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc);
- tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc);
- }
-
- if (is_array) {
- id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/));
- coords.erase(coords.begin() + 3);
- }
- coords[0] = sc;
- coords[1] = tc;
- coords[2] = id;
-}
-
-void
get_const_vec(nir_ssa_def* vec, nir_const_value* cv[4])
{
if (vec->parent_instr->type != nir_instr_type_alu)
}
std::vector<Temp> unpacked_coord;
- if (ctx->options->gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->coord_components) {
- RegClass rc = a16 ? v2b : v1;
- for (unsigned i = 0; i < coord.bytes() / rc.bytes(); i++)
- unpacked_coord.emplace_back(emit_extract_vector(ctx, coord, i, rc));
-
- assert(unpacked_coord.size() > 0 && unpacked_coord.size() < 3);
-
- Operand coord2d;
- /* 0.5 for floating point coords, 0 for integer. */
- if (a16)
- coord2d = instr->op == nir_texop_txf ? Operand::c16(0) : Operand::c16(0x3800);
- else
- coord2d = instr->op == nir_texop_txf ? Operand::c32(0) : Operand::c32(0x3f000000);
- unpacked_coord.insert(std::next(unpacked_coord.begin()), bld.copy(bld.def(rc), coord2d));
- } else if (coord != Temp()) {
+ if (coord != Temp())
unpacked_coord.push_back(coord);
- }
-
if (has_sample_index)
unpacked_coord.push_back(sample_index);
if (has_lod)
unpacked_coord.push_back(lod);
coords = emit_pack_v1(ctx, unpacked_coord);
- assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE || !a16);
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->coord_components)
- prepare_cube_coords(ctx, coords, &ddx, &ddy, instr->op == nir_texop_txd,
- instr->is_array && instr->op != nir_texop_lod);
-
/* pack derivatives */
if (has_ddx || has_ddy) {
- RegClass rc = g16 ? v2b : v1;
assert(a16 == g16 || ctx->options->gfx_level >= GFX10);
std::array<Temp, 2> ddxddy = {ddx, ddy};
for (Temp tmp : ddxddy) {
if (tmp == Temp())
continue;
std::vector<Temp> unpacked = {tmp};
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->gfx_level == GFX9) {
- assert(has_ddx && has_ddy);
- Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
- unpacked.push_back(zero);
- }
for (Temp derv : emit_pack_v1(ctx, unpacked))
derivs.push_back(derv);
}
return LLVMBuildLShr(builder, num, post_shift, "");
}
-/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
- * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
- * already multiplied by two. id is the cube face number.
- */
-struct cube_selection_coords {
- LLVMValueRef stc[2];
- LLVMValueRef ma;
- LLVMValueRef id;
-};
-
-static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3],
- struct cube_selection_coords *out)
-{
- LLVMTypeRef f32 = ctx->f32;
-
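- /* These intrinsics map 1:1 to the v_cubetc/v_cubesc/v_cubema/v_cubeid
- * hardware instructions. */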
- out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, 0);
- out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, 0);
- out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, 0);
- out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, 0);
-}
-
-/**
- * Build a manual selection sequence for cube face sc/tc coordinates and
- * the major axis vector (multiplied by 2 for consistency with cubema) for
- * the given vec3 \p coords, using the face implied by \p selcoords.
- *
- * For the major axis, we always adjust the sign to be in the direction of
- * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
- * the selcoords major axis.
- */
-static void build_cube_select(struct ac_llvm_context *ctx,
- const struct cube_selection_coords *selcoords,
- const LLVMValueRef *coords, LLVMValueRef *out_st,
- LLVMValueRef *out_ma)
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
- LLVMValueRef is_ma_positive;
- LLVMValueRef sgn_ma;
- LLVMValueRef is_ma_z, is_not_ma_z;
- LLVMValueRef is_ma_y;
- LLVMValueRef is_ma_x;
- LLVMValueRef sgn;
- LLVMValueRef tmp;
-
- is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), "");
- sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0),
- LLVMConstReal(f32, -1.0), "");
-
- is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
- is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
- is_ma_y = LLVMBuildAnd(
- builder, is_not_ma_z,
- LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
- is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
-
- /* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
- sgn = LLVMBuildSelect(
- builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
- out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select tc */
- tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), "");
- out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select ma */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
- LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, 0);
- *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
-}
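- /* Worked example: for a +Z major axis (selcoords->id in [4,6), ma >= 0),
- * is_ma_z is true and is_ma_y/is_ma_x are false, so the selects reduce to
- * out_st[0] = coords[0] (sc = x)
- * out_st[1] = -coords[1] (tc = -y)
- * *out_ma = 2 * fabs(coords[2])
- * matching Table 8.27 of the OpenGL 4.5 spec for the +Z face. */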
-
-void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg)
-{
-
- LLVMBuilderRef builder = ctx->builder;
- struct cube_selection_coords selcoords;
- LLVMValueRef coords[3];
- LLVMValueRef invma;
-
- if (is_array && !is_lod) {
- LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
-
- /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
- *
- * "For Array forms, the array layer used will be
- *
- * max(0, min(d−1, floor(layer+0.5)))
- *
- * where d is the depth of the texture array and layer
- * comes from the component indicated in the tables below."
- *
- * The rounding also works around an issue where the layer is
- * taken from a helper invocation which happens to fall on a
- * different layer due to extrapolation.
- *
- * GFX8 and earlier attempt to implement this in hardware by
- * clamping the value of coords[2] = (8 * layer) + face.
- * Unfortunately, this means that we end up with the wrong
- * face when clamping occurs.
- *
- * Clamp the layer earlier to work around the issue.
- */
- if (ctx->gfx_level <= GFX8) {
- LLVMValueRef ge0;
- ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
- tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
- }
-
- coords_arg[3] = tmp;
- }
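- /* E.g. layer = 2.7, d = 6: floor(2.7 + 0.5) = 3, within [0, d-1]. Only
- * the max(0, ...) half is applied here; the min(d-1, ...) clamp stays in
- * the hardware, which knows d. */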
-
- build_cube_intrinsic(ctx, coords_arg, &selcoords);
-
- invma = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, 0);
- invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
-
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
-
- coords[2] = selcoords.id;
-
- if (is_deriv && derivs_arg) {
- LLVMValueRef derivs[4];
- int axis;
-
- /* Convert cube derivatives to 2D derivatives. */
- for (axis = 0; axis < 2; axis++) {
- LLVMValueRef deriv_st[2];
- LLVMValueRef deriv_ma;
-
- /* Transform the derivative alongside the texture
- * coordinate. Mathematically, the correct formula is
- * as follows. Assume we're projecting onto the +Z face
- * and denote by dx/dh the derivative of the (original)
- * X texture coordinate with respect to horizontal
- * window coordinates. The projection onto the +Z face
- * plane is:
- *
- * f(x,z) = x/z
- *
- * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
- * = 1/z * dx/dh - x/z * 1/z * dz/dh.
- *
- * This motivates the implementation below.
- *
- * Whether this actually gives the expected results for
- * apps that might feed in derivatives obtained via
- * finite differences is anyone's guess. The OpenGL spec
- * seems awfully quiet about how textureGrad for cube
- * maps should be handled.
- */
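- /* Numeric check: at x = 1, z = 2 with dx/dh = 0.1 and dz/dh = 0.2,
- * df/dh = (1/2)(0.1) - (1/4)(0.2) = 0; the point moves along the ray
- * through the face point, so the projected coordinate is constant. */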
- build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma);
-
- deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
-
- for (int i = 0; i < 2; ++i)
- derivs[axis * 2 + i] =
- LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""),
- LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
- }
-
- memcpy(derivs_arg, derivs, sizeof(derivs));
- }
-
- /* Shift the texture coordinate. This must be applied after the
- * derivative calculation.
- */
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
-
- if (is_array) {
- /* For cube arrays, coords.z = array_index (coords_arg.w) * 8 + face. */
- coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
- }
-
- memcpy(coords_arg, coords, sizeof(coords));
-}
-
LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
LLVMValueRef j)
LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
LLVMValueRef multiplier, LLVMValueRef post_shift);
-void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg);
-
LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
LLVMValueRef j);
return lower_gather4_integer(&ctx->ac, args, instr);
}
- /* Fixup for GFX9 which allocates 1D textures as 2D. */
- if (instr->op == nir_texop_lod && ctx->ac.gfx_level == GFX9) {
- if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
- args->coords[1] = ctx->ac.i32_0;
- }
- }
-
args->attributes = AC_ATTR_INVARIANT_LOAD;
bool cs_derivs =
ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
/* pack derivatives */
if (ddx || ddy) {
- int num_src_deriv_channels, num_dest_deriv_channels;
+ int num_deriv_channels;
switch (instr->sampler_dim) {
case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- num_src_deriv_channels = 3;
- num_dest_deriv_channels = 3;
+ num_deriv_channels = 3;
break;
case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_CUBE:
default:
- num_src_deriv_channels = 2;
- num_dest_deriv_channels = 2;
+ num_deriv_channels = 2;
break;
case GLSL_SAMPLER_DIM_1D:
- num_src_deriv_channels = 1;
- if (ctx->ac.gfx_level == GFX9) {
- num_dest_deriv_channels = 2;
- } else {
- num_dest_deriv_channels = 1;
- }
+ num_deriv_channels = ctx->ac.gfx_level == GFX9 ? 2 : 1;
break;
}
- for (unsigned i = 0; i < num_src_deriv_channels; i++) {
+ for (unsigned i = 0; i < num_deriv_channels; i++) {
args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
- args.derivs[num_dest_deriv_channels + i] =
+ args.derivs[num_deriv_channels + i] =
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
}
- for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
- LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
- args.derivs[i] = zero;
- args.derivs[num_dest_deriv_channels + i] = zero;
- }
- }
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
- for (unsigned chan = 0; chan < instr->coord_components; chan++)
- args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
- if (instr->coord_components == 3)
- args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
- ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
- instr->op == nir_texop_lod, args.coords, args.derivs);
- }
-
- /* Texture coordinates fixups */
- if (ctx->ac.gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->op != nir_texop_lod) {
- LLVMValueRef filler;
- if (instr->op == nir_texop_txf)
- filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
- else
- filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
-
- if (instr->is_array)
- args.coords[2] = args.coords[1];
- args.coords[1] = filler;
}
/* Pack sample index */
if (progress)
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
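+ /* Lower cube coordinates (and array layer rounding) in NIR rather than in the backend. */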
+ NIR_PASS(
+ _, stage->nir, ac_nir_lower_tex,
+ &(ac_nir_lower_tex_options){
+ .gfx_level = gfx_level,
+ .lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord,
+ });
+
if (stage->nir->info.uses_resource_info_query)
NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);
.lower_to_fragment_fetch_amd = device->physical_device->use_fmask,
.lower_lod_zero_width = true,
.lower_invalid_implicit_lod = true,
- .lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord,
};
NIR_PASS(_, nir, nir_lower_tex, &tex_options);
if (sel->stage <= MESA_SHADER_GEOMETRY)
NIR_PASS(progress, nir, si_nir_kill_outputs, key);
+ NIR_PASS(
+ _, nir, ac_nir_lower_tex,
+ &(ac_nir_lower_tex_options){
+ .gfx_level = sel->screen->info.gfx_level,
+ .lower_array_layer_round_even = !sel->screen->info.conformant_trunc_coord,
+ });
+
if (nir->info.uses_resource_info_query)
NIR_PASS(progress, nir, ac_nir_lower_resinfo, sel->screen->info.gfx_level);
.lower_invalid_implicit_lod = true,
.lower_tg4_offsets = true,
.lower_to_fragment_fetch_amd = sscreen->info.gfx_level < GFX11,
- .lower_array_layer_round_even = !sscreen->info.conformant_trunc_coord,
};
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);