#include "compiler/nir/nir_builtin_builder.h"
#include "agx_compiler.h"
#include "agx_internal_formats.h"
+#include "nir_builder_opcodes.h"
+#include "nir_intrinsics.h"
+#include "nir_intrinsics_indices.h"
#define AGX_TEXTURE_DESC_STRIDE 24
#define AGX_FORMAT_RGB32_EMULATED 0x36
return &tex->dest.ssa;
}
+static nir_ssa_def *
+nir_bitfield_mask(nir_builder *b, nir_ssa_def *x)
+{
+ nir_ssa_def *one = nir_imm_intN_t(b, 1, x->bit_size);
+ return nir_iadd_imm(b, nir_ishl(b, one, nir_u2u32(b, x)), -1);
+}
+
+static nir_ssa_def *
+calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
+ nir_ssa_def *tile_w_px_log2,
+ nir_ssa_def *tile_h_px_log2,
+ nir_ssa_def *width_tl,
+ nir_ssa_def *layer_stride_el)
+{
+ /* SIMD-within-a-register */
+ nir_ssa_def *coord_px = nir_pack_32_2x16(b, nir_u2u16(b, coord));
+ nir_ssa_def *tile_mask =
+ nir_pack_32_2x16_split(b, nir_bitfield_mask(b, tile_w_px_log2),
+ nir_bitfield_mask(b, tile_h_px_log2));
+
+ /* Modulo by the tile width/height to get the offsets within the tile */
+ nir_ssa_def *offs_xy_px = nir_iand(b, coord_px, tile_mask);
+
+ /* Get the coordinates of the corner of the tile */
+ nir_ssa_def *tile_xy_px = nir_isub(b, coord_px, offs_xy_px);
+
+ /* Unpack SIMD-within-a-register */
+ nir_ssa_def *offs_x_px = nir_unpack_32_2x16_split_x(b, offs_xy_px);
+ nir_ssa_def *offs_y_px = nir_unpack_32_2x16_split_y(b, offs_xy_px);
+ nir_ssa_def *tile_x_px =
+ nir_u2u32(b, nir_unpack_32_2x16_split_x(b, tile_xy_px));
+ nir_ssa_def *tile_y_px =
+ nir_u2u32(b, nir_unpack_32_2x16_split_y(b, tile_xy_px));
+
+ /* Get the tile size */
+ nir_ssa_def *one_32 = nir_imm_int(b, 1);
+ nir_ssa_def *tile_w_px = nir_ishl(b, one_32, nir_u2u32(b, tile_w_px_log2));
+ nir_ssa_def *tile_h_px = nir_ishl(b, one_32, nir_u2u32(b, tile_h_px_log2));
+
+ /* tile row start (px) =
+ * (y // tile height) * (# of tiles/row) * (# of pix/tile) =
+ * align_down(y, tile height) / tile height * width_tl *tile width *
+ * tile height =
+ * align_down(y, tile height) * width_tl * tile width
+ */
+ nir_ssa_def *tile_row_start_px =
+ nir_imul(b, nir_u2u32(b, tile_y_px), nir_imul(b, width_tl, tile_w_px));
+
+ /* tile column start (px) =
+ * (x // tile width) * (# of pix/tile) =
+ * align(x, tile width) / tile width * tile width * tile height =
+ * align(x, tile width) * tile height
+ */
+ nir_ssa_def *tile_col_start_px = nir_imul(b, tile_x_px, tile_h_px);
+
+ /* The pixel at which the tile starts is thus... */
+ nir_ssa_def *tile_offset_px =
+ nir_iadd(b, tile_row_start_px, tile_col_start_px);
+
+ /* Get the total offset */
+ nir_ssa_def *offs_px = nir_interleave_agx(b, offs_x_px, offs_y_px);
+ nir_ssa_def *total_px = nir_iadd(b, tile_offset_px, nir_u2u32(b, offs_px));
+
+ if (layer_stride_el) {
+ nir_ssa_def *layer = nir_channel(b, coord, 2);
+ nir_ssa_def *layer_offset_px = nir_imul(b, layer, layer_stride_el);
+ total_px = nir_iadd(b, total_px, layer_offset_px);
+ }
+
+ return total_px;
+}
+
+static nir_ssa_def *
+image_texel_address(nir_builder *b, nir_intrinsic_instr *intr)
+{
+ /* First, calculate the address of the PBE descriptor */
+ nir_ssa_def *desc_address;
+ if (intr->intrinsic == nir_intrinsic_bindless_image_texel_address)
+ desc_address = texture_descriptor_ptr_for_handle(b, intr->src[0].ssa);
+ else
+ desc_address = texture_descriptor_ptr_for_index(b, intr->src[0].ssa);
+
+ nir_ssa_def *coord = intr->src[1].ssa;
+
+ enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
+ bool layered = nir_intrinsic_image_array(intr) ||
+ (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_CUBE) ||
+ (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_3D);
+
+ /* The last 8 bytes of the 24-byte PBE descriptor contain either the
+ * software-defined atomic descriptor, or (if array image) a pointer to the
+ * descriptor. Grab it.
+ */
+ nir_ssa_def *meta_ptr = nir_iadd_imm(b, desc_address, 16);
+ nir_ssa_def *meta = nir_load_global_constant(b, meta_ptr, 8, 1, 64);
+ nir_ssa_def *layer_stride_el = NULL;
+
+ if (layered) {
+ nir_ssa_def *desc = nir_load_global_constant(b, meta, 8, 3, 32);
+ meta = nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
+ layer_stride_el = nir_channel(b, desc, 2);
+ }
+
+ nir_ssa_def *meta_hi = nir_unpack_64_2x32_split_y(b, meta);
+
+ /* See the GenXML definitions of the software-defined atomic descriptors */
+ nir_ssa_def *base;
+
+ if (dim == GLSL_SAMPLER_DIM_BUF)
+ base = meta;
+ else
+ base = nir_ishl_imm(b, nir_iand_imm(b, meta, BITFIELD64_MASK(33)), 7);
+
+ nir_ssa_def *tile_w_px_log2 =
+ nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 33 - 32, 3));
+ nir_ssa_def *tile_h_px_log2 =
+ nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 36 - 32, 3));
+ nir_ssa_def *width_tl = nir_ubitfield_extract_imm(b, meta_hi, 39 - 32, 14);
+
+ /* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
+ * known use cases. So, we're linear if buffer or 1D, and twiddled otherwise.
+ */
+ nir_ssa_def *total_px;
+ if (dim == GLSL_SAMPLER_DIM_BUF || dim == GLSL_SAMPLER_DIM_1D) {
+ /* 1D linear is indexed directly */
+ total_px = nir_channel(b, coord, 0);
+ } else {
+ total_px = calculate_twiddled_coordinates(
+ b, coord, tile_w_px_log2, tile_h_px_log2, width_tl, layer_stride_el);
+ }
+
+ /* Calculate the full texel address. This sequence is written carefully to
+ * ensure it will be entirely folded into the atomic's addressing arithmetic.
+ */
+ enum pipe_format format = nir_intrinsic_format(intr);
+ unsigned bytes_per_pixel_B = util_format_get_blocksize(format);
+
+ nir_ssa_def *total_B = nir_imul_imm(b, total_px, bytes_per_pixel_B);
+ return nir_iadd(b, base, nir_u2u64(b, total_B));
+}
+
+
static bool
lower_images(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
nir_dest_bit_size(intr->dest)));
return true;
+ case nir_intrinsic_image_texel_address:
+ case nir_intrinsic_bindless_image_texel_address:
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, image_texel_address(b, intr));
+ return true;
+
default:
return false;
}
};
NIR_PASS(progress, s, nir_lower_tex, &lower_tex_options);
+ NIR_PASS(progress, s, nir_lower_image_atomics_to_global);
/* Lower bias after nir_lower_tex (to get rid of txd) but before
* lower_regular_texture (which will shuffle around the sources)