(*) freedreno (a2xx-a4xx), llvmpipe, and softpipe have fake Multisample anti-aliasing support
-GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12, panfrost
+GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12, panfrost, asahi
Forward compatible context support/deprecations DONE
- GL_ARB_draw_instanced (Instanced drawing) DONE (etnaviv/HALTI2, v3d, asahi)
- GL_ARB_copy_buffer (Buffer copying) DONE (v3d, vc4, lima, asahi)
- GL_NV_primitive_restart (Primitive restart) DONE (v3d, asahi)
- 16 vertex texture image units DONE (asahi)
+ GL_ARB_draw_instanced (Instanced drawing) DONE (etnaviv/HALTI2, v3d)
+ GL_ARB_copy_buffer (Buffer copying) DONE (v3d, vc4, lima)
+ GL_NV_primitive_restart (Primitive restart) DONE (v3d)
+ 16 vertex texture image units DONE ()
GL_ARB_texture_buffer_object (Texture buffer objs) DONE (v3d)
- GL_ARB_texture_rectangle (Rectangular textures) DONE (v3d, vc4, lima, asahi)
- GL_ARB_uniform_buffer_object (Uniform buffer objs) DONE (v3d, asahi)
- GL_EXT_texture_snorm (Signed normalized textures) DONE (v3d, asahi)
+ GL_ARB_texture_rectangle (Rectangular textures) DONE (v3d, vc4, lima)
+ GL_ARB_uniform_buffer_object (Uniform buffer objs) DONE (v3d)
+ GL_EXT_texture_snorm (Signed normalized textures) DONE (v3d)
GL 3.2, GLSL 1.50 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12
GL_ARB_sample_shading DONE (freedreno/a6xx, i965/gen6+, nv50, panfrost)
GL_ARB_shader_subroutine DONE (freedreno, i965/gen6+, nv50, softpipe)
GL_ARB_tessellation_shader DONE (freedreno/a6xx, i965/gen7+)
- GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, i965/gen6+, softpipe, panfrost)
+ GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, i965/gen6+, softpipe, panfrost, asahi)
GL_ARB_texture_cube_map_array DONE (freedreno/a4xx+, i965/gen6+, nv50, softpipe)
GL_ARB_texture_gather DONE (freedreno, i965/gen6+, nv50, softpipe, v3d, panfrost, asahi)
GL_ARB_texture_query_lod DONE (freedreno, i965, nv50, softpipe, v3d, panfrost)
GL_OES_shader_multisample_interpolation DONE (freedreno/a6xx, i965, nvc0, r600)
GL_OES_tessellation_shader DONE (freedreno/a6xx, all drivers that support GL_ARB_tessellation_shader)
GL_OES_texture_border_clamp DONE (all drivers)
- GL_OES_texture_buffer DONE (freedreno, i965, nvc0, r600, softpipe, panfrost)
+ GL_OES_texture_buffer DONE (freedreno, i965, nvc0, r600, softpipe, panfrost, asahi)
GL_OES_texture_cube_map_array DONE (freedreno/a4xx+, i965/hsw+, nvc0, r600, softpipe)
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
{
switch (dim) {
case GLSL_SAMPLER_DIM_1D:
- case GLSL_SAMPLER_DIM_BUF:
return array ? AGX_DIM_1D_ARRAY : AGX_DIM_1D;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_CUBE:
return array ? AGX_DIM_CUBE_ARRAY : AGX_DIM_CUBE;
+ case GLSL_SAMPLER_DIM_BUF:
+ unreachable("Buffer textures should have been lowered");
+
default:
unreachable("Invalid sampler dim\n");
}
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "agx_compiler.h"
+#include "agx_internal_formats.h"
-#define AGX_TEXTURE_DESC_STRIDE 24
+#define AGX_TEXTURE_DESC_STRIDE 24
+#define AGX_FORMAT_RGB32_EMULATED 0x36
static nir_ssa_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
return ssa;
}
+/* Implement txs for buffer textures. There is no mipmapping to worry about, so
+ * this is just a uniform pull. However, we lower buffer textures to 2D so the
+ * original size is irrecoverable. Instead, we stash it in the "Acceleration
+ * buffer" field, which is unused for linear images. Fetch just that.
+ */
+static nir_ssa_def *
+agx_txs_buffer(nir_builder *b, nir_ssa_def *descriptor)
+{
+   /* The acceleration buffer field lives 16 bytes into the texture
+    * descriptor; the driver packs the element count there when it builds the
+    * descriptor for a buffer texture. NOTE(review): offset 16 must stay in
+    * sync with the TEXTURE descriptor layout used by the packing code.
+    */
+   nir_ssa_def *size_ptr = nir_iadd_imm(b, descriptor, 16);
+
+   /* Single 32-bit uniform load of the stashed size (align=8, 1 comp, 32b) */
+   return nir_load_global_constant(b, size_ptr, 8, 1, 32);
+}
+
static nir_ssa_def *
agx_txs(nir_builder *b, nir_tex_instr *tex)
{
nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
nir_ssa_def *comp[4] = {NULL};
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ return agx_txs_buffer(b, ptr);
+
nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
nir_ssa_def *w0 = nir_channel(b, desc, 0);
nir_ssa_def *w1 = nir_channel(b, desc, 1);
return true;
}
+/* Build a boolean (1-bit) value that is true if the texture bound to this
+ * instruction uses the software-defined emulated RGB32 channel encoding.
+ * Evaluated at shader run time by reading the descriptor in memory, since the
+ * format is not known at compile time.
+ */
+static nir_ssa_def *
+format_is_rgb32(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
+   nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
+
+   /* The channels field is a 7-bit bitfield starting at bit 6 of the first
+    * descriptor word; compare it against the emulated-RGB32 sentinel (0x36).
+    */
+   nir_ssa_def *channels =
+      nir_iand_imm(b, nir_ushr_imm(b, desc, 6), BITFIELD_MASK(7));
+
+   return nir_ieq_imm(b, channels, AGX_FORMAT_RGB32_EMULATED);
+}
+
+/* Load from an RGB32 buffer texture. The hardware has no native RGB32 texel
+ * format, so the read is lowered to a raw global memory load from the base
+ * address recovered out of the texture descriptor. Returns a vector sized for
+ * the instruction's destination, with alpha forced to 1.
+ */
+static nir_ssa_def *
+load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
+{
+   /* Base address right-shifted 4: bits [66, 102) */
+   nir_ssa_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
+   nir_ssa_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
+   nir_ssa_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
+   /* Bit 66 of the descriptor is bit 2 of the high 64-bit word, hence the
+    * shift by 2; the field is 36 bits wide and stores the address >> 4.
+    */
+   nir_ssa_def *base_shr4 =
+      nir_iand_imm(b, nir_ushr_imm(b, desc_hi, 2), BITFIELD64_MASK(36));
+   nir_ssa_def *base = nir_ishl_imm(b, base_shr4, 4);
+
+   /* Each RGB32 texel is 3 consecutive 32-bit words, so scale the texel
+    * coordinate by 3 to get the element offset of the packed triple.
+    */
+   nir_ssa_def *raw = nir_load_constant_agx(
+      b, 3, nir_dest_bit_size(tex->dest), base, nir_imul_imm(b, coordinate, 3),
+      .format = AGX_INTERNAL_FORMAT_I32);
+
+   /* Set alpha to 1 (in the appropriate format) */
+   bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;
+
+   nir_ssa_def *swizzled[4] = {
+      nir_channel(b, raw, 0), nir_channel(b, raw, 1), nir_channel(b, raw, 2),
+      is_float ? nir_imm_float(b, 1.0) : nir_imm_int(b, 1)};
+
+   return nir_vec(b, swizzled, nir_tex_instr_dest_size(tex));
+}
+
+/*
+ * Buffer textures are lowered to 2D (1024xN) textures in the driver to access
+ * more storage. When lowering, we need to fix up the coordinate accordingly.
+ *
+ * Furthermore, RGB32 formats are emulated by lowering to global memory access,
+ * so to read a buffer texture we generate code that looks like:
+ *
+ *    if (descriptor->format == RGB32)
+ *       return ((uint32_t *) descriptor->address)[x];
+ *    else
+ *       return txf(texture_as_2d, vec2(x % 1024, x / 1024));
+ *
+ * Returns true since the instruction is always rewritten.
+ */
+static bool
+lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord);
+
+   /* The OpenGL ES 3.2 specification says on page 187:
+    *
+    *    When a buffer texture is accessed in a shader, the results of a texel
+    *    fetch are undefined if the specified texel coordinate is negative, or
+    *    greater than or equal to the clamped number of texels in the texture
+    *    image.
+    *
+    * However, faulting would be undesirable for robustness, so clamp.
+    */
+   nir_ssa_def *size = nir_get_texture_size(b, tex);
+   coord = nir_umin(b, coord, nir_iadd_imm(b, size, -1));
+
+   /* Lower RGB32 reads if the format requires */
+   nir_if *nif = nir_push_if(b, format_is_rgb32(b, tex));
+   nir_ssa_def *rgb32 = load_rgb32(b, tex, coord);
+   nir_push_else(b, nif);
+
+   /* Otherwise, lower the texture instruction to read from 2D */
+   assert(coord->num_components == 1 && "buffer textures are 1D");
+   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+   /* Split the linear texel index into (x % 1024, x / 1024) via bit ops,
+    * matching the driver's 1024-texels-wide 2D layout.
+    */
+   nir_ssa_def *coord2d = nir_vec2(b, nir_iand_imm(b, coord, BITFIELD_MASK(10)),
+                                   nir_ushr_imm(b, coord, 10));
+   /* Move the existing tex instruction into the else branch by removing it
+    * from its current block and re-inserting at the builder cursor.
+    */
+   nir_instr_remove(&tex->instr);
+   nir_builder_instr_insert(b, &tex->instr);
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord2d));
+   nir_block *else_block = nir_cursor_current_block(b->cursor);
+   nir_pop_if(b, nif);
+
+   /* Put it together with a phi */
+   nir_ssa_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
+
+   /* Rewriting all uses of the tex result also rewrote the phi's own else
+    * source; point that source back at the tex instruction so the phi still
+    * selects between the RGB32 path and the 2D txf path.
+    */
+   nir_ssa_def_rewrite_uses(&tex->dest.ssa, phi);
+
+   nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
+   nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
+   nir_instr_rewrite_src_ssa(phi->parent_instr, &else_src->src, &tex->dest.ssa);
+   return true;
+}
+
/*
* NIR indexes into array textures with unclamped floats (integer for txf). AGX
* requires the index to be a clamped integer. Lower tex_src_coord into
if (nir_tex_instr_is_query(tex))
return false;
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ return lower_buffer_texture(b, tex);
+
/* Get the coordinates */
nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord);
nir_ssa_def *ms_idx = steal_tex_src(tex, nir_tex_src_ms_index);
AGX_FMT(R11G11B10_FLOAT, R11G11B10, FLOAT, T, RG11B10F),
AGX_FMT(R9G9B9E5_FLOAT, R9G9B9E5, FLOAT, F, RGB9E5),
+ /* These formats are emulated for texture buffers only */
+ AGX_FMT(R32G32B32_FLOAT, R32G32B32_EMULATED, FLOAT, F, _),
+ AGX_FMT(R32G32B32_UINT, R32G32B32_EMULATED, UINT, F, _),
+ AGX_FMT(R32G32B32_SINT, R32G32B32_EMULATED, SINT, F, _),
+
AGX_FMT(ETC1_RGB8, ETC2_RGB8, UNORM, F,_),
AGX_FMT(ETC2_RGB8, ETC2_RGB8, UNORM, F,_),
AGX_FMT(ETC2_SRGB8, ETC2_RGB8, UNORM, F,_),
<value name="R8G8B8A8" value="0x28"/>
<value name="R32G32" value="0x31"/>
<value name="R16G16B16A16" value="0x32"/>
+
+ <!-- Software-defined value selected not to clash with the hardware values.
+ Texture buffer reads from this format are lowered. It is invalid to
+ use this format for anything else. -->
+ <value name="R32G32B32 (Emulated)" value="0x36"/>
+
<value name="R32G32B32A32" value="0x38"/>
<value name="GBGR 422" value="0x40"/> <!-- Subsampled, swizzle BRG1 -->
<value name="BGRG 422" value="0x41"/> <!-- Subsampled, swizzle BRG1 -->
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
case PIPE_CAP_SAMPLE_SHADING:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_IMAGE_LOAD_FORMATTED:
case PIPE_CAP_IMAGE_STORE_FORMATTED:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 16;
+ /* Texel buffers lowered to (at most) 1024x16384 2D textures */
case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
- return 65536;
+ return 1024 * 16384;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 64;
if (!agx_is_valid_pixel_format(tex_format))
return false;
+ /* RGB32 is emulated for texture buffers only */
+ if (ent.channels == AGX_CHANNELS_R32G32B32_EMULATED &&
+ target != PIPE_BUFFER)
+ return false;
+
if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable)
return false;
}
assert(samples >= 1);
switch (dim) {
+ case PIPE_BUFFER:
+ /* Lowered to 2D */
+ assert(samples == 1);
+ return AGX_TEXTURE_DIMENSION_2D;
+
case PIPE_TEXTURE_1D:
assert(samples == 1);
return AGX_TEXTURE_DIMENSION_1D;
cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]);
cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]);
cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]);
- cfg.width = rsrc->base.width0;
- cfg.height = rsrc->base.height0;
- cfg.first_level = state->u.tex.first_level;
- cfg.last_level = state->u.tex.last_level;
+
+ if (state->target == PIPE_BUFFER) {
+ unsigned size_el =
+ state->u.buf.size / util_format_get_blocksize(format);
+
+ /* Use a 2D texture to increase the maximum size */
+ cfg.width = 1024;
+ cfg.height = DIV_ROUND_UP(size_el, cfg.width);
+ cfg.first_level = cfg.last_level = 0;
+
+ /* Stash the actual size in an unused part of the texture descriptor,
+ * which we'll read later to implement txs.
+ */
+ cfg.acceleration_buffer = (size_el << 4);
+ } else {
+ cfg.width = rsrc->base.width0;
+ cfg.height = rsrc->base.height0;
+ cfg.first_level = state->u.tex.first_level;
+ cfg.last_level = state->u.tex.last_level;
+ }
+
cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
cfg.unk_mipmapped = rsrc->mipmapped;
cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3;
}
if (include_bo) {
- cfg.address = agx_map_texture_gpu(rsrc, state->u.tex.first_layer);
+ cfg.address = agx_map_texture_gpu(rsrc, first_layer);
+
+ if (state->target == PIPE_BUFFER)
+ cfg.address += state->u.buf.offset;
if (ail_is_compressed(&rsrc->layout)) {
cfg.acceleration_buffer =
if (state->target == PIPE_TEXTURE_3D) {
cfg.depth = rsrc->base.depth0;
+ } else if (state->target == PIPE_BUFFER) {
+ cfg.depth = 1;
} else {
unsigned layers =
state->u.tex.last_layer - state->u.tex.first_layer + 1;
if (rsrc->base.nr_samples > 1)
cfg.samples = agx_translate_sample_count(rsrc->base.nr_samples);
- if (rsrc->layout.tiling == AIL_TILING_LINEAR) {
+ if (state->target == PIPE_BUFFER) {
+ cfg.stride = (cfg.width * util_format_get_blocksize(format)) - 16;
+ } else if (rsrc->layout.tiling == AIL_TILING_LINEAR) {
cfg.stride = ail_get_linear_stride_B(&rsrc->layout, 0) - 16;
} else {
assert(rsrc->layout.tiling == AIL_TILING_TWIDDLED ||
continue;
}
- agx_batch_reads(batch, agx_resource(tex->base.texture));
+ struct agx_resource *rsrc = tex->rsrc;
+ agx_batch_reads(batch, tex->rsrc);
+
+ unsigned first_layer =
+ (tex->base.target == PIPE_BUFFER) ? 0 : tex->base.u.tex.first_layer;
/* Without the address */
struct agx_texture_packed texture = tex->desc;
/* Just the address */
struct agx_texture_packed texture2;
agx_pack(&texture2, TEXTURE, cfg) {
- cfg.address =
- agx_map_texture_gpu(tex->rsrc, tex->base.u.tex.first_layer);
+ cfg.address = agx_map_texture_gpu(rsrc, first_layer);
+
+ if (rsrc->base.target == PIPE_BUFFER)
+ cfg.address += tex->base.u.buf.offset;
- if (ail_is_compressed(&tex->rsrc->layout)) {
+ if (ail_is_compressed(&rsrc->layout)) {
cfg.acceleration_buffer =
- cfg.address + tex->rsrc->layout.metadata_offset_B;
+ agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B +
+ (first_layer * rsrc->layout.compression_layer_stride_B);
}
}