GL_ARB_sparse_texture_clamp DONE (radeonsi/gfx9+, zink)
GL_ARB_texture_filter_minmax DONE (nvc0/gm200+, zink)
GL_ARM_shader_framebuffer_fetch_depth_stencil DONE (llvmpipe)
+ GL_EXT_shader_framebuffer_fetch DONE (freedreno/a6xx, iris/gen9+, llvmpipe, panfrost, virgl, zink, asahi)
+ GL_EXT_shader_framebuffer_fetch_non_coherent DONE (freedreno/a6xx, iris, llvmpipe, panfrost, virgl, zink, asahi)
GL_EXT_color_buffer_half_float DONE (freedreno, i965, iris, llvmpipe, nv50, nvc0, radeonsi, zink)
GL_EXT_depth_bounds_test DONE (i965/gen12+, nv50, nvc0, radeonsi, softpipe, zink)
GL_EXT_memory_object DONE (freedreno, radeonsi, i965/gen7+, llvmpipe, zink, d3d12)
nir_ssa_def *fragcoord = nir_load_frag_coord(b);
nir_ssa_def *sampid = nir_load_sample_id(b);
-
+ nir_ssa_def *layer = nir_load_layer_id(b);
fragcoord = nir_f2i32(b, fragcoord);
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->op = nir_texop_txf_ms_fb;
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
- tex->coord_components = 2;
+ tex->coord_components = 3;
tex->dest_type = nir_type_float32;
+ tex->is_array = true;
tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(b, fragcoord, 0x3));
+ tex->src[0].src =
+ nir_src_for_ssa(nir_vec3(b, nir_channel(b, fragcoord, 0), nir_channel(b, fragcoord, 1), layer));
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(sampid);
struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
type = TYPE_S32;
if (tex->op == nir_texop_txf_ms_fb) {
- /* only expect a single txf_ms_fb per shader: */
- compile_assert(ctx, !ctx->so->fb_read);
compile_assert(ctx, ctx->so->type == MESA_SHADER_FRAGMENT);
ctx->so->fb_read = true;
if (ctx->compiler->options.bindless_fb_read_descriptor >= 0) {
ctx->so->bindless_tex = true;
-
- info.flags = IR3_INSTR_B | IR3_INSTR_A1EN;
+ info.flags = IR3_INSTR_B;
info.base = ctx->compiler->options.bindless_fb_read_descriptor;
- info.a1_val = ctx->compiler->options.bindless_fb_read_slot << 3;
+ struct ir3_instruction *texture, *sampler;
+
+ int base_index =
+ nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
+ nir_src tex_src = tex->src[base_index].src;
+
+ if (nir_src_is_const(tex_src)) {
+ texture = create_immed_typed(b,
+ nir_src_as_uint(tex_src) + ctx->compiler->options.bindless_fb_read_slot,
+ TYPE_U32);
+ } else {
+ texture = create_immed_typed(
+ ctx->block, ctx->compiler->options.bindless_fb_read_slot, TYPE_U32);
+ struct ir3_instruction *base =
+ ir3_get_src(ctx, &tex->src[base_index].src)[0];
+ texture = ir3_ADD_U(b, texture, 0, base, 0);
+ }
+ sampler = create_immed_typed(ctx->block, 0, TYPE_U32);
+ info.samp_tex = ir3_collect(b, texture, sampler);
+ info.flags |= IR3_INSTR_S2EN;
+ if (tex->texture_non_uniform) {
+ info.flags |= IR3_INSTR_NONUNIF;
+ }
} else {
/* Otherwise append a sampler to be patched into the texture
* state:
NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_offset);
NIR_PASS_V(s, ir3_nir_move_varying_inputs);
NIR_PASS_V(s, nir_lower_fb_read);
+ NIR_PASS_V(s, ir3_nir_lower_layer_id);
}
if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&
int ir3_nir_coord_offset(nir_ssa_def *ssa);
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
bool ir3_nir_lower_wide_load_store(nir_shader *shader);
+bool ir3_nir_lower_layer_id(nir_shader *shader);
void ir3_nir_lower_to_explicit_output(nir_shader *shader,
struct ir3_shader_variant *v,
--- /dev/null
+/*
+ * Copyright 2023 Igalia S.L.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "ir3_nir.h"
+
+static bool
+nir_lower_layer_id(nir_builder *b, nir_instr *instr, UNUSED void *cb_data)
+{
+ if (instr->type != nir_instr_type_intrinsic) {
+ return false;
+ }
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_load_layer_id)
+ return false;
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_variable *layer = nir_find_variable_with_location(b->shader, nir_var_shader_in, VARYING_SLOT_LAYER);
+
+ if (!layer) {
+ layer = nir_variable_create(b->shader, nir_var_shader_in, glsl_int_type(), "layer");
+ layer->data.location = VARYING_SLOT_LAYER;
+ layer->data.driver_location = b->shader->num_inputs++;
+ }
+
+ nir_intrinsic_instr *load_input = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+ nir_intrinsic_set_base(load_input, layer->data.driver_location);
+ nir_intrinsic_set_component(load_input, 0);
+ load_input->num_components = 1;
+ load_input->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_intrinsic_set_dest_type(load_input, nir_type_int);
+ nir_io_semantics semantics = {
+ .location = VARYING_SLOT_LAYER,
+ .num_slots = 1,
+ };
+ nir_intrinsic_set_io_semantics(load_input, semantics);
+ nir_ssa_dest_init(&load_input->instr, &load_input->dest, 1, 32, NULL);
+ nir_builder_instr_insert(b, &load_input->instr);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, &load_input->dest.ssa);
+ return true;
+}
+
+bool ir3_nir_lower_layer_id(nir_shader *shader)
+{
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+ return nir_shader_instructions_pass(shader, nir_lower_layer_id,
+ nir_metadata_block_index | nir_metadata_dominance,
+ NULL);
+}
\ No newline at end of file
v->fs.early_fragment_tests = info->fs.early_fragment_tests;
v->fs.color_is_dual_source = info->fs.color_is_dual_source;
v->fs.uses_fbfetch_output = info->fs.uses_fbfetch_output;
+ v->fs.fbfetch_coherent = info->fs.fbfetch_coherent;
break;
case MESA_SHADER_COMPUTE:
bool early_fragment_tests : 1;
bool color_is_dual_source : 1;
bool uses_fbfetch_output : 1;
+ bool fbfetch_coherent : 1;
} fs;
struct {
unsigned req_input_mem;
'ir3_nir_lower_tex_prefetch.c',
'ir3_nir_lower_wide_load_store.c',
'ir3_nir_move_varying_inputs.c',
+ 'ir3_nir_lower_layer_id.c',
'ir3_nir_opt_preamble.c',
'ir3_postsched.c',
'ir3_print.c',
uint32_t prim_mode = NO_FLUSH;
if (emit->fs->fs.uses_fbfetch_output) {
if (gmem) {
- prim_mode = ctx->blend->blend_coherent ? FLUSH_PER_OVERLAP : NO_FLUSH;
+ prim_mode = (ctx->blend->blend_coherent || emit->fs->fs.fbfetch_coherent)
+ ? FLUSH_PER_OVERLAP : NO_FLUSH;
} else {
prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
}
static void
patch_fb_read_gmem(struct fd_batch *batch)
{
+ struct fd_screen *screen = batch->ctx->screen;
+ const struct fd_gmem_stateobj *gmem = batch->gmem_state;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+
unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
if (!num_patches)
return;
- struct fd_screen *screen = batch->ctx->screen;
- const struct fd_gmem_stateobj *gmem = batch->gmem_state;
- struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct pipe_surface *psurf = pfb->cbufs[0];
- struct pipe_resource *prsc = psurf->texture;
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format format = psurf->format;
-
- uint8_t swiz[4];
- fdl6_format_swiz(psurf->format, false, swiz);
-
- /* always TILE6_2 mode in GMEM, which also means no swap: */
- uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
- A6XX_TEX_CONST_0_FMT(fd6_texture_format(
- format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
- A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
- A6XX_TEX_CONST_0_SWAP(WZYX) |
- A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
- COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
- A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
- A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
- A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
- A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
+ for (unsigned i = 0; i < num_patches; i++) {
+ struct fd_cs_patch *patch =
+ fd_patch_element(&batch->fb_read_patches, i);
+ int buf = patch->val;
+ struct pipe_surface *psurf = pfb->cbufs[buf];
+ struct pipe_resource *prsc = psurf->texture;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format format = psurf->format;
+
+ uint8_t swiz[4];
+ fdl6_format_swiz(psurf->format, false, swiz);
+
+ uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
+ /* always TILE6_2 mode in GMEM, which also means no swap: */
+ uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
+ A6XX_TEX_CONST_0_FMT(fd6_texture_format(
+ format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
+ A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
+ A6XX_TEX_CONST_0_SWAP(WZYX) |
+ A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
+ COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
+ A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
+ A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
+ A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
+ A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
A6XX_TEX_CONST_1_WIDTH(pfb->width) |
- A6XX_TEX_CONST_1_HEIGHT(pfb->height),
+ A6XX_TEX_CONST_1_HEIGHT(pfb->height),
- A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
- A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
+ A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) |
+ A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size),
- A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base),
+ A6XX_TEX_CONST_4_BASE_LO(base),
- A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
- A6XX_TEX_CONST_5_DEPTH(1)
- };
+ A6XX_TEX_CONST_5_BASE_HI(base >> 32) |
+ A6XX_TEX_CONST_5_DEPTH(prsc->array_size)
+ };
- for (unsigned i = 0; i < num_patches; i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
}
+
util_dynarray_clear(&batch->fb_read_patches);
}
static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
- unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
- if (!num_patches)
- return;
-
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- struct pipe_surface *psurf = pfb->cbufs[0];
- if (!psurf)
- return;
- struct fd_resource *rsc = fd_resource(psurf->texture);
+ unsigned num_patches =
+ fd_patch_num_elements(&batch->fb_read_patches);
+ if (!num_patches)
+ return;
+ for (unsigned i = 0; i < num_patches; i++) {
+ struct fd_cs_patch *patch =
+ fd_patch_element(&batch->fb_read_patches, i);
+ int buf = patch->val;
- uint32_t block_width, block_height;
- fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+ struct pipe_surface *psurf = pfb->cbufs[buf];
+ if (!psurf)
+ return;
- struct fdl_view_args args = {
- .iova = fd_bo_get_iova(rsc->bo),
+ struct pipe_resource *prsc = psurf->texture;
+ struct fd_resource *rsc = fd_resource(prsc);
- .base_miplevel = psurf->u.tex.level,
- .level_count = 1,
+ uint32_t block_width, block_height;
+ fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
- .base_array_layer = psurf->u.tex.first_layer,
- .layer_count = 1,
+ struct fdl_view_args args = {
+ .iova = fd_bo_get_iova(rsc->bo),
- .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W},
- .format = psurf->format,
+ .base_miplevel = psurf->u.tex.level,
+ .level_count = 1,
- .type = FDL_VIEW_TYPE_2D,
- .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
- FDL_CHROMA_LOCATION_COSITED_EVEN},
- };
- const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
- struct fdl6_view view;
- fdl6_view_init(&view, layouts, &args,
- batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
+ .base_array_layer = psurf->u.tex.first_layer,
+ .layer_count = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1,
- for (unsigned i = 0; i < num_patches; i++) {
- struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
+ .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
+ PIPE_SWIZZLE_W},
+ .format = psurf->format,
- /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
- * the render target will already have a reloc emitted for RB_MRT state,
- * so we can get away with manually patching in the address here:
- */
+ .type = FDL_VIEW_TYPE_2D,
+ .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
+ FDL_CHROMA_LOCATION_COSITED_EVEN},
+ };
+ const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
+ struct fdl6_view view;
+ fdl6_view_init(&view, layouts, &args,
+ batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
}
+
util_dynarray_clear(&batch->fb_read_patches);
}
memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));
if (unlikely(append_fb_read)) {
- /* The last image slot is used for fb-read: */
- unsigned idx = IR3_BINDLESS_DESC_COUNT - 1;
+ /* Reserve A6XX_MAX_RENDER_TARGETS image slots for fb-read */
+ unsigned idx = IR3_BINDLESS_DESC_COUNT - 1 - A6XX_MAX_RENDER_TARGETS;
- /* This is patched with the appropriate descriptor for GMEM or
- * sysmem rendering path in fd6_gmem
- */
-
- struct fd_cs_patch patch = {
- .cs = &desc_buf[idx * FDL6_TEX_CONST_DWORDS],
- };
- util_dynarray_append(&ctx->batch->fb_read_patches,
- __typeof__(patch), patch);
+ for (int i = 0; i < ctx->batch->framebuffer.nr_cbufs; i++) {
+ /* This is patched with the appropriate descriptor for GMEM or
+ * sysmem rendering path in fd6_gmem
+ */
+ struct fd_cs_patch patch = {
+ .cs = &desc_buf[(idx + i) * FDL6_TEX_CONST_DWORDS],
+ .val = i,
+ };
+ util_dynarray_append(&ctx->batch->fb_read_patches,
+ __typeof__(patch), patch);
+ }
}
}
fd_reset_wfi(batch);
util_dynarray_init(&batch->draw_patches, NULL);
   util_dynarray_init(&batch->fb_read_patches, NULL);
if (is_a2xx(ctx->screen)) {
util_dynarray_init(&batch->shader_patches, NULL);
cleanup_submit(batch);
util_dynarray_fini(&batch->draw_patches);
   /* fb_read_patches is a single dynarray (patches carry the MRT index
    * in their val field), so it is finalized exactly once — not once
    * per render target.
    */
   util_dynarray_fini(&batch->fb_read_patches);
if (is_a2xx(batch->ctx->screen)) {
util_dynarray_fini(&batch->shader_patches);
case PIPE_CAP_FBFETCH:
if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&
is_a6xx(screen))
- return 1;
+ return screen->max_rts;
return 0;
case PIPE_CAP_SAMPLE_SHADING:
if (is_a6xx(screen))
struct fd_screen *screen = fd_screen(pscreen);
struct ir3_compiler_options options = {
- .bindless_fb_read_descriptor =
- ir3_shader_descriptor_set(PIPE_SHADER_FRAGMENT),
- .bindless_fb_read_slot =
- IR3_BINDLESS_IMAGE_OFFSET + IR3_BINDLESS_IMAGE_COUNT - 1,
+ .bindless_fb_read_descriptor =
+ ir3_shader_descriptor_set(PIPE_SHADER_FRAGMENT),
+ .bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET +
+ IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts,
};
screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options);