This means less state walking at draw time (SSBO sizes are no longer uploaded as driver consts), in exchange for an extra SHL in the shader's size lookup.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12258>
KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
-# "Array 0 length is 60829990 should be 7
-# Array 1 length is 60829990 should be 5
-# Array 2 length is 60829990 should be 3
-# Array 3 length is 60829990 should be 4
-# Array 4 length is 132120576 should be 23
-# Array 5 length is 132120576 should be 123
-# Array 6 length is 66060288 should be 419"
-KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std430-vec,Fail
-
-# "Array 0 length is 266338304 should be 7
-# Array 1 length is 266338304 should be 3
-# Array 2 length is 266338304 should be 4
-# Array 3 length is 266338303 should be 4"
-KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-int,Fail
-
# "gl_NumWorkGroups: Invalid data at index 2"
KHR-GLES31.core.compute_shader.built-in-variables,Fail
emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
- if (ir3_bindless_resource(intr->src[0])) {
- struct ir3_block *b = ctx->block;
- struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);
- struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
- resinfo->cat6.iim_val = 1;
- resinfo->cat6.d = 1;
- resinfo->cat6.type = TYPE_U32;
- resinfo->cat6.typed = false;
- /* resinfo has no writemask and always writes out 3 components */
- resinfo->dsts[0]->wrmask = MASK(3);
- ir3_handle_bindless_cat6(resinfo, intr->src[0]);
+ struct ir3_block *b = ctx->block;
+ struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);
+ struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
+ resinfo->cat6.iim_val = 1;
+ resinfo->cat6.d = ctx->compiler->gen >= 6 ? 1 : 2;
+ resinfo->cat6.type = TYPE_U32;
+ resinfo->cat6.typed = false;
+ /* resinfo has no writemask and always writes out 3 components */
+ resinfo->dsts[0]->wrmask = MASK(3);
+ ir3_handle_bindless_cat6(resinfo, intr->src[0]);
+
+ if (ctx->compiler->gen >= 6) {
struct ir3_instruction *resinfo_dst;
ir3_split_dest(b, &resinfo_dst, resinfo, 0, 1);
/* Unfortunately resinfo returns the array length, i.e. in dwords,
- * while NIR expects us to return the size in bytes.
- *
- * TODO: fix this in NIR.
- */
+ * while NIR expects us to return the size in bytes.
+ *
+ * TODO: fix this in NIR.
+ */
*dst = ir3_SHL_B(b, resinfo_dst, 0, create_immed(b, 2), 0);
- return;
+ } else {
+ /* On a5xx, resinfo returns the low 16 bits of ssbo size in .x and the high 16 bits in .y */
+ struct ir3_instruction *resinfo_dst[2];
+ ir3_split_dest(b, resinfo_dst, resinfo, 0, 2);
+ *dst = ir3_ADD_U(b, ir3_SHL_B(b, resinfo_dst[1], 0, create_immed(b, 16), 0), 0, resinfo_dst[0], 0);
}
-
- /* SSBO size stored as a const starting at ssbo_sizes: */
- const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
- unsigned blk_idx = nir_src_as_uint(intr->src[0]);
- unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
- const_state->ssbo_size.off[blk_idx];
-
- debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
-
- dst[0] = create_uniform(ctx->block, idx);
}
/* src[] = { offset }. const_index[] = { base } */
unsigned idx;
switch (intr->intrinsic) {
- case nir_intrinsic_get_ssbo_size:
- if (ir3_bindless_resource(intr->src[0]))
- break;
- idx = nir_src_as_uint(intr->src[0]);
- if (layout->ssbo_size.mask & (1 << idx))
- break;
- layout->ssbo_size.mask |= (1 << idx);
- layout->ssbo_size.off[idx] = layout->ssbo_size.count;
- layout->ssbo_size.count += 1; /* one const per */
- break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
constoff += align(const_state->num_ubos * ptrsz, 4) / 4;
}
- if (const_state->ssbo_size.count > 0) {
- unsigned cnt = const_state->ssbo_size.count;
- const_state->offsets.ssbo_sizes = constoff;
- constoff += align(cnt, 4) / 4;
- }
-
if (const_state->image_dims.count > 0) {
unsigned cnt = const_state->image_dims.count;
const_state->offsets.image_dims = constoff;
struct {
/* user const start at zero */
unsigned ubo;
- /* NOTE that a3xx might need a section for SSBO addresses too */
- unsigned ssbo_sizes;
unsigned image_dims;
unsigned driver_param;
unsigned tfbo;
} offsets;
struct {
- uint32_t mask; /* bitmask of SSBOs that have get_ssbo_size */
- uint32_t count; /* number of consts allocated */
- /* one const allocated per SSBO which has get_ssbo_size,
- * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes
- * consts:
- */
- uint32_t off[IR3_MAX_SHADER_BUFFERS];
- } ssbo_size;
-
- struct {
uint32_t mask; /* bitmask of images that have image_store */
uint32_t count; /* number of consts allocated */
/* three const allocated per image which has image_store:
struct pipe_shader_buffer *buf = &so->sb[i];
unsigned sz = buf->buffer_size;
- /* width is in dwords, overflows into height: */
- sz /= 4;
-
- OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
+ /* Unlike a6xx, SSBO size is in bytes. */
+ OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz & MASK(16)));
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
OUT_PKT7(ring, CP_LOAD_STATE4, 5);
{
struct fd_context *ctx = emit->ctx;
- ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
}
}
static inline void
-ir3_emit_ssbo_sizes(struct fd_screen *screen,
- const struct ir3_shader_variant *v,
- struct fd_ringbuffer *ring,
- struct fd_shaderbuf_stateobj *sb)
-{
- const struct ir3_const_state *const_state = ir3_const_state(v);
- uint32_t offset = const_state->offsets.ssbo_sizes;
- if (v->constlen > offset) {
- uint32_t sizes[align(const_state->ssbo_size.count, 4)];
- unsigned mask = const_state->ssbo_size.mask;
-
- while (mask) {
- unsigned index = u_bit_scan(&mask);
- unsigned off = const_state->ssbo_size.off[index];
- sizes[off] = sb->sb[index].buffer_size;
- }
-
- emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes);
- }
-}
-
-static inline void
ir3_emit_image_dims(struct fd_screen *screen,
const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring,
ir3_emit_immediates(ctx->screen, v, ring);
}
- if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
- struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
- ring_wfi(ctx->batch, ring);
- ir3_emit_ssbo_sizes(ctx->screen, v, ring, sb);
- }
-
if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
ring_wfi(ctx->batch, ring);