From: Mike Blumenkrantz
Date: Tue, 12 Apr 2022 21:26:53 +0000 (-0400)
Subject: zink: implement indirect buffer indexing
X-Git-Tag: upstream/22.3.5~9247
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a7327c7cac9b49ac1c7679f91c4727da0d60f501;p=platform%2Fupstream%2Fmesa.git

zink: implement indirect buffer indexing

this compacts all buffers in the shader into an array that can be used
in a single descriptor, thus handling the case of indirect indexing
while also turning constant indexing into indirect (with const offsets),
since there's no sane way to distinguish the two

a "proper" implementation of this would be to skip gl_nir_lower_buffers
and nir_lower_explicit_io altogether and retain the derefs, but that
would require a ton of legwork in other nir passes, which only operate
on the explicit io intrinsics

Reviewed-by: Dave Airlie
Part-of:
---
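for illustration, a minimal sketch (not part of the patch) of what the bo
load path looks like after remove_bo_access, written with the same
nir_builder helpers the pass itself uses below; the helper name and
parameters here are hypothetical, the real logic lives in
remove_bo_access_instr:

  /* "var" is the compacted "ssbos" variable created by unbreak_bos,
   * "block" is the (possibly non-constant) buffer index from the intrinsic,
   * "offset" has already been scaled to array-element units by
   * rewrite_bo_access, and "first_ssbo" is ffs(zs->ssbos_used) - 1 */
  static nir_ssa_def *
  load_compacted_ssbo(nir_builder *b, nir_variable *var, nir_ssa_def *block,
                      nir_ssa_def *offset, uint32_t first_ssbo)
  {
     /* rebase the GL buffer slot onto the shrunken descriptor array */
     nir_ssa_def *idx = first_ssbo ? nir_iadd_imm(b, block, -first_ssbo) : block;
     /* ssbos[idx] */
     nir_deref_instr *deref_array = nir_build_deref_array(b, nir_build_deref_var(b, var), idx);
     /* ssbos[idx].base */
     nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
     /* ssbos[idx].base[offset] */
     return nir_load_deref(b, nir_build_deref_array(b, deref_struct, offset));
  }

a constant block index takes exactly the same path, which is the "turning
constant indexing into indirect (with const offsets)" mentioned above.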
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index d975eff..3dd102f 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -46,18 +46,19 @@ struct ntv_context {
    nir_shader *nir;
 
    struct hash_table *glsl_types;
-   struct hash_table *bo_types;
+   struct hash_table *bo_struct_types;
+   struct hash_table *bo_array_types;
 
    SpvId GLSL_std_450;
 
    gl_shader_stage stage;
    const struct zink_shader_info *sinfo;
 
-   SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
-   nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];
+   SpvId ubos[2][5]; //8, 16, 32, unused, 64
+   nir_variable *ubo_vars[2];
 
-   SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][5]; //8, 16, 32, unused, 64
-   nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
+   SpvId ssbos[5]; //8, 16, 32, unused, 64
+   nir_variable *ssbo_vars;
+
    SpvId image_types[PIPE_MAX_SAMPLERS];
    SpvId images[PIPE_MAX_SAMPLERS];
    SpvId sampler_types[PIPE_MAX_SAMPLERS];
@@ -961,23 +962,22 @@ get_sized_uint_array_type(struct ntv_context *ctx, unsigned array_size, unsigned
    return array_type;
 }
 
+/* get array<uint> */
 static SpvId
 get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var)
 {
-   struct hash_entry *he = _mesa_hash_table_search(ctx->bo_types, var);
+   struct hash_entry *he = _mesa_hash_table_search(ctx->bo_array_types, var);
    if (he)
       return (SpvId)(uintptr_t)he->data;
-   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(var->type, 0)));
+   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0)));
    assert(bitsize);
    SpvId array_type;
-   const struct glsl_type *type = var->type;
-   if (!glsl_type_is_unsized_array(type)) {
-      type = glsl_get_struct_field(var->interface_type, 0);
-      if (!glsl_type_is_unsized_array(type)) {
-         uint32_t array_size = glsl_get_length(type) * (bitsize / 4);
-         assert(array_size);
-         return get_sized_uint_array_type(ctx, array_size, bitsize);
-      }
+   const struct glsl_type *type = glsl_without_array(var->type);
+   const struct glsl_type *first_type = glsl_get_struct_field(type, 0);
+   if (!glsl_type_is_unsized_array(first_type)) {
+      uint32_t array_size = glsl_get_length(first_type);
+      assert(array_size);
+      return get_sized_uint_array_type(ctx, array_size, bitsize);
    }
    SpvId uint_type = spirv_builder_type_uint(&ctx->builder, bitsize);
    array_type = spirv_builder_type_runtime_array(&ctx->builder, uint_type);
@@ -985,18 +985,23 @@ get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var)
    return array_type;
 }
 
+/* get array<buffers> */
 static SpvId
 get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var)
 {
-   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(var->type, 0)));
+   struct hash_entry *he = _mesa_hash_table_search(ctx->bo_struct_types, var);
+   if (he)
+      return (SpvId)(uintptr_t)he->data;
+   const struct glsl_type *bare_type = glsl_without_array(var->type);
+   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(bare_type, 0)));
    SpvId array_type = get_bo_array_type(ctx, var);
-   _mesa_hash_table_insert(ctx->bo_types, var, (void *)(uintptr_t)array_type);
+   _mesa_hash_table_insert(ctx->bo_array_types, var, (void *)(uintptr_t)array_type);
    bool ssbo = var->data.mode == nir_var_mem_ssbo;
 
    // wrap UBO-array in a struct
    SpvId runtime_array = 0;
-   if (ssbo && glsl_get_length(var->interface_type) > 1) {
-      const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, glsl_get_length(var->interface_type) - 1);
+   if (ssbo && glsl_get_length(bare_type) > 1) {
+      const struct glsl_type *last_member = glsl_get_struct_field(bare_type, glsl_get_length(bare_type) - 1);
       if (glsl_type_is_unsized_array(last_member)) {
          bool is_64bit = glsl_type_is_64bit(glsl_without_array(last_member));
         runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, is_64bit ? 64 : bitsize, 1));
@@ -1014,35 +1019,35 @@ get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var)
    spirv_builder_emit_decoration(&ctx->builder, struct_type, SpvDecorationBlock);
    spirv_builder_emit_member_offset(&ctx->builder, struct_type, 0, 0);
-   if (runtime_array) {
-      spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1,
-                                       glsl_get_struct_field_offset(var->interface_type,
-                                                                    glsl_get_length(var->interface_type) - 1));
-   }
+   if (runtime_array)
+      spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, 0);
 
-   return spirv_builder_type_pointer(&ctx->builder,
-                                     ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform,
-                                     struct_type);
+   return struct_type;
 }
 
 static void
 emit_bo(struct ntv_context *ctx, struct nir_variable *var)
 {
-   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(var->type, 0)));
+   unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0)));
    bool ssbo = var->data.mode == nir_var_mem_ssbo;
-   SpvId pointer_type = get_bo_struct_type(ctx, var);
-
+   SpvId struct_type = get_bo_struct_type(ctx, var);
+   _mesa_hash_table_insert(ctx->bo_struct_types, var, (void *)(uintptr_t)struct_type);
+   SpvId array_length = emit_uint_const(ctx, 32, glsl_get_length(var->type));
+   SpvId array_type = spirv_builder_type_array(&ctx->builder, struct_type, array_length);
+   SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+                                                   ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform,
+                                                   array_type);
    SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
                                          ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform);
    if (var->name)
       spirv_builder_emit_name(&ctx->builder, var_id, var->name);
 
    unsigned idx = bitsize >> 4;
-   assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
+   assert(idx < ARRAY_SIZE(ctx->ssbos));
    if (ssbo) {
-      assert(!ctx->ssbos[var->data.driver_location][idx]);
-      ctx->ssbos[var->data.driver_location][idx] = var_id;
-      ctx->ssbo_vars[var->data.driver_location] = var;
+      assert(!ctx->ssbos[idx]);
+      ctx->ssbos[idx] = var_id;
+      ctx->ssbo_vars = var;
    } else {
       assert(!ctx->ubos[var->data.driver_location][idx]);
       ctx->ubos[var->data.driver_location][idx] = var_id;
@@ -2494,13 +2499,10 @@ emit_ssbo_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId param;
    SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32);
 
-   nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]);
-   assert(const_block_index); // no dynamic indexing for now
    unsigned bit_size = MIN2(nir_src_bit_size(intr->src[0]), 32);
    unsigned idx = bit_size >> 4;
-   assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
-   assert(ctx->ssbos[const_block_index->u32][idx]);
-   ssbo = ctx->ssbos[const_block_index->u32][idx];
+   assert(idx < ARRAY_SIZE(ctx->ssbos));
+   ssbo = ctx->ssbos[idx];
    param = get_src(ctx, &intr->src[2]);
 
    SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
@@ -2509,10 +2511,11 @@ emit_ssbo_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
    /* an id of the array stride in bytes */
    SpvId uint_size = emit_uint_const(ctx, 32, bit_size / 8);
+   SpvId bo = get_src(ctx, &intr->src[0]);
    SpvId member = emit_uint_const(ctx, 32, 0);
    SpvId offset = get_src(ctx, &intr->src[1]);
    SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size);
-   SpvId indices[] = { member, vec_offset };
+   SpvId indices[] = { bo, member, vec_offset };
    SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
                                                ssbo, indices, ARRAY_SIZE(indices));
@@ -2550,25 +2553,32 @@ static void
 emit_get_ssbo_size(struct ntv_context *ctx, nir_intrinsic_instr *intr)
 {
    SpvId uint_type = get_uvec_type(ctx, 32, 1);
-   nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]);
-   assert(const_block_index); // no dynamic indexing for now
-   nir_variable *var = ctx->ssbo_vars[const_block_index->u32];
-   unsigned last_member_idx = glsl_get_length(var->interface_type) - 1;
+   nir_variable *var = ctx->ssbo_vars;
+   const struct glsl_type *bare_type = glsl_without_array(var->type);
+   unsigned last_member_idx = glsl_get_length(bare_type) - 1;
+   SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+                                                   SpvStorageClassStorageBuffer,
+                                                   get_bo_struct_type(ctx, var));
+   SpvId bo = get_src(ctx, &intr->src[0]);
+   SpvId indices[] = { bo };
+   SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
+                                               ctx->ssbos[2], indices,
+                                               ARRAY_SIZE(indices));
    SpvId result = spirv_builder_emit_binop(&ctx->builder, SpvOpArrayLength, uint_type,
-                                           ctx->ssbos[const_block_index->u32][2], last_member_idx);
+                                           ptr, last_member_idx);
    /* this is going to be converted by nir to:
    * length = (buffer_size - offset) / stride
    * so we need to un-convert it to avoid having the calculation performed twice
    */
-   const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, last_member_idx);
+   const struct glsl_type *last_member = glsl_get_struct_field(bare_type, last_member_idx);
    /* multiply by stride */
    result = emit_binop(ctx, SpvOpIMul, uint_type, result,
                        emit_uint_const(ctx, 32, glsl_get_explicit_stride(last_member)));
    /* get total ssbo size by adding offset */
    result = emit_binop(ctx, SpvOpIAdd, uint_type, result,
                        emit_uint_const(ctx, 32,
-                                       glsl_get_struct_field_offset(var->interface_type, last_member_idx)));
+                                       glsl_get_struct_field_offset(bare_type, last_member_idx)));
    store_dest(ctx, &intr->dest, result, nir_type_uint);
 }
 
@@ -3535,11 +3545,21 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref)
    SpvStorageClass storage_class = get_storage_class(var);
    SpvId base, type;
    switch (var->data.mode) {
+
+   case nir_var_mem_ubo:
+   case nir_var_mem_ssbo:
+      base = get_src(ctx, &deref->parent);
+      /* this is either the array<buffers> deref or the array<uint> deref */
+      if (glsl_type_is_struct_or_ifc(deref->type)) {
+         /* array<buffers> */
+         type = get_bo_struct_type(ctx, var);
+         break;
+      }
+      /* array<uint> */
+      FALLTHROUGH;
    case nir_var_function_temp:
    case nir_var_shader_in:
    case nir_var_shader_out:
-   case nir_var_mem_ubo:
-   case nir_var_mem_ssbo:
       base = get_src(ctx, &deref->parent);
       type = get_glsl_type(ctx, deref->type);
       break;
@@ -3898,8 +3918,9 @@ nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *sinfo, uint32_
    ctx.spirv_1_4_interfaces = spirv_version >= SPIRV_VERSION(1, 4);
 
    ctx.glsl_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
-   ctx.bo_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
-   if (!ctx.glsl_types || !ctx.bo_types)
+   ctx.bo_array_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
+   ctx.bo_struct_types = _mesa_pointer_hash_table_create(ctx.mem_ctx);
+   if (!ctx.glsl_types || !ctx.bo_array_types || !ctx.bo_struct_types)
       goto fail;
 
    spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShader);
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 56b0fc5..cd850fc 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -878,6 +878,7 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    case nir_intrinsic_load_ubo: {
       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+                        nir_src_is_const(intr->src[0]) &&
                         nir_src_as_uint(intr->src[0]) == 0 &&
                         nir_dest_bit_size(intr->dest) == 64 &&
                         nir_intrinsic_align_offset(intr) % 8 != 0;
@@ -958,36 +959,52 @@ rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
 }
 
 struct bo_vars {
-   nir_variable *ubo[PIPE_MAX_CONSTANT_BUFFERS][5];
-   nir_variable *ssbo[PIPE_MAX_CONSTANT_BUFFERS][5];
+   nir_variable *ubo[2][5];
+   nir_variable *ssbo[5];
+   uint32_t first_ubo;
+   uint32_t first_ssbo;
 };
 
 static nir_variable *
-get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, unsigned idx, unsigned bit_size)
+get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
 {
-   nir_variable *var;
-   nir_variable **arr = (nir_variable**)(ssbo ? bo->ssbo : bo->ubo);
+   nir_variable *var, **ptr;
+   nir_variable **arr = (nir_variable**)bo->ubo;
+   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
-   var = arr[idx * 5 + (bit_size >> 4)];
+   if (ssbo)
+      ptr = &bo->ssbo[bit_size >> 4];
+   else
+      ptr = &arr[idx * 5 + (bit_size >> 4)];
+   var = *ptr;
    if (!var) {
-      arr[idx * 5 + (bit_size >> 4)] = var = nir_variable_clone(arr[idx * 5 + (32 >> 4)], shader);
+      if (ssbo)
+         var = bo->ssbo[32 >> 4];
+      else
+         var = arr[idx * 5 + (32 >> 4)];
+      var = nir_variable_clone(var, shader);
+      *ptr = var;
       nir_shader_add_variable(shader, var);
       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
       fields[0].name = ralloc_strdup(shader, "base");
       fields[1].name = ralloc_strdup(shader, "unsized");
-      const struct glsl_type *array_type = glsl_get_struct_field(var->type, 0);
+      unsigned array_size = glsl_get_length(var->type);
+      const struct glsl_type *bare_type = glsl_without_array(var->type);
+      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
+      unsigned length = glsl_get_length(array_type);
       const struct glsl_type *type;
       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
       if (bit_size > 32) {
         assert(bit_size == 64);
-         type = glsl_array_type(glsl_uintN_t_type(bit_size), glsl_get_length(array_type) / 2, bit_size / 8);
+         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
      } else {
-         type = glsl_array_type(glsl_uintN_t_type(bit_size), glsl_get_length(array_type) * (32 / bit_size), bit_size / 8);
+         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
      }
       fields[0].type = type;
       fields[1].type = unsized;
-      var->type = glsl_struct_type(fields, glsl_get_length(var->type), "struct", false);
+      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
+      var->data.driver_location = idx;
    }
    return var;
 }
@@ -1003,31 +1020,57 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_ssa_def *offset = NULL;
    bool is_load = true;
    b->cursor = nir_before_instr(instr);
+   nir_src *src;
+   bool ssbo = true;
    switch (intr->intrinsic) {
+   /* TODO: these should all be rewritten to use deref intrinsics */
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_instr_rewrite_src_ssa(instr, &intr->src[0], nir_iadd_imm(b, intr->src[0].ssa, -bo->first_ssbo));
+      return true;
    case nir_intrinsic_store_ssbo:
-      var = get_bo_var(b->shader, bo, true, nir_src_as_uint(intr->src[1]), nir_src_bit_size(intr->src[0]));
+      src = &intr->src[1];
+      var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
       offset = intr->src[2].ssa;
       is_load = false;
       break;
    case nir_intrinsic_load_ssbo:
-      var = get_bo_var(b->shader, bo, true, nir_src_as_uint(intr->src[0]), nir_dest_bit_size(intr->dest));
+      src = &intr->src[0];
+      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
       offset = intr->src[1].ssa;
       break;
    case nir_intrinsic_load_ubo:
-      var = get_bo_var(b->shader, bo, false, nir_src_as_uint(intr->src[0]), nir_dest_bit_size(intr->dest));
+      src = &intr->src[0];
+      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
       offset = intr->src[1].ssa;
+      ssbo = false;
       break;
    default:
       return false;
    }
    assert(var);
    assert(offset);
-   nir_deref_instr *deref_var = nir_build_deref_struct(b, nir_build_deref_var(b, var), 0);
+   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
+   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
+   if (!ssbo && bo->first_ubo && var->data.driver_location)
+      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
+   else if (ssbo && bo->first_ssbo)
+      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
+   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
+   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
    assert(intr->num_components <= 2);
    if (is_load) {
       nir_ssa_def *result[2];
       for (unsigned i = 0; i < intr->num_components; i++) {
-         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_var, offset);
+         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
@@ -1036,7 +1079,7 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
    } else {
-      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_var, offset);
+      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
    }
    nir_instr_remove(instr);
@@ -1044,17 +1087,23 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static bool
-remove_bo_access(nir_shader *shader)
+remove_bo_access(nir_shader *shader, struct zink_shader *zs)
 {
    struct bo_vars bo;
    memset(&bo, 0, sizeof(bo));
+   if (zs->ubos_used)
+      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
+   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
+   if (zs->ssbos_used)
+      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
+   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
       if (var->data.mode == nir_var_mem_ssbo) {
-         assert(!bo.ssbo[var->data.driver_location][32 >> 4]);
-         bo.ssbo[var->data.driver_location][32 >> 4] = var;
+         assert(!bo.ssbo[32 >> 4]);
+         bo.ssbo[32 >> 4] = var;
       } else {
-         assert(!bo.ubo[var->data.driver_location][32 >> 4]);
-         bo.ubo[var->data.driver_location][32 >> 4] = var;
+         assert(!bo.ubo[!!var->data.driver_location][32 >> 4]);
+         bo.ubo[!!var->data.driver_location][32 >> 4] = var;
       }
    }
    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
@@ -1385,7 +1434,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
    if (screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
-      NIR_PASS_V(nir, remove_bo_access);
+      NIR_PASS_V(nir, remove_bo_access, zs);
    }
    if (inlined_uniforms) {
       optimize_nir(nir);
@@ -1441,32 +1490,108 @@ bool nir_lower_dynamic_bo_access(nir_shader *shader);
  * so instead we delete all those broken variables and just make new ones
  */
 static bool
-unbreak_bos(nir_shader *shader)
+unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
 {
-   uint32_t ssbo_used = 0;
-   uint32_t ubo_used = 0;
    uint64_t max_ssbo_size = 0;
    uint64_t max_ubo_size = 0;
-   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};
-   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
+   if (!shader->info.num_ssbos && !shader->info.num_ubos)
       return false;
+
+   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
+      const struct glsl_type *type = glsl_without_array(var->type);
+      if (type_is_counter(type))
+         continue;
+      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
+      if (var->data.mode == nir_var_mem_ubo)
+         max_ubo_size = MAX2(max_ubo_size, size);
+      else
+         max_ssbo_size = MAX2(max_ssbo_size, size);
+      var->data.mode = nir_var_shader_temp;
+   }
+   nir_fixup_deref_modes(shader);
+   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+   optimize_nir(shader);
+
+   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
+   fields[0].name = ralloc_strdup(shader, "base");
+   fields[1].name = ralloc_strdup(shader, "unsized");
+   if (shader->info.num_ubos) {
+      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
+      fields[0].type = ubo_type;
+      if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
+         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
+                                                 glsl_array_type(glsl_struct_type(fields, 1, "struct", false), 1, 0),
+                                                 "uniform_0");
+         var->interface_type = var->type;
+         var->data.mode = nir_var_mem_ubo;
+         var->data.driver_location = 0;
+      }
+
+      unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
+      uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
+      if (num_ubos && ubos_used) {
+         /* shrink array as much as possible */
+         unsigned first_ubo = ffs(ubos_used) - 2;
+         assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
+         num_ubos -= first_ubo;
+         assert(num_ubos);
+         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
+                                                 glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0),
+                                                 "ubos");
+         var->interface_type = var->type;
+         var->data.mode = nir_var_mem_ubo;
+         var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
+      }
+   }
+   if (shader->info.num_ssbos && zs->ssbos_used) {
+      /* shrink array as much as possible */
+      unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
+      assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
+      unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
+      assert(num_ssbos);
+      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
+      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
+      fields[0].type = ssbo_type;
+      fields[1].type = max_ssbo_size ? unsized : NULL;
+      unsigned field_count = max_ssbo_size && needs_size ? 2 : 1;
+      nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
+                                              glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0),
+                                              "ssbos");
+      var->interface_type = var->type;
+      var->data.mode = nir_var_mem_ssbo;
+      var->data.driver_location = first_ssbo;
+   }
+   return true;
+}
+
+static uint32_t
+get_src_mask(unsigned total, nir_src src)
+{
+   if (nir_src_is_const(src))
+      return BITFIELD_BIT(nir_src_as_uint(src));
+   return BITFIELD_MASK(total);
+}
+
+static bool
+analyze_io(struct zink_shader *zs, nir_shader *shader)
+{
+   bool ret = false;
    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
    nir_foreach_block(block, impl) {
       nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
-
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
-            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
+            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[1]);
            break;
-
+
         case nir_intrinsic_get_ssbo_size: {
-            uint32_t slot = nir_src_as_uint(intrin->src[0]);
-            ssbo_used |= BITFIELD_BIT(slot);
-            ssbo_sizes[slot] = true;
+            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[0]);
+            ret = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
@@ -1483,69 +1608,18 @@ unbreak_bos(nir_shader *shader)
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
-            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
+            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[0]);
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
-            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
+            zs->ubos_used |= get_src_mask(shader->info.num_ubos, intrin->src[0]);
            break;
         default:
            break;
         }
      }
   }
-
-   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
-      const struct glsl_type *type = glsl_without_array(var->type);
-      if (type_is_counter(type))
-         continue;
-      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
-      if (var->data.mode == nir_var_mem_ubo)
-         max_ubo_size = MAX2(max_ubo_size, size);
-      else
-         max_ssbo_size = MAX2(max_ssbo_size, size);
-      var->data.mode = nir_var_shader_temp;
-   }
-   nir_fixup_deref_modes(shader);
-   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(shader);
-
-   if (!ssbo_used && !ubo_used)
-      return false;
-
-   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
-   fields[0].name = ralloc_strdup(shader, "base");
-   fields[1].name = ralloc_strdup(shader, "unsized");
-   if (ubo_used) {
-      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
-      fields[0].type = ubo_type;
-      u_foreach_bit(slot, ubo_used) {
-         char buf[64];
-         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
-         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
-         var->interface_type = var->type;
-         var->data.driver_location = slot;
-      }
-   }
-   if (ssbo_used) {
-      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
-      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
-      fields[0].type = ssbo_type;
-      u_foreach_bit(slot, ssbo_used) {
-         char buf[64];
-         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
-         bool use_runtime = ssbo_sizes[slot] && max_ssbo_size;
-         if (use_runtime)
-            fields[1].type = unsized;
-         else
-            fields[1].type = NULL;
-         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
-                                                 glsl_struct_type(fields, 1 + use_runtime, "struct", false), buf);
-         var->interface_type = var->type;
-         var->data.driver_location = slot;
-      }
-   }
-   return true;
+   return ret;
 }
 
 /* this is a "default" bindless texture used if the shader has no texture variables */
@@ -1729,8 +1803,7 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
    switch (type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-      assert(index < PIPE_MAX_CONSTANT_BUFFERS);
-      return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;
+      return stage * 2 + !!index;
 
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
@@ -1738,8 +1811,7 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
       return (stage * PIPE_MAX_SAMPLERS) + index;
 
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      assert(index < PIPE_MAX_SHADER_BUFFERS);
-      return (stage * PIPE_MAX_SHADER_BUFFERS) + index;
+      return stage;
 
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
@@ -2063,12 +2135,13 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       NIR_PASS_V(nir, nir_lower_fragcolor,
                  nir->info.fs.color_is_dual_source ? 1 : 8);
    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
-   NIR_PASS_V(nir, unbreak_bos);
+   bool needs_size = analyze_io(ret, nir);
+   NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms) {
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
-      NIR_PASS_V(nir, remove_bo_access);
+      NIR_PASS_V(nir, remove_bo_access, ret);
    }
 
    if (zink_debug & ZINK_DEBUG_NIR) {
@@ -2115,8 +2188,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
          ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
          ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
-         ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
-         ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
+         ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
+         assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
          ret->num_bindings[ztype]++;
       } else if (var->data.mode == nir_var_mem_ssbo) {
          ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
@@ -2125,10 +2198,10 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                                                                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                                                 var->data.driver_location);
          ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
-         ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
          ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
          ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-         ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
+         ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
+         assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
          ret->num_bindings[ztype]++;
       } else {
          assert(var->data.mode == nir_var_uniform ||
@@ -2209,8 +2282,6 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
    if (nir->info.stage == MESA_SHADER_GEOMETRY)
       NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
    optimize_nir(nir);
-   if (nir->info.num_ubos || nir->info.num_ssbos)
-      NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
    if (screen->driconf.inline_uniforms)
       nir_find_inlinable_uniforms(nir);
diff --git a/src/gallium/drivers/zink/zink_descriptors_lazy.c b/src/gallium/drivers/zink/zink_descriptors_lazy.c
index be3f3df..cca84b9 100644
--- a/src/gallium/drivers/zink/zink_descriptors_lazy.c
+++ b/src/gallium/drivers/zink/zink_descriptors_lazy.c
@@ -78,12 +78,13 @@ bdd_lazy(struct zink_batch_state *bs)
 
 static void
 init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type,
-                    unsigned idx, unsigned offset, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx, bool flatten_dynamic)
+                    unsigned idx, VkDescriptorUpdateTemplateEntry *entry, unsigned *entry_idx, bool flatten_dynamic)
 {
    int index = shader->bindings[type][idx].index;
    enum pipe_shader_type stage = pipe_shader_type_from_mesa(shader->nir->info.stage);
    entry->dstArrayElement = 0;
    entry->dstBinding = shader->bindings[type][idx].binding;
+   entry->descriptorCount = shader->bindings[type][idx].size;
    if (shader->bindings[type][idx].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC && flatten_dynamic)
       /* filter out DYNAMIC type here */
       entry->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
@@ -92,33 +93,27 @@ init_template_entry(struct zink_shader *shader, enum zink_descriptor_type type,
    switch (shader->bindings[type][idx].type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-      entry->descriptorCount = 1;
-      entry->offset = offsetof(struct zink_context, di.ubos[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.ubos[stage][index]);
       entry->stride = sizeof(VkDescriptorBufferInfo);
       break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      entry->descriptorCount = shader->bindings[type][idx].size;
-      entry->offset = offsetof(struct zink_context, di.textures[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.textures[stage][index]);
       entry->stride = sizeof(VkDescriptorImageInfo);
       break;
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-      entry->descriptorCount = shader->bindings[type][idx].size;
-      entry->offset = offsetof(struct zink_context, di.tbos[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.tbos[stage][index]);
       entry->stride = sizeof(VkBufferView);
       break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      entry->descriptorCount = 1;
-      entry->offset = offsetof(struct zink_context, di.ssbos[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.ssbos[stage][index]);
       entry->stride = sizeof(VkDescriptorBufferInfo);
       break;
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-      entry->descriptorCount = shader->bindings[type][idx].size;
-      entry->offset = offsetof(struct zink_context, di.images[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.images[stage][index]);
       entry->stride = sizeof(VkDescriptorImageInfo);
       break;
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-      entry->descriptorCount = shader->bindings[type][idx].size;
-      entry->offset = offsetof(struct zink_context, di.texel_images[stage][index + offset]);
+      entry->offset = offsetof(struct zink_context, di.texel_images[stage][index]);
       entry->stride = sizeof(VkBufferView);
       break;
    default:
@@ -207,21 +202,7 @@ zink_descriptor_program_init_lazy(struct zink_context *ctx, struct zink_program
             enum zink_descriptor_size_index idx = zink_vktype_to_size_idx(shader->bindings[j][k].type);
             sizes[idx].descriptorCount += shader->bindings[j][k].size;
             sizes[idx].type = shader->bindings[j][k].type;
-            switch (shader->bindings[j][k].type) {
-            case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-               init_template_entry(shader, j, k, 0, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY);
-               break;
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-               for (unsigned l = 0; l < shader->bindings[j][k].size; l++)
-                  init_template_entry(shader, j, k, l, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY);
-               break;
-            default:
-               break;
-            }
+            init_template_entry(shader, j, k, &entries[j][entry_idx[j]], &entry_idx[j], screen->descriptor_mode == ZINK_DESCRIPTOR_MODE_LAZY);
             num_bindings[j]++;
             has_bindings |= BITFIELD_BIT(j);
          }
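as a closing note on the zink_binding() change, a self-contained sketch of
the resulting per-stage binding layout; the numbers are illustrative, and
since UBOs and SSBOs are distinct descriptor types, the overlap between
their binding values is harmless:

  #include <stdio.h>

  /* mirrors the new arithmetic in zink_binding(): each stage now has at
   * most two UBO bindings (slot 0, i.e. uniform_0, then the compacted
   * "ubos" array for all other slots) and one "ssbos" array binding */
  static unsigned ubo_binding(unsigned stage, int index) { return stage * 2 + !!index; }
  static unsigned ssbo_binding(unsigned stage) { return stage; }

  int main(void)
  {
     unsigned stage = 4; /* e.g. MESA_SHADER_FRAGMENT */
     printf("uniform_0 -> %u, ubos -> %u, ssbos -> %u\n",
            ubo_binding(stage, 0), ubo_binding(stage, 7), ssbo_binding(stage));
     /* prints: uniform_0 -> 8, ubos -> 9, ssbos -> 4 */
     return 0;
  }

because each compacted variable is one binding whose descriptorCount is
glsl_get_length(var->type), init_template_entry now emits a single template
entry per buffer binding instead of one per slot.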