From 823b3bfeeaf7cadd627ca44088c57463d69dfc0d Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 5 Sep 2023 11:26:16 +0200 Subject: [PATCH] ir3,tu: Refactor push consts info plumbing In preparation for a new way to pass push consts into a shader, introduced in a7xx. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3.c | 7 ++-- src/freedreno/ir3/ir3_compiler_nir.c | 2 +- src/freedreno/ir3/ir3_disk_cache.c | 8 ++-- src/freedreno/ir3/ir3_nir.c | 4 +- src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c | 2 +- src/freedreno/ir3/ir3_nir_opt_preamble.c | 2 +- src/freedreno/ir3/ir3_ra.c | 2 +- src/freedreno/ir3/ir3_shader.c | 13 ++----- src/freedreno/ir3/ir3_shader.h | 53 +++++++++++++------------- src/freedreno/vulkan/tu_clear_blit.cc | 2 +- src/freedreno/vulkan/tu_cmd_buffer.cc | 15 +++----- src/freedreno/vulkan/tu_pipeline.cc | 19 ++++++++- src/freedreno/vulkan/tu_pipeline.h | 4 ++ src/freedreno/vulkan/tu_shader.cc | 29 ++++++-------- src/freedreno/vulkan/tu_shader.h | 11 +++--- 15 files changed, 89 insertions(+), 84 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index adfe1f0..b469594 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -72,7 +72,8 @@ is_shared_consts(struct ir3_compiler *compiler, struct ir3_const_state *const_state, struct ir3_register *reg) { - if (const_state->shared_consts_enable && reg->flags & IR3_REG_CONST) { + if (const_state->push_consts_type == IR3_PUSH_CONSTS_SHARED && + reg->flags & IR3_REG_CONST) { uint32_t min_const_reg = regid(compiler->shared_consts_base_offset, 0); uint32_t max_const_reg = regid(compiler->shared_consts_base_offset + @@ -136,9 +137,9 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count) const struct ir3_compiler *compiler = v->compiler; /* If the user forced a particular wavesize respect that. */ - if (v->real_wavesize == IR3_SINGLE_ONLY) + if (v->shader_options.real_wavesize == IR3_SINGLE_ONLY) return false; - if (v->real_wavesize == IR3_DOUBLE_ONLY) + if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) return true; /* We can't support more than compiler->branchstack_size diverging threads diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index bc80313..b8cd984 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2087,7 +2087,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) */ ctx->so->constlen = MAX2(ctx->so->constlen, - ctx->so->num_reserved_user_consts + + ctx->so->shader_options.num_reserved_user_consts + const_state->ubo_state.size / 16); } break; diff --git a/src/freedreno/ir3/ir3_disk_cache.c b/src/freedreno/ir3/ir3_disk_cache.c index 3ca04b0..a7c3955 100644 --- a/src/freedreno/ir3/ir3_disk_cache.c +++ b/src/freedreno/ir3/ir3_disk_cache.c @@ -90,10 +90,10 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, _mesa_sha1_update(&ctx, blob.data, blob.size); blob_finish(&blob); - _mesa_sha1_update(&ctx, &shader->api_wavesize, - sizeof(shader->api_wavesize)); - _mesa_sha1_update(&ctx, &shader->real_wavesize, - sizeof(shader->real_wavesize)); + _mesa_sha1_update(&ctx, &shader->options.api_wavesize, + sizeof(shader->options.api_wavesize)); + _mesa_sha1_update(&ctx, &shader->options.real_wavesize, + sizeof(shader->options.real_wavesize)); /* Note that on some gens stream-out is lowered in ir3 to stg. For later * gens we maybe don't need to include stream-out in the cache key. diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 567dbc7..2598064 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -531,7 +531,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader) * the "real" subgroup size. */ unsigned subgroup_size = 0, max_subgroup_size = 0; - switch (shader->api_wavesize) { + switch (shader->options.api_wavesize) { case IR3_SINGLE_ONLY: subgroup_size = max_subgroup_size = compiler->threadsize_base; break; @@ -981,7 +981,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, const_state->num_ubos = nir->info.num_ubos; assert((const_state->ubo_state.size % 16) == 0); - unsigned constoff = v->num_reserved_user_consts + + unsigned constoff = v->shader_options.num_reserved_user_consts + const_state->ubo_state.size / 16 + const_state->preamble_size; unsigned ptrsz = ir3_pointer_size(compiler); diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index d0ad45e..85719e8 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -450,7 +450,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) uint32_t range_size = state->range[i].end - state->range[i].start; assert(offset <= max_upload); - state->range[i].offset = offset + v->num_reserved_user_consts * 16; + state->range[i].offset = offset + v->shader_options.num_reserved_user_consts * 16; assert(offset <= max_upload); offset += range_size; } diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 35b12cc..32e73d4 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -303,7 +303,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v) /* First, lower load/store_preamble. */ const struct ir3_const_state *const_state = ir3_const_state(v); - unsigned preamble_base = v->num_reserved_user_consts * 4 + + unsigned preamble_base = v->shader_options.num_reserved_user_consts * 4 + const_state->ubo_state.size / 4; unsigned preamble_size = const_state->preamble_size * 4; diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index a011c8f..0ec4530 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -2577,7 +2577,7 @@ ir3_ra(struct ir3_shader_variant *v) * because on some gens the register file is not big enough to hold a * double-size wave with all 48 registers in use. */ - if (v->real_wavesize == IR3_DOUBLE_ONLY) { + if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) { limit_pressure.full = MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16); } diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index e5b1d94..d4c64e2 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -298,13 +298,11 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, v->num_ssbos = info->num_ssbos; v->num_ibos = info->num_ssbos + info->num_images; - v->num_reserved_user_consts = shader->num_reserved_user_consts; - v->api_wavesize = shader->api_wavesize; - v->real_wavesize = shader->real_wavesize; + v->shader_options = shader->options; if (!v->binning_pass) { v->const_state = rzalloc_size(v, sizeof(*v->const_state)); - v->const_state->shared_consts_enable = shader->shared_consts_enable; + v->const_state->push_consts_type = shader->options.push_consts_type; } return v; @@ -589,7 +587,7 @@ ir3_trim_constlen(const struct ir3_shader_variant **variants, if (variants[i]) { constlens[i] = variants[i]->constlen; shared_consts_enable = - ir3_const_state(variants[i])->shared_consts_enable; + ir3_const_state(variants[i])->push_consts_type == IR3_PUSH_CONSTS_SHARED; } } @@ -641,10 +639,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, if (stream_output) memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output)); - shader->num_reserved_user_consts = options->reserved_user_consts; - shader->api_wavesize = options->api_wavesize; - shader->real_wavesize = options->real_wavesize; - shader->shared_consts_enable = options->shared_consts_enable; + shader->options = *options; shader->nir = nir; ir3_disk_cache_init_shader_key(compiler, shader); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 6fdfee7..345d2af 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -147,6 +147,12 @@ struct ir3_ubo_analysis_state { uint32_t size; }; +enum ir3_push_consts_type { + IR3_PUSH_CONSTS_NONE, + IR3_PUSH_CONSTS_PER_STAGE, + IR3_PUSH_CONSTS_SHARED, +}; + /** * Describes the layout of shader consts in the const register file. * @@ -213,7 +219,7 @@ struct ir3_const_state { /* State of ubo access lowered to push consts: */ struct ir3_ubo_analysis_state ubo_state; - bool shared_consts_enable; + enum ir3_push_consts_type push_consts_type; }; /** @@ -489,6 +495,20 @@ struct ir3_disasm_info { /* Represents half register in regid */ #define HALF_REG_ID 0x100 +struct ir3_shader_options { + unsigned num_reserved_user_consts; + /* What API-visible wavesizes are allowed. Even if only double wavesize is + * allowed, we may still use the smaller wavesize "under the hood" and the + * application simply sees the upper half as always disabled. + */ + enum ir3_wavesize_option api_wavesize; + /* What wavesizes we're allowed to actually use. If the API wavesize is + * single-only, then this must be single-only too. + */ + enum ir3_wavesize_option real_wavesize; + enum ir3_push_consts_type push_consts_type; +}; + /** * Shader variant which contains the actual hw shader instructions, * and necessary info for shader state setup. @@ -554,6 +574,8 @@ struct ir3_shader_variant { struct ir3_info info; + struct ir3_shader_options shader_options; + uint32_t constant_data_size; /* Levels of nesting of flow control: @@ -751,8 +773,6 @@ struct ir3_shader_variant { /* The total number of SSBOs and images, i.e. the number of hardware IBOs. */ unsigned num_ibos; - unsigned num_reserved_user_consts; - union { struct { enum tess_primitive_mode primitive_mode; @@ -790,8 +810,6 @@ struct ir3_shader_variant { } cs; }; - enum ir3_wavesize_option api_wavesize, real_wavesize; - /* For when we don't have a shader, variant's copy of streamout state */ struct ir3_stream_output_info stream_output; }; @@ -849,18 +867,7 @@ struct ir3_shader { struct ir3_compiler *compiler; - unsigned num_reserved_user_consts; - - /* What API-visible wavesizes are allowed. Even if only double wavesize is - * allowed, we may still use the smaller wavesize "under the hood" and the - * application simply sees the upper half as always disabled. - */ - enum ir3_wavesize_option api_wavesize; - - /* What wavesizes we're allowed to actually use. If the API wavesize is - * single-only, then this must be single-only too. - */ - enum ir3_wavesize_option real_wavesize; + struct ir3_shader_options options; bool nir_finalized; struct nir_shader *nir; @@ -893,8 +900,6 @@ struct ir3_shader { * recompiles for GL NOS that doesn't actually apply to the shader. */ struct ir3_shader_key key_mask; - - bool shared_consts_enable; }; /** @@ -914,7 +919,8 @@ static inline unsigned _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen) { const struct ir3_compiler *compiler = v->compiler; - bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable; + bool shared_consts_enable = + ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED; /* Shared consts size for CS and FS matches with what's acutally used, * but the size of shared consts for geomtry stages doesn't. @@ -969,13 +975,6 @@ ir3_shader_get_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, bool binning_pass, bool keep_ir, bool *created); - -struct ir3_shader_options { - unsigned reserved_user_consts; - enum ir3_wavesize_option api_wavesize, real_wavesize; - bool shared_consts_enable; -}; - struct ir3_shader * ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, const struct ir3_shader_options *options, diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index dd237ab..b58b118 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -741,7 +741,7 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir, ir3_finalize_nir(dev->compiler, nir); const struct ir3_shader_options options = { - .reserved_user_consts = align(consts, 4), + .num_reserved_user_consts = align(consts, 4), .api_wavesize = IR3_SINGLE_OR_DOUBLE, .real_wavesize = IR3_SINGLE_OR_DOUBLE, }; diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 49ae238..ea48e4b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -4274,9 +4274,10 @@ tu6_emit_user_consts(struct tu_cs *cs, struct tu_descriptor_state *descriptors, uint32_t *push_constants) { - if (const_state->push_consts.dwords > 0) { + if (const_state->push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) { unsigned num_units = const_state->push_consts.dwords; unsigned offset = const_state->push_consts.lo; + assert(num_units > 0); /* DST_OFF and NUM_UNIT requires vec4 units */ tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units); @@ -4355,7 +4356,7 @@ tu6_const_size(struct tu_cmd_buffer *cmd, { uint32_t dwords = 0; - if (shared_consts->dwords > 0) { + if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) { dwords += shared_consts->dwords + 4; } @@ -4376,7 +4377,7 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd, { uint32_t dwords = 0; const struct tu_push_constant_range *shared_consts = - compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->shared_consts : + compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state.push_consts : &cmd->state.program.shared_consts; dwords = tu6_const_size(cmd, shared_consts, compute); @@ -4387,14 +4388,8 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd, struct tu_cs cs; tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs); - if (shared_consts->dwords > 0) { + if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) { tu6_emit_shared_consts(&cs, shared_consts, cmd->push_constants, compute); - - for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.program.link); i++) { - const struct tu_program_descriptor_linkage *link = - &cmd->state.program.link[i]; - assert(!link->tu_const_state.push_consts.dwords); - } } if (compute) { diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 129f475..1725b82 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -308,6 +308,20 @@ tu_blend_state_is_dual_src(const struct vk_color_blend_state *cb) return false; } +enum ir3_push_consts_type +tu_push_consts_type(const struct tu_pipeline_layout *layout, + const struct ir3_compiler *compiler) +{ + if (!layout->push_constant_size) + return IR3_PUSH_CONSTS_NONE; + + if (tu6_shared_constants_enable(layout, compiler)) { + return IR3_PUSH_CONSTS_SHARED; + } else { + return IR3_PUSH_CONSTS_PER_STAGE; + } +} + template struct xs_config { uint16_t reg_sp_xs_config; @@ -2321,9 +2335,10 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, &pipeline->shaders[i]->const_state, variants[i]); - if (pipeline->shaders[i]->shared_consts.dwords != 0) { + if (pipeline->shaders[i]->const_state.push_consts.type == + IR3_PUSH_CONSTS_SHARED) { pipeline->program.shared_consts = - pipeline->shaders[i]->shared_consts; + pipeline->shaders[i]->const_state.push_consts; } } diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 485430c..a96e2cd 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -70,6 +70,10 @@ tu6_shared_constants_enable(const struct tu_pipeline_layout *layout, layout->push_constant_size <= (compiler->shared_consts_size * 16); } +enum ir3_push_consts_type +tu_push_consts_type(const struct tu_pipeline_layout *layout, + const struct ir3_compiler *compiler); + struct tu_program_descriptor_linkage { struct ir3_const_state const_state; diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 78173a7..ff6b429 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -682,8 +682,7 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) } if (min >= max) { - tu_shader->const_state.push_consts.lo = 0; - tu_shader->const_state.push_consts.dwords = 0; + tu_shader->const_state.push_consts = (struct tu_push_constant_range) {}; return; } @@ -706,7 +705,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev, const struct tu_pipeline_layout *layout, unsigned *reserved_consts_vec4_out) { - if (!tu6_shared_constants_enable(layout, dev->compiler)) + if (tu_shader->const_state.push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) gather_push_constants(shader, tu_shader); struct tu_const_state *const_state = &tu_shader->const_state; @@ -1227,7 +1226,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_pvtmem_config *pvtmem, uint64_t binary_iova) { - bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable; + bool shared_consts_enable = + ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED; tu6_emit_shared_consts_enable(cs, shared_consts_enable); tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP, @@ -2084,7 +2084,6 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object, container_of(object, struct tu_shader, base); blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state)); - blob_write_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts)); blob_write_uint32(blob, shader->view_mask); blob_write_uint8(blob, shader->active_desc_sets); @@ -2126,7 +2125,6 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache, return NULL; blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state)); - blob_copy_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts)); shader->view_mask = blob_read_uint32(blob); shader->active_desc_sets = blob_read_uint8(blob); @@ -2270,6 +2268,12 @@ tu_shader_create(struct tu_device *dev, nir->info.stage == MESA_SHADER_GEOMETRY) tu_gather_xfb_info(nir, &so_info); + shader->const_state.push_consts = (struct tu_push_constant_range) { + .lo = 0, + .dwords = layout->push_constant_size / 4, + .type = tu_push_consts_type(layout, dev->compiler), + }; + unsigned reserved_consts_vec4 = 0; NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4); @@ -2277,20 +2281,11 @@ tu_shader_create(struct tu_device *dev, ir3_finalize_nir(dev->compiler, nir); - bool shared_consts_enable = tu6_shared_constants_enable(layout, dev->compiler); - if (shared_consts_enable) { - assert(!shader->const_state.push_consts.dwords); - shader->shared_consts = (struct tu_push_constant_range) { - .lo = 0, - .dwords = layout->push_constant_size / 4, - }; - } - const struct ir3_shader_options options = { - .reserved_user_consts = reserved_consts_vec4, + .num_reserved_user_consts = reserved_consts_vec4, .api_wavesize = key->api_wavesize, .real_wavesize = key->real_wavesize, - .shared_consts_enable = shared_consts_enable, + .push_consts_type = shader->const_state.push_consts.type, }; struct ir3_shader *ir3_shader = diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h index 69f4022..1d6ec2e 100644 --- a/src/freedreno/vulkan/tu_shader.h +++ b/src/freedreno/vulkan/tu_shader.h @@ -30,10 +30,16 @@ struct tu_inline_ubo unsigned size_vec4; }; +/* The meaning of the range depends on "type". If it's + * IR3_PUSH_CONSTS_PER_STAGE, then it's the range used by this shader. If + * it's IR3_PUSH_CONSTS_SHARED then it's the overall range as provided by + * the pipeline layout and must match between shaders where it's non-zero. + */ struct tu_push_constant_range { uint32_t lo; uint32_t dwords; + enum ir3_push_consts_type type; }; struct tu_const_state @@ -63,11 +69,6 @@ struct tu_shader uint32_t view_mask; uint8_t active_desc_sets; - /* This is the range of shared consts used by all shaders. It must be the - * same between shaders. - */ - struct tu_push_constant_range shared_consts; - union { struct { unsigned patch_type; -- 2.7.4