From 46def1814e826d2a3d7e0c3b7b53c976e2c57d4f Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 2 Feb 2021 18:00:08 +0200 Subject: [PATCH] turnip: consider shader's immediates size for sub-stream allocation Otherwise we could exceed pre-allocated space. Fixes: dEQP-VK.spirv_assembly.instruction.compute.opphi.wide Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ci/deqp-freedreno-a630-fails.txt | 4 -- src/freedreno/vulkan/tu_pipeline.c | 68 +++++++++++++++++++++----- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 5264d75..8400b70 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -71,10 +71,6 @@ dEQP-VK.api.info.image_format_properties.2d.optimal.g8_b8r8_2plane_420_unorm,Fai # "Mismatch between VkPhysicalDeviceProtectedMemoryProperties at vktApiFeatureInfo.cpp:4208" dEQP-VK.api.info.get_physical_device_properties2.properties,Fail -# "deqp-vk: ../src/freedreno/vulkan/tu_cs.h:186: tu_cs_reserve: Assertion `tu_cs_get_space(cs) >= reserved_size' failed." 
-# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8841 -dEQP-VK.spirv_assembly.instruction.compute.opphi.wide,Crash - # Fails when TU_DEBUG=forcebin is set dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 6e94f98..02276ec 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -257,6 +257,8 @@ struct tu_pipeline_builder uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1]; uint64_t binning_vs_iova; + uint32_t additional_cs_reserve_size; + struct tu_pvtmem_config pvtmem; bool rasterizer_discard; @@ -390,6 +392,32 @@ static const struct xs_config { }, }; +static uint32_t +tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs) +{ + const struct ir3_const_state *const_state = ir3_const_state(xs); + uint32_t base = const_state->offsets.immediate; + int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4); + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, xs->constlen) - base; + + return MAX2(size, 0) * 4; +} + +/* We allocate fixed-length substreams for shader state, however some + * parts of the state may have unbounded length. Their additional space + * requirements should be calculated here. 
+ */ +static uint32_t +tu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant *xs) +{ + uint32_t size = tu_xs_get_immediates_packet_size_dwords(xs); + return size; +} + void tu6_emit_xs_config(struct tu_cs *cs, gl_shader_stage stage, /* xs->type, but xs may be NULL */ @@ -529,24 +557,19 @@ tu6_emit_xs(struct tu_cs *cs, const struct ir3_const_state *const_state = ir3_const_state(xs); uint32_t base = const_state->offsets.immediate; - int size = DIV_ROUND_UP(const_state->immediates_count, 4); + unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs); - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, xs->constlen) - base; - - if (size > 0) { - tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4); + if (immediate_size > 0) { + tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size); tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | - CP_LOAD_STATE6_0_NUM_UNIT(size)); + CP_LOAD_STATE6_0_NUM_UNIT(immediate_size / 4)); tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - tu_cs_emit_array(cs, const_state->immediates, size * 4); + tu_cs_emit_array(cs, const_state->immediates, immediate_size); } if (const_state->constant_data_ubo != -1) { @@ -2153,9 +2176,27 @@ tu_pipeline_allocate_cs(struct tu_device *dev, pvtmem_bytes = MAX2(pvtmem_bytes, builder->binning_variant->pvtmem_size); size += calc_pvtmem_size(dev, NULL, pvtmem_bytes) / 4; + + builder->additional_cs_reserve_size = 0; + for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { + struct ir3_shader_variant *variant = builder->variants[i]; + if (variant) { + builder->additional_cs_reserve_size += + tu_xs_get_additional_cs_size_dwords(variant); + + if (variant->binning) { + builder->additional_cs_reserve_size += + 
tu_xs_get_additional_cs_size_dwords(variant->binning); + } + } + } + + size += builder->additional_cs_reserve_size; } else { size += compute->info.size / 4; size += calc_pvtmem_size(dev, NULL, compute->pvtmem_size) / 4; + + size += tu_xs_get_additional_cs_size_dwords(compute); } tu_cs_init(&pipeline->cs, dev, TU_CS_MODE_SUB_STREAM, size); @@ -2563,11 +2604,11 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, tu6_emit_program_config(&prog_cs, builder); pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); - tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs); + tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs); tu6_emit_program(&prog_cs, builder, false, pipeline); pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); - tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs); + tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs); tu6_emit_program(&prog_cs, builder, true, pipeline); pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); @@ -3273,7 +3314,8 @@ tu_compute_pipeline_create(VkDevice device, pipeline->compute.subgroup_size = v->info.double_threadsize ? 128 : 64; struct tu_cs prog_cs; - tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs); + uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v); + tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs); tu6_emit_cs_config(&prog_cs, shader, v, &pvtmem, shader_iova); pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); -- 2.7.4