From 67aa8029a80e46bb3f72e4f1c87c52b471abd939 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Alejandro=20Pi=C3=B1eiro?= Date: Mon, 30 Dec 2019 13:01:44 +0100 Subject: [PATCH] v3dv/cmd_buffer: emit shader_state packets The values still don't take vertex element data into account, but some of that half-done code is kept in comments. It would be better to finish that once we have an example that uses it. Part-of: --- src/broadcom/vulkan/v3dv_cmd_buffer.c | 212 ++++++++++++++++++++++++++++++++++ src/broadcom/vulkan/v3dv_private.h | 7 +- src/broadcom/vulkan/v3dv_queue.c | 7 +- 3 files changed, 223 insertions(+), 3 deletions(-) diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 10a7cba..2f5d39e 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -1209,6 +1209,215 @@ emit_viewport(struct v3dv_cmd_buffer *cmd_buffer) } } +/* FIXME: in fact this is not really required at this point, as we don't plan + * to initially support GS, but it is more readable and serves as a + * placeholder, to have the struct and fill it with default values. 
+ */ +struct vpm_config { + uint32_t As; + uint32_t Vc; + uint32_t Gs; + uint32_t Gd; + uint32_t Gv; + uint32_t Ve; + uint32_t gs_width; +}; + +static void +cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + struct v3dv_pipeline *pipeline = state->pipeline; + + assert(pipeline); + + /* Upload the uniforms to the indirect CL first */ + + /* FIXME: uniforms not supported yet, so every stage gets a null reloc */ + + struct v3dv_cl_reloc vs_uniforms = { NULL, 0 }; + struct v3dv_cl_reloc vs_bin_uniforms = { NULL, 0 }; + struct v3dv_cl_reloc fs_uniforms = { NULL, 0 }; + + /* OR the tmu_dirty_rcl flag of each shader variant into the cmd buffer state */ + state->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl; + state->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl; + state->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl; + + /* FIXME: fake vtx->num_elements, that is the vertex state that includes + * data from the buffers used on the vertex. Such info is still not + * supported or filled in anywhere. On Gallium that is filled by + * st_update_array, which eventually calls v3d_vertex_state_create. + * + * We are handling it mostly because of the GFXH-930 workaround mentioned + * below, as it gives more context on why that code is needed. + */ + uint32_t vtx_num_elements = 0; + + /* At least one attribute record is always emitted: see GFXH-930 below */ + uint32_t num_elements_to_emit = MAX2(vtx_num_elements, 1); + + uint32_t shader_rec_offset = + v3dv_cl_ensure_space(&cmd_buffer->indirect, + cl_packet_length(GL_SHADER_STATE_RECORD) + + num_elements_to_emit * + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), + 32); + + struct vpm_config vpm_cfg_bin, vpm_cfg; + + /* FIXME: values below are the defaults for when GS is not in use. 
Would need to + * provide real values if GS gets supported + */ + vpm_cfg_bin.As = 1; + vpm_cfg_bin.Ve = 0; + vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size; + + vpm_cfg.As = 1; + vpm_cfg.Ve = 0; + vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size; + + cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_RECORD, shader) { + shader.enable_clipping = true; + + shader.point_size_in_shaded_vertex_data = + pipeline->vs->key.vs.per_vertex_point_size; + + /* Must be set if the shader modifies Z, discards, or modifies + * the sample mask. For any of these cases, the fragment + * shader needs to write the Z value (even just discards). + */ + shader.fragment_shader_does_z_writes = + pipeline->fs->prog_data.fs->writes_z; + /* Set if the EZ test must be disabled (due to shader side + * effects and the early_z flag not being present in the + * shader). + */ + shader.turn_off_early_z_test = + pipeline->fs->prog_data.fs->disable_ez; + + shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = + pipeline->fs->prog_data.fs->uses_center_w; + + shader.any_shader_reads_hardware_written_primitive_id = false; + + shader.do_scoreboard_wait_on_first_thread_switch = + pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw; + shader.disable_implicit_point_line_varyings = + !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings; + + shader.number_of_varyings_in_fragment_shader = + pipeline->fs->prog_data.fs->num_inputs; + + shader.coordinate_shader_propagate_nans = true; + shader.vertex_shader_propagate_nans = true; + shader.fragment_shader_propagate_nans = true; + + shader.coordinate_shader_code_address = + v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0); + shader.vertex_shader_code_address = + v3dv_cl_address(pipeline->vs->assembly_bo, 0); + shader.fragment_shader_code_address = + v3dv_cl_address(pipeline->fs->assembly_bo, 0); + + /* FIXME: Use combined input/output size flag in the common case (also + * on v3d, see v3dx_draw). 
+ */ + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = + pipeline->vs_bin->prog_data.vs->separate_segments; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = + pipeline->vs->prog_data.vs->separate_segments; + + shader.coordinate_shader_input_vpm_segment_size = + pipeline->vs_bin->prog_data.vs->separate_segments ? + pipeline->vs_bin->prog_data.vs->vpm_input_size : 1; + shader.vertex_shader_input_vpm_segment_size = + pipeline->vs->prog_data.vs->separate_segments ? + pipeline->vs->prog_data.vs->vpm_input_size : 1; + + shader.coordinate_shader_output_vpm_segment_size = + pipeline->vs_bin->prog_data.vs->vpm_output_size; + shader.vertex_shader_output_vpm_segment_size = + pipeline->vs->prog_data.vs->vpm_output_size; + + shader.coordinate_shader_uniforms_address = vs_bin_uniforms; + shader.vertex_shader_uniforms_address = vs_uniforms; + shader.fragment_shader_uniforms_address = fs_uniforms; + + shader.min_coord_shader_input_segments_required_in_play = + vpm_cfg_bin.As; + shader.min_vertex_shader_input_segments_required_in_play = + vpm_cfg.As; + + shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = + vpm_cfg_bin.Ve; + shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = + vpm_cfg.Ve; + + shader.coordinate_shader_4_way_threadable = + pipeline->vs_bin->prog_data.vs->base.threads == 4; + shader.vertex_shader_4_way_threadable = + pipeline->vs->prog_data.vs->base.threads == 4; + shader.fragment_shader_4_way_threadable = + pipeline->fs->prog_data.fs->base.threads == 4; + + shader.coordinate_shader_start_in_final_thread_section = + pipeline->vs_bin->prog_data.vs->base.single_seg; + shader.vertex_shader_start_in_final_thread_section = + pipeline->vs->prog_data.vs->base.single_seg; + shader.fragment_shader_start_in_final_thread_section = + pipeline->fs->prog_data.fs->base.single_seg; + + shader.vertex_id_read_by_coordinate_shader = + pipeline->vs_bin->prog_data.vs->uses_vid; 
+ shader.instance_id_read_by_coordinate_shader = + pipeline->vs_bin->prog_data.vs->uses_iid; + shader.vertex_id_read_by_vertex_shader = + pipeline->vs->prog_data.vs->uses_vid; + shader.instance_id_read_by_vertex_shader = + pipeline->vs->prog_data.vs->uses_iid; + + /* FIXME: I understand that the following is needed only if + * vtx_num_elements > 0 + */ +/* shader.address_of_default_attribute_values = */ + } + + /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */ + + /* FIXME: vertex elements not supported yet (vtx_num_elements == 0) */ + if (vtx_num_elements == 0) { + /* GFXH-930: At least one attribute must be enabled and read + * by CS and VS. If we have no attributes being consumed by + * the shader, set up a dummy to be loaded into the VPM. + */ + cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* Valid address of data whose value will be unused. */ + attr.address = v3dv_cl_address(cmd_buffer->indirect.bo, 0); + + attr.type = ATTRIBUTE_FLOAT; + attr.stride = 0; + attr.vec_size = 1; + + attr.number_of_values_read_by_coordinate_shader = 1; + attr.number_of_values_read_by_vertex_shader = 1; + } + } + + cl_emit(&cmd_buffer->bcl, VCM_CACHE_SIZE, vcm) { + vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc; + vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc; + } + + cl_emit(&cmd_buffer->bcl, GL_SHADER_STATE, state) { + state.address = v3dv_cl_address(cmd_buffer->indirect.bo, + shader_rec_offset); + state.number_of_attribute_arrays = num_elements_to_emit; + } + +} + + static void cmd_buffer_emit_state(struct v3dv_cmd_buffer *cmd_buffer) { @@ -1216,6 +1425,9 @@ cmd_buffer_emit_state(struct v3dv_cmd_buffer *cmd_buffer) uint32_t states = cmd_buffer->state.dirty; struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + if (states & (V3DV_CMD_DIRTY_PIPELINE)) { + cmd_buffer_emit_graphics_pipeline(cmd_buffer); + } /* Emit(flush) dynamic state */ if (states & (V3DV_CMD_DIRTY_DYNAMIC_VIEWPORT | 
V3DV_CMD_DIRTY_DYNAMIC_SCISSOR)) { assert(dynamic->scissor.count > 0 || dynamic->viewport.count > 0); diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 0062965..f202fba 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -451,6 +451,9 @@ struct v3dv_cmd_buffer_state { struct v3dv_dynamic_state dynamic; uint32_t dirty; + + /* FIXME: here? */ + bool tmu_dirty_rcl; }; struct v3dv_cmd_buffer { @@ -502,8 +505,8 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) } /* - * Utility struct so shader_module_compile_to_nir and other methods doesn't - * have so many parameters. + * Per-stage info, useful so that shader_module_compile_to_nir and + * other methods don't need so many parameters. * * FIXME: for the case of the coordinate shader and the vertex shader, module, * entrypoint, spec_info and nir are the same. There are also info only diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c index 5cf9e0c..fb5359c 100644 --- a/src/broadcom/vulkan/v3dv_queue.c +++ b/src/broadcom/vulkan/v3dv_queue.c @@ -54,7 +54,12 @@ queue_submit(struct v3dv_queue *queue, submit.rcl_start = cmd_buffer->rcl.bo->offset; submit.rcl_end = cmd_buffer->rcl.bo->offset + v3dv_cl_offset(&cmd_buffer->rcl); - submit.flags = 0; /* FIXME */ + submit.flags = 0; + /* FIXME: we already know that cache flush is supported, as we only support + * hw that provides it, but it would be better to query DRM for it + */ + if (cmd_buffer->state.tmu_dirty_rcl) + submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE; submit.qma = cmd_buffer->tile_alloc->offset; submit.qms = cmd_buffer->tile_alloc->size; -- 2.7.4