v3dv/cmd_buffer: emit shader_state packets
authorAlejandro Piñeiro <apinheiro@igalia.com>
Mon, 30 Dec 2019 12:01:44 +0000 (13:01 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:26 +0000 (21:21 +0000)
The values still don't take vertex element data into account, but some
of that half-done code is kept in comments. It would be better to
finish that once we have an example using it.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_cmd_buffer.c
src/broadcom/vulkan/v3dv_private.h
src/broadcom/vulkan/v3dv_queue.c

index 10a7cba..2f5d39e 100644 (file)
@@ -1209,6 +1209,215 @@ emit_viewport(struct v3dv_cmd_buffer *cmd_buffer)
    }
 }
 
+/* VPM configuration values used when emitting the shader state.
+ *
+ * As used by cmd_buffer_emit_graphics_pipeline: As fills the "min ... input
+ * segments required in play" fields of the shader state record, Ve fills the
+ * "min ... output segments required in play in addition to vcm cache size"
+ * fields, and Vc fills the VCM_CACHE_SIZE packet (number of 16-vertex
+ * batches). The remaining fields (Gs, Gd, Gv, gs_width) are neither set nor
+ * read there; presumably they are the GS-related values -- TODO confirm.
+ *
+ * FIXME: in fact this is not really required at this point, as we don't plan
+ * to initially support GS, but it is more readable and serves as a
+ * placeholder, to have the struct and fill it with default values.
+ */
+struct vpm_config {
+   uint32_t As;
+   uint32_t Vc;
+   uint32_t Gs;
+   uint32_t Gd;
+   uint32_t Gv;
+   uint32_t Ve;
+   uint32_t gs_width;
+};
+
+static void
+cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+   struct v3dv_pipeline *pipeline = state->pipeline;
+
+   assert(pipeline);
+
+   /* Emit the shader state for the pipeline stored in the command buffer
+    * state (asserted non-NULL above): a GL_SHADER_STATE_RECORD followed by
+    * the attribute record(s) is written to the indirect CL, and the
+    * VCM_CACHE_SIZE and GL_SHADER_STATE packets are written to the BCL, the
+    * latter pointing at the offset of the record in the indirect CL.
+    *
+    * NOTE(review): the GL_SHADER_STATE packet emitted at the end is also
+    * named `state`; if cl_emit declares a variable with that name, it
+    * shadows the local `state` pointer declared above -- confirm and
+    * consider renaming one of them.
+    *
+    * Upload the uniforms to the indirect CL first
+    */
+
+   /* FIXME: uniforms not supported yet, so all the uniform addresses below
+    * are emitted as empty (NULL, 0) relocs.
+    */
+
+   struct v3dv_cl_reloc vs_uniforms = { NULL, 0 };
+   struct v3dv_cl_reloc vs_bin_uniforms = { NULL, 0 };
+   struct v3dv_cl_reloc fs_uniforms = { NULL, 0 };
+
+   /* Update the cache dirty flag based on the shader progs data. The flag is
+    * only ever OR-ed in here; it is not cleared by this function.
+    */
+   state->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
+   state->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
+   state->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
+
+   /* FIXME: fake vtx->num_elements, that is the vertex state that includes
+    * data from the buffers used on the vertex. Such info is still not
+    * supported or filled in any place. On Gallium that is filled by
+    * st_update_array, that eventually calls v3d_vertex_state_create
+    *
+    * We are keeping it mostly for the GFXH-930 workaround mentioned below,
+    * as it provides more context on why that workaround is needed.
+    */
+   uint32_t vtx_num_elements = 0;
+
+   /* See GFXH-930 workaround below: at least one attribute record is always
+    * emitted, hence the minimum of 1.
+    */
+   uint32_t num_elements_to_emit = MAX2(vtx_num_elements, 1);
+
+   uint32_t shader_rec_offset =
+      v3dv_cl_ensure_space(&cmd_buffer->indirect,
+                           cl_packet_length(GL_SHADER_STATE_RECORD) +
+                           num_elements_to_emit *
+                           cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+                           32);
+
+   struct vpm_config vpm_cfg_bin, vpm_cfg;
+
+   /* FIXME: values below are the defaults when no GS is available. Would
+    * need to provide real values if GS gets supported.
+    *
+    * Only As, Ve and Vc are filled in (and read) by this function; the
+    * remaining vpm_config fields are left uninitialized.
+    */
+   vpm_cfg_bin.As = 1;
+   vpm_cfg_bin.Ve = 0;
+   vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size;
+
+   vpm_cfg.As = 1;
+   vpm_cfg.Ve = 0;
+   vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
+
+   cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_RECORD, shader) {
+      shader.enable_clipping = true;
+
+      shader.point_size_in_shaded_vertex_data =
+         pipeline->vs->key.vs.per_vertex_point_size;
+
+      /* Must be set if the shader modifies Z, discards, or modifies
+       * the sample mask.  For any of these cases, the fragment
+       * shader needs to write the Z value (even just discards).
+       */
+      shader.fragment_shader_does_z_writes =
+         pipeline->fs->prog_data.fs->writes_z;
+      /* Set if the EZ test must be disabled (due to shader side
+       * effects and the early_z flag not being present in the
+       * shader).
+       */
+      shader.turn_off_early_z_test =
+         pipeline->fs->prog_data.fs->disable_ez;
+
+      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+         pipeline->fs->prog_data.fs->uses_center_w;
+
+      shader.any_shader_reads_hardware_written_primitive_id = false;
+
+      shader.do_scoreboard_wait_on_first_thread_switch =
+         pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
+      shader.disable_implicit_point_line_varyings =
+         !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings;
+
+      shader.number_of_varyings_in_fragment_shader =
+         pipeline->fs->prog_data.fs->num_inputs;
+
+      shader.coordinate_shader_propagate_nans = true;
+      shader.vertex_shader_propagate_nans = true;
+      shader.fragment_shader_propagate_nans = true;
+
+      shader.coordinate_shader_code_address =
+         v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0);
+      shader.vertex_shader_code_address =
+         v3dv_cl_address(pipeline->vs->assembly_bo, 0);
+      shader.fragment_shader_code_address =
+         v3dv_cl_address(pipeline->fs->assembly_bo, 0);
+
+      /* FIXME: Use combined input/output size flag in the common case (also
+       * on v3d, see v3dx_draw).
+       *
+       * When a stage does not use separate segments, its input segment size
+       * is emitted as 1 instead of vpm_input_size.
+       */
+      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+         pipeline->vs_bin->prog_data.vs->separate_segments;
+      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+         pipeline->vs->prog_data.vs->separate_segments;
+
+      shader.coordinate_shader_input_vpm_segment_size =
+         pipeline->vs_bin->prog_data.vs->separate_segments ?
+         pipeline->vs_bin->prog_data.vs->vpm_input_size : 1;
+      shader.vertex_shader_input_vpm_segment_size =
+         pipeline->vs->prog_data.vs->separate_segments ?
+         pipeline->vs->prog_data.vs->vpm_input_size : 1;
+
+      shader.coordinate_shader_output_vpm_segment_size =
+         pipeline->vs_bin->prog_data.vs->vpm_output_size;
+      shader.vertex_shader_output_vpm_segment_size =
+         pipeline->vs->prog_data.vs->vpm_output_size;
+
+      shader.coordinate_shader_uniforms_address = vs_bin_uniforms;
+      shader.vertex_shader_uniforms_address = vs_uniforms;
+      shader.fragment_shader_uniforms_address = fs_uniforms;
+
+      shader.min_coord_shader_input_segments_required_in_play =
+         vpm_cfg_bin.As;
+      shader.min_vertex_shader_input_segments_required_in_play =
+         vpm_cfg.As;
+
+      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         vpm_cfg_bin.Ve;
+      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         vpm_cfg.Ve;
+
+      shader.coordinate_shader_4_way_threadable =
+         pipeline->vs_bin->prog_data.vs->base.threads == 4;
+      shader.vertex_shader_4_way_threadable =
+         pipeline->vs->prog_data.vs->base.threads == 4;
+      shader.fragment_shader_4_way_threadable =
+         pipeline->fs->prog_data.fs->base.threads == 4;
+
+      shader.coordinate_shader_start_in_final_thread_section =
+         pipeline->vs_bin->prog_data.vs->base.single_seg;
+      shader.vertex_shader_start_in_final_thread_section =
+         pipeline->vs->prog_data.vs->base.single_seg;
+      shader.fragment_shader_start_in_final_thread_section =
+         pipeline->fs->prog_data.fs->base.single_seg;
+
+      shader.vertex_id_read_by_coordinate_shader =
+         pipeline->vs_bin->prog_data.vs->uses_vid;
+      shader.instance_id_read_by_coordinate_shader =
+         pipeline->vs_bin->prog_data.vs->uses_iid;
+      shader.vertex_id_read_by_vertex_shader =
+         pipeline->vs->prog_data.vs->uses_vid;
+      shader.instance_id_read_by_vertex_shader =
+         pipeline->vs->prog_data.vs->uses_iid;
+
+      /* FIXME: I understand that the following is needed only if
+       * vtx_num_elements > 0, so it is left commented out for now.
+       */
+/*       shader.address_of_default_attribute_values = */
+   }
+
+   /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
+
+   /* FIXME: vertex elements not supported yet (vtx_num_elements == 0), so
+    * the condition below is currently always true.
+    */
+   if (vtx_num_elements == 0) {
+      /* GFXH-930: At least one attribute must be enabled and read
+       * by CS and VS.  If we have no attributes being consumed by
+       * the shader, set up a dummy to be loaded into the VPM.
+       */
+      cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+         /* Valid address of data whose value will be unused. */
+         attr.address = v3dv_cl_address(cmd_buffer->indirect.bo, 0);
+
+         attr.type = ATTRIBUTE_FLOAT;
+         attr.stride = 0;
+         attr.vec_size = 1;
+
+         attr.number_of_values_read_by_coordinate_shader = 1;
+         attr.number_of_values_read_by_vertex_shader = 1;
+      }
+   }
+
+   cl_emit(&cmd_buffer->bcl, VCM_CACHE_SIZE, vcm) {
+      vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc;
+      vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
+   }
+
+   cl_emit(&cmd_buffer->bcl, GL_SHADER_STATE, state) {
+      state.address = v3dv_cl_address(cmd_buffer->indirect.bo,
+                                      shader_rec_offset);
+      state.number_of_attribute_arrays = num_elements_to_emit;
+   }
+
+}
+
+
 static void
 cmd_buffer_emit_state(struct v3dv_cmd_buffer *cmd_buffer)
 {
@@ -1216,6 +1425,9 @@ cmd_buffer_emit_state(struct v3dv_cmd_buffer *cmd_buffer)
    uint32_t states = cmd_buffer->state.dirty;
    struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
 
+   if (states & (V3DV_CMD_DIRTY_PIPELINE)) {
+      cmd_buffer_emit_graphics_pipeline(cmd_buffer);
+   }
    /* Emit(flush) dynamic state */
    if (states & (V3DV_CMD_DIRTY_DYNAMIC_VIEWPORT | V3DV_CMD_DIRTY_DYNAMIC_SCISSOR)) {
       assert(dynamic->scissor.count > 0 || dynamic->viewport.count > 0);
index 0062965..f202fba 100644 (file)
@@ -451,6 +451,9 @@ struct v3dv_cmd_buffer_state {
 
    struct v3dv_dynamic_state dynamic;
    uint32_t dirty;
+
+   /* FIXME: is this the right place for this flag? */
+   bool tmu_dirty_rcl;
 };
 
 struct v3dv_cmd_buffer {
@@ -502,8 +505,8 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
 }
 
 /*
- * Utility struct so shader_module_compile_to_nir and other methods doesn't
- * have so many parameters.
+ * Per-stage info, useful so shader_module_compile_to_nir and other
+ * methods don't have so many parameters.
  *
  * FIXME: for the case of the coordinate shader and the vertex shader, module,
  * entrypoint, spec_info and nir are the same. There are also info only
index 5cf9e0c..fb5359c 100644 (file)
@@ -54,7 +54,12 @@ queue_submit(struct v3dv_queue *queue,
    submit.rcl_start = cmd_buffer->rcl.bo->offset;
    submit.rcl_end = cmd_buffer->rcl.bo->offset + v3dv_cl_offset(&cmd_buffer->rcl);
 
-   submit.flags = 0; /* FIXME */
+   submit.flags = 0;
+   /* FIXME: we already know that cache flush is supported, as we only
+    * support hw that provides it, but it would be better to query DRM for it
+    */
+   if (cmd_buffer->state.tmu_dirty_rcl)
+      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
 
    submit.qma = cmd_buffer->tile_alloc->offset;
    submit.qms = cmd_buffer->tile_alloc->size;