v3dv: split v3dv_pipeline hw version dependent code to a new source file
author Alejandro Piñeiro <apinheiro@igalia.com>
Thu, 10 Jun 2021 12:17:55 +0000 (14:17 +0200)
committer Alejandro Piñeiro <apinheiro@igalia.com>
Tue, 22 Jun 2021 09:34:06 +0000 (11:34 +0200)
v2: merge some of the v3dvx entrypoints to general pack methods (Iago)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11310>

src/broadcom/vulkan/meson.build
src/broadcom/vulkan/v3dv_pipeline.c
src/broadcom/vulkan/v3dvx_pipeline.c [new file with mode: 0644]
src/broadcom/vulkan/v3dvx_private.h

index 472d2dc..c94a4bf 100644 (file)
@@ -54,6 +54,7 @@ libv3dv_files = files(
 )
 
 files_per_version = files(
+  'v3dvx_pipeline.c',
   'v3dvx_queue.c',
 )
 
index 156f8de..d6f4805 100644 (file)
@@ -37,8 +37,6 @@
 
 #include "vulkan/util/vk_format.h"
 
-#include "broadcom/cle/v3dx_pack.h"
-
 void
 v3dv_print_v3d_key(struct v3d_key *key,
                    uint32_t v3d_key_size)
@@ -2284,309 +2282,6 @@ pipeline_init_dynamic_state(
    pipeline->dynamic_state.mask = dynamic_states;
 }
 
-static uint8_t
-blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
-{
-   switch (factor) {
-   case VK_BLEND_FACTOR_ZERO:
-   case VK_BLEND_FACTOR_ONE:
-   case VK_BLEND_FACTOR_SRC_COLOR:
-   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-   case VK_BLEND_FACTOR_DST_COLOR:
-   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-   case VK_BLEND_FACTOR_SRC_ALPHA:
-   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-      return factor;
-   case VK_BLEND_FACTOR_CONSTANT_COLOR:
-   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-      *needs_constants = true;
-      return factor;
-   case VK_BLEND_FACTOR_DST_ALPHA:
-      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
-                             V3D_BLEND_FACTOR_DST_ALPHA;
-   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
-                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
-   case VK_BLEND_FACTOR_SRC1_COLOR:
-   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
-   case VK_BLEND_FACTOR_SRC1_ALPHA:
-   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
-      assert(!"Invalid blend factor: dual source blending not supported.");
-   default:
-      assert(!"Unknown blend factor.");
-   }
-
-   /* Should be handled by the switch, added to avoid a "end of non-void
-    * function" error
-    */
-   unreachable("Unknown blend factor.");
-}
-
-static void
-pack_blend(struct v3dv_pipeline *pipeline,
-           const VkPipelineColorBlendStateCreateInfo *cb_info)
-{
-   /* By default, we are not enabling blending and all color channel writes are
-    * enabled. Color write enables are independent of whether blending is
-    * enabled or not.
-    *
-    * Vulkan specifies color write masks so that bits set correspond to
-    * enabled channels. Our hardware does it the other way around.
-    */
-   pipeline->blend.enables = 0;
-   pipeline->blend.color_write_masks = 0; /* All channels enabled */
-
-   if (!cb_info)
-      return;
-
-   assert(pipeline->subpass);
-   if (pipeline->subpass->color_count == 0)
-      return;
-
-   assert(pipeline->subpass->color_count == cb_info->attachmentCount);
-
-   pipeline->blend.needs_color_constants = false;
-   uint32_t color_write_masks = 0;
-   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
-      const VkPipelineColorBlendAttachmentState *b_state =
-         &cb_info->pAttachments[i];
-
-      uint32_t attachment_idx =
-         pipeline->subpass->color_attachments[i].attachment;
-      if (attachment_idx == VK_ATTACHMENT_UNUSED)
-         continue;
-
-      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
-
-      if (!b_state->blendEnable)
-         continue;
-
-      VkAttachmentDescription *desc =
-         &pipeline->pass->attachments[attachment_idx].desc;
-      const struct v3dv_format *format = v3dv_get_format(desc->format);
-      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
-
-      uint8_t rt_mask = 1 << i;
-      pipeline->blend.enables |= rt_mask;
-
-      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
-         config.render_target_mask = rt_mask;
-
-         config.color_blend_mode = b_state->colorBlendOp;
-         config.color_blend_dst_factor =
-            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
-                         &pipeline->blend.needs_color_constants);
-         config.color_blend_src_factor =
-            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
-                         &pipeline->blend.needs_color_constants);
-
-         config.alpha_blend_mode = b_state->alphaBlendOp;
-         config.alpha_blend_dst_factor =
-            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
-                         &pipeline->blend.needs_color_constants);
-         config.alpha_blend_src_factor =
-            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
-                         &pipeline->blend.needs_color_constants);
-      }
-   }
-
-   pipeline->blend.color_write_masks = color_write_masks;
-}
-
-/* This requires that pack_blend() had been called before so we can set
- * the overall blend enable bit in the CFG_BITS packet.
- */
-static void
-pack_cfg_bits(struct v3dv_pipeline *pipeline,
-              const VkPipelineDepthStencilStateCreateInfo *ds_info,
-              const VkPipelineRasterizationStateCreateInfo *rs_info,
-              const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
-   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
-
-   pipeline->msaa =
-      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
-
-   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
-      config.enable_forward_facing_primitive =
-         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
-
-      config.enable_reverse_facing_primitive =
-         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
-
-      /* Seems like the hardware is backwards regarding this setting... */
-      config.clockwise_primitives =
-         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
-
-      config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable: false;
-
-      /* This is required to pass line rasterization tests in CTS while
-       * exposing, at least, a minimum of 4-bits of subpixel precision
-       * (the minimum requirement).
-       */
-      config.line_rasterization = 1; /* perp end caps */
-
-      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
-         config.direct3d_wireframe_triangles_mode = true;
-         config.direct3d_point_fill_mode =
-            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
-      }
-
-      config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
-
-      /* From the Vulkan spec:
-       *
-       *   "Provoking Vertex:
-       *
-       *       The vertex in a primitive from which flat shaded attribute
-       *       values are taken. This is generally the “first” vertex in the
-       *       primitive, and depends on the primitive topology."
-       *
-       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
-       * the last vertex by default.
-       */
-      config.direct3d_provoking_vertex = true;
-
-      config.blend_enable = pipeline->blend.enables != 0;
-
-      /* Disable depth/stencil if we don't have a D/S attachment */
-      bool has_ds_attachment =
-         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
-
-      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
-         config.z_updates_enable = ds_info->depthWriteEnable;
-         config.depth_test_function = ds_info->depthCompareOp;
-      } else {
-         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
-      }
-
-      /* EZ state will be updated at draw time based on bound pipeline state */
-      config.early_z_updates_enable = false;
-      config.early_z_enable = false;
-
-      config.stencil_enable =
-         ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
-
-      pipeline->z_updates_enable = config.z_updates_enable;
-   };
-}
-
-static uint32_t
-translate_stencil_op(enum pipe_stencil_op op)
-{
-   switch (op) {
-   case VK_STENCIL_OP_KEEP:
-      return V3D_STENCIL_OP_KEEP;
-   case VK_STENCIL_OP_ZERO:
-      return V3D_STENCIL_OP_ZERO;
-   case VK_STENCIL_OP_REPLACE:
-      return V3D_STENCIL_OP_REPLACE;
-   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
-      return V3D_STENCIL_OP_INCR;
-   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
-      return V3D_STENCIL_OP_DECR;
-   case VK_STENCIL_OP_INVERT:
-      return V3D_STENCIL_OP_INVERT;
-   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
-      return V3D_STENCIL_OP_INCWRAP;
-   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
-      return V3D_STENCIL_OP_DECWRAP;
-   default:
-      unreachable("bad stencil op");
-   }
-}
-
-static void
-pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
-                        uint8_t *stencil_cfg,
-                        bool is_front,
-                        bool is_back,
-                        const VkStencilOpState *stencil_state)
-{
-   /* From the Vulkan spec:
-    *
-    *   "Reference is an integer reference value that is used in the unsigned
-    *    stencil comparison. The reference value used by stencil comparison
-    *    must be within the range [0,2^s-1] , where s is the number of bits in
-    *    the stencil framebuffer attachment, otherwise the reference value is
-    *    considered undefined."
-    *
-    * In our case, 's' is always 8, so we clamp to that to prevent our packing
-    * functions to assert in debug mode if they see larger values.
-    *
-    * If we have dynamic state we need to make sure we set the corresponding
-    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
-    * the old.
-    */
-   const uint8_t write_mask =
-      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
-         0 : stencil_state->writeMask & 0xff;
-
-   const uint8_t compare_mask =
-      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
-         0 : stencil_state->compareMask & 0xff;
-
-   const uint8_t reference =
-      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
-         0 : stencil_state->reference & 0xff;
-
-   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
-      config.front_config = is_front;
-      config.back_config = is_back;
-      config.stencil_write_mask = write_mask;
-      config.stencil_test_mask = compare_mask;
-      config.stencil_test_function = stencil_state->compareOp;
-      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
-      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
-      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
-      config.stencil_ref_value = reference;
-   }
-}
-
-static void
-pack_stencil_cfg(struct v3dv_pipeline *pipeline,
-                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
-{
-   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
-
-   if (!ds_info || !ds_info->stencilTestEnable)
-      return;
-
-   if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
-      return;
-
-   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
-                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
-                                           V3DV_DYNAMIC_STENCIL_REFERENCE;
-
-
-   /* If front != back or we have dynamic stencil state we can't emit a single
-    * packet for both faces.
-    */
-   bool needs_front_and_back = false;
-   if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
-       memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
-      needs_front_and_back = true;
-
-   /* If the front and back configurations are the same we can emit both with
-    * a single packet.
-    */
-   pipeline->emit_stencil_cfg[0] = true;
-   if (!needs_front_and_back) {
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
-                              true, true, &ds_info->front);
-   } else {
-      pipeline->emit_stencil_cfg[1] = true;
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
-                              true, false, &ds_info->front);
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
-                              false, true, &ds_info->back);
-   }
-}
-
 static bool
 stencil_op_is_no_op(const VkStencilOpState *stencil)
 {
@@ -2659,216 +2354,6 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
    }
 }
 
-static void
-pack_shader_state_record(struct v3dv_pipeline *pipeline)
-{
-   assert(sizeof(pipeline->shader_state_record) ==
-          cl_packet_length(GL_SHADER_STATE_RECORD));
-
-   struct v3d_fs_prog_data *prog_data_fs =
-      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
-   struct v3d_vs_prog_data *prog_data_vs =
-      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
-
-   struct v3d_vs_prog_data *prog_data_vs_bin =
-      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
-
-
-   /* Note: we are not packing addresses, as we need the job (see
-    * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
-    * point as they depend on dynamic info that can be set after create the
-    * pipeline (like viewport), . Would need to be filled later, so we are
-    * doing a partial prepacking.
-    */
-   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
-      shader.enable_clipping = true;
-
-      shader.point_size_in_shaded_vertex_data =
-         pipeline->topology == PIPE_PRIM_POINTS;
-
-      /* Must be set if the shader modifies Z, discards, or modifies
-       * the sample mask.  For any of these cases, the fragment
-       * shader needs to write the Z value (even just discards).
-       */
-      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
-      /* Set if the EZ test must be disabled (due to shader side
-       * effects and the early_z flag not being present in the
-       * shader).
-       */
-      shader.turn_off_early_z_test = prog_data_fs->disable_ez;
-
-      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
-         prog_data_fs->uses_center_w;
-
-      /* The description for gl_SampleID states that if a fragment shader reads
-       * it, then we should automatically activate per-sample shading. However,
-       * the Vulkan spec also states that if a framebuffer has no attachments:
-       *
-       *    "The subpass continues to use the width, height, and layers of the
-       *     framebuffer to define the dimensions of the rendering area, and the
-       *     rasterizationSamples from each pipeline’s
-       *     VkPipelineMultisampleStateCreateInfo to define the number of
-       *     samples used in rasterization multisample rasterization."
-       *
-       * So in this scenario, if the pipeline doesn't enable multiple samples
-       * but the fragment shader accesses gl_SampleID we would be requested
-       * to do per-sample shading in single sample rasterization mode, which
-       * is pointless, so just disable it in that case.
-       */
-      shader.enable_sample_rate_shading =
-         pipeline->sample_rate_shading ||
-         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
-
-      shader.any_shader_reads_hardware_written_primitive_id = false;
-
-      shader.do_scoreboard_wait_on_first_thread_switch =
-         prog_data_fs->lock_scoreboard_on_first_thrsw;
-      shader.disable_implicit_point_line_varyings =
-         !prog_data_fs->uses_implicit_point_line_varyings;
-
-      shader.number_of_varyings_in_fragment_shader =
-         prog_data_fs->num_inputs;
-
-      shader.coordinate_shader_propagate_nans = true;
-      shader.vertex_shader_propagate_nans = true;
-      shader.fragment_shader_propagate_nans = true;
-
-      /* Note: see previous note about adresses */
-      /* shader.coordinate_shader_code_address */
-      /* shader.vertex_shader_code_address */
-      /* shader.fragment_shader_code_address */
-
-      /* FIXME: Use combined input/output size flag in the common case (also
-       * on v3d, see v3dx_draw).
-       */
-      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
-         prog_data_vs_bin->separate_segments;
-      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-         prog_data_vs->separate_segments;
-
-      shader.coordinate_shader_input_vpm_segment_size =
-         prog_data_vs_bin->separate_segments ?
-         prog_data_vs_bin->vpm_input_size : 1;
-      shader.vertex_shader_input_vpm_segment_size =
-         prog_data_vs->separate_segments ?
-         prog_data_vs->vpm_input_size : 1;
-
-      shader.coordinate_shader_output_vpm_segment_size =
-         prog_data_vs_bin->vpm_output_size;
-      shader.vertex_shader_output_vpm_segment_size =
-         prog_data_vs->vpm_output_size;
-
-      /* Note: see previous note about adresses */
-      /* shader.coordinate_shader_uniforms_address */
-      /* shader.vertex_shader_uniforms_address */
-      /* shader.fragment_shader_uniforms_address */
-
-      shader.min_coord_shader_input_segments_required_in_play =
-         pipeline->vpm_cfg_bin.As;
-      shader.min_vertex_shader_input_segments_required_in_play =
-         pipeline->vpm_cfg.As;
-
-      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         pipeline->vpm_cfg_bin.Ve;
-      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         pipeline->vpm_cfg.Ve;
-
-      shader.coordinate_shader_4_way_threadable =
-         prog_data_vs_bin->base.threads == 4;
-      shader.vertex_shader_4_way_threadable =
-         prog_data_vs->base.threads == 4;
-      shader.fragment_shader_4_way_threadable =
-         prog_data_fs->base.threads == 4;
-
-      shader.coordinate_shader_start_in_final_thread_section =
-         prog_data_vs_bin->base.single_seg;
-      shader.vertex_shader_start_in_final_thread_section =
-         prog_data_vs->base.single_seg;
-      shader.fragment_shader_start_in_final_thread_section =
-         prog_data_fs->base.single_seg;
-
-      shader.vertex_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_vid;
-      shader.base_instance_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_biid;
-      shader.instance_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_iid;
-      shader.vertex_id_read_by_vertex_shader =
-         prog_data_vs->uses_vid;
-      shader.base_instance_id_read_by_vertex_shader =
-         prog_data_vs->uses_biid;
-      shader.instance_id_read_by_vertex_shader =
-         prog_data_vs->uses_iid;
-
-      /* Note: see previous note about adresses */
-      /* shader.address_of_default_attribute_values */
-   }
-}
-
-static void
-pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
-{
-   assert(sizeof(pipeline->vcm_cache_size) ==
-          cl_packet_length(VCM_CACHE_SIZE));
-
-   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
-      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
-      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
-   }
-}
-
-/* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
-static uint8_t
-get_attr_type(const struct util_format_description *desc)
-{
-   uint32_t r_size = desc->channel[0].size;
-   uint8_t attr_type = ATTRIBUTE_FLOAT;
-
-   switch (desc->channel[0].type) {
-   case UTIL_FORMAT_TYPE_FLOAT:
-      if (r_size == 32) {
-         attr_type = ATTRIBUTE_FLOAT;
-      } else {
-         assert(r_size == 16);
-         attr_type = ATTRIBUTE_HALF_FLOAT;
-      }
-      break;
-
-   case UTIL_FORMAT_TYPE_SIGNED:
-   case UTIL_FORMAT_TYPE_UNSIGNED:
-      switch (r_size) {
-      case 32:
-         attr_type = ATTRIBUTE_INT;
-         break;
-      case 16:
-         attr_type = ATTRIBUTE_SHORT;
-         break;
-      case 10:
-         attr_type = ATTRIBUTE_INT2_10_10_10;
-         break;
-      case 8:
-         attr_type = ATTRIBUTE_BYTE;
-         break;
-      default:
-         fprintf(stderr,
-                 "format %s unsupported\n",
-                 desc->name);
-         attr_type = ATTRIBUTE_BYTE;
-         abort();
-      }
-      break;
-
-   default:
-      fprintf(stderr,
-              "format %s unsupported\n",
-              desc->name);
-      abort();
-   }
-
-   return attr_type;
-}
-
 static bool
 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
 {
@@ -2926,36 +2411,6 @@ v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
 }
 
 static void
-pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
-                                   uint32_t index,
-                                   const VkVertexInputAttributeDescription *vi_desc)
-{
-   const uint32_t packet_length =
-      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
-
-   const struct util_format_description *desc =
-      vk_format_description(vi_desc->format);
-
-   uint32_t binding = vi_desc->binding;
-
-   v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
-             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
-
-      /* vec_size == 0 means 4 */
-      attr.vec_size = desc->nr_channels & 3;
-      attr.signed_int_type = (desc->channel[0].type ==
-                              UTIL_FORMAT_TYPE_SIGNED);
-      attr.normalized_int_type = desc->channel[0].normalized;
-      attr.read_as_int_uint = desc->channel[0].pure_integer;
-
-      attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
-                                   0xffff);
-      attr.stride = pipeline->vb[binding].stride;
-      attr.type = get_attr_type(desc);
-   }
-}
-
-static void
 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
                          const VkPipelineMultisampleStateCreateInfo *ms_info)
 {
@@ -3032,9 +2487,9 @@ pipeline_init(struct v3dv_pipeline *pipeline,
     */
    assert(!ds_info || !ds_info->depthBoundsTestEnable);
 
-   pack_blend(pipeline, cb_info);
-   pack_cfg_bits(pipeline, ds_info, rs_info, ms_info);
-   pack_stencil_cfg(pipeline, ds_info);
+   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
+                                       rs_info, ms_info);
+
    pipeline_set_ez_state(pipeline, ds_info);
    enable_depth_bias(pipeline, rs_info);
    pipeline_set_sample_mask(pipeline, ms_info);
@@ -3052,49 +2507,8 @@ pipeline_init(struct v3dv_pipeline *pipeline,
       return result;
    }
 
-   pack_shader_state_record(pipeline);
-   pack_vcm_cache_size(pipeline);
-
-   const VkPipelineVertexInputStateCreateInfo *vi_info =
-      pCreateInfo->pVertexInputState;
-
-   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
-   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
-      const VkVertexInputBindingDescription *desc =
-         &vi_info->pVertexBindingDescriptions[i];
-
-      pipeline->vb[desc->binding].stride = desc->stride;
-      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
-   }
-
-   pipeline->va_count = 0;
-   struct v3d_vs_prog_data *prog_data_vs =
-      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
-
-   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
-      const VkVertexInputAttributeDescription *desc =
-         &vi_info->pVertexAttributeDescriptions[i];
-      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
-
-      /* We use a custom driver_location_map instead of
-       * nir_find_variable_with_location because if we were able to get the
-       * shader variant from the cache, we would not have the nir shader
-       * available.
-       */
-      uint32_t driver_location =
-         prog_data_vs->driver_location_map[location];
-
-      if (driver_location != -1) {
-         assert(driver_location < MAX_VERTEX_ATTRIBS);
-         pipeline->va[driver_location].offset = desc->offset;
-         pipeline->va[driver_location].binding = desc->binding;
-         pipeline->va[driver_location].vk_format = desc->format;
-
-         pack_shader_state_attribute_record(pipeline, driver_location, desc);
-
-         pipeline->va_count++;
-      }
-   }
+   v3dv_X(device, pipeline_pack_compile_state)(pipeline,
+                                               pCreateInfo->pVertexInputState);
 
    if (pipeline_has_integer_vertex_attrib(pipeline)) {
       pipeline->default_attribute_values =
diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c
new file mode 100644 (file)
index 0000000..a2d664b
--- /dev/null
@@ -0,0 +1,630 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "vk_format_info.h"
+
+/* Translates a Vulkan blend factor into the value programmed in the
+ * BLEND_CFG packet.
+ *
+ * factor:          the Vulkan blend factor to translate.
+ * dst_alpha_one:   when true, destination alpha factors are folded into
+ *                  constants: DST_ALPHA becomes ONE and ONE_MINUS_DST_ALPHA
+ *                  becomes ZERO.
+ * needs_constants: set to true if the factor is one of the CONSTANT_*
+ *                  factors. It is never written otherwise, so callers can
+ *                  accumulate the flag across several calls.
+ *
+ * Returns the factor value to program. Apart from the destination alpha
+ * factors, supported Vulkan factors are returned unchanged, so this relies on
+ * their numeric values being directly usable as hardware blend factors.
+ */
+static uint8_t
+blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
+{
+   switch (factor) {
+   case VK_BLEND_FACTOR_ZERO:
+   case VK_BLEND_FACTOR_ONE:
+   case VK_BLEND_FACTOR_SRC_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+   case VK_BLEND_FACTOR_DST_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+   case VK_BLEND_FACTOR_SRC_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+      return factor;
+   case VK_BLEND_FACTOR_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+      *needs_constants = true;
+      return factor;
+   case VK_BLEND_FACTOR_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
+                             V3D_BLEND_FACTOR_DST_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
+                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
+   case VK_BLEND_FACTOR_SRC1_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+   case VK_BLEND_FACTOR_SRC1_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+      /* Terminate this case explicitly instead of asserting and then
+       * silently falling through into the default label.
+       */
+      unreachable("Invalid blend factor: dual source blending not supported.");
+   default:
+      break;
+   }
+
+   /* Every valid factor returns from inside the switch, so getting here
+    * means the factor is unknown. This also avoids an "end of non-void
+    * function" error.
+    */
+   unreachable("Unknown blend factor.");
+}
+
+/* Prepacks one BLEND_CFG packet per blend-enabled color attachment of the
+ * pipeline's subpass and computes the color write masks.
+ *
+ * pipeline: pipeline whose blend state (enables, color_write_masks,
+ *           needs_color_constants and cfg[]) is written.
+ * cb_info:  color blend create info; may be NULL, in which case only the
+ *           defaults are stored.
+ *
+ * Unused attachments are skipped entirely. Attachments with blending
+ * disabled only contribute their color write mask.
+ */
+static void
+pack_blend(struct v3dv_pipeline *pipeline,
+           const VkPipelineColorBlendStateCreateInfo *cb_info)
+{
+   /* By default, we are not enabling blending and all color channel writes are
+    * enabled. Color write enables are independent of whether blending is
+    * enabled or not.
+    *
+    * Vulkan specifies color write masks so that bits set correspond to
+    * enabled channels. Our hardware does it the other way around.
+    *
+    * The blend constants flag is reset here too, together with the other
+    * defaults, so that the early returns below never leave a stale value.
+    */
+   pipeline->blend.enables = 0;
+   pipeline->blend.color_write_masks = 0; /* All channels enabled */
+   pipeline->blend.needs_color_constants = false;
+
+   if (!cb_info)
+      return;
+
+   assert(pipeline->subpass);
+   if (pipeline->subpass->color_count == 0)
+      return;
+
+   assert(pipeline->subpass->color_count == cb_info->attachmentCount);
+
+   uint32_t color_write_masks = 0;
+   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
+      const VkPipelineColorBlendAttachmentState *b_state =
+         &cb_info->pAttachments[i];
+
+      uint32_t attachment_idx =
+         pipeline->subpass->color_attachments[i].attachment;
+      if (attachment_idx == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      /* Four (inverted) write mask bits per render target */
+      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
+
+      if (!b_state->blendEnable)
+         continue;
+
+      /* If the format swizzles alpha to the constant 1, destination alpha
+       * factors can be replaced by constants (see blend_factor()).
+       */
+      VkAttachmentDescription *desc =
+         &pipeline->pass->attachments[attachment_idx].desc;
+      const struct v3dv_format *format = v3dv_get_format(desc->format);
+      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
+
+      uint8_t rt_mask = 1 << i;
+      pipeline->blend.enables |= rt_mask;
+
+      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
+         config.render_target_mask = rt_mask;
+
+         config.color_blend_mode = b_state->colorBlendOp;
+         config.color_blend_dst_factor =
+            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.color_blend_src_factor =
+            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+
+         config.alpha_blend_mode = b_state->alphaBlendOp;
+         config.alpha_blend_dst_factor =
+            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.alpha_blend_src_factor =
+            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+      }
+   }
+
+   pipeline->blend.color_write_masks = color_write_masks;
+}
+
+/* Prepacks the CFG_BITS packet into pipeline->cfg_bits from the depth/stencil,
+ * rasterization and multisample create infos. Any of the three may be NULL.
+ *
+ * pack_blend() must have been called before, since the overall blend enable
+ * bit is derived from pipeline->blend.enables. As a side effect this also
+ * stores pipeline->msaa and pipeline->z_updates_enable.
+ */
+static void
+pack_cfg_bits(struct v3dv_pipeline *pipeline,
+              const VkPipelineDepthStencilStateCreateInfo *ds_info,
+              const VkPipelineRasterizationStateCreateInfo *rs_info,
+              const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
+
+   pipeline->msaa =
+      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
+
+   /* Depth and stencil testing are disabled without a D/S attachment */
+   const bool has_ds_attachment =
+      pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
+   const bool depth_test_enabled =
+      ds_info && ds_info->depthTestEnable && has_ds_attachment;
+   const bool stencil_test_enabled =
+      ds_info && ds_info->stencilTestEnable && has_ds_attachment;
+   const bool non_fill_polygons =
+      rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL;
+
+   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
+      /* Culling: a face is enabled when its cull bit is not set. With no
+       * rasterization info both faces stay disabled.
+       */
+      config.enable_forward_facing_primitive =
+         rs_info && !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT);
+      config.enable_reverse_facing_primitive =
+         rs_info && !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT);
+
+      /* The hardware setting seems to be backwards: it is set for a
+       * counter-clockwise Vulkan front face.
+       */
+      config.clockwise_primitives =
+         rs_info && rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
+
+      config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
+
+      /* Perpendicular end caps. Required to pass the CTS line rasterization
+       * tests while exposing, at least, the minimum required 4 bits of
+       * subpixel precision.
+       */
+      config.line_rasterization = 1;
+
+      if (non_fill_polygons) {
+         config.direct3d_wireframe_triangles_mode = true;
+         config.direct3d_point_fill_mode =
+            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
+      }
+
+      config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
+
+      /* The Vulkan spec describes the provoking vertex as:
+       *
+       *   "The vertex in a primitive from which flat shaded attribute
+       *    values are taken. This is generally the “first” vertex in the
+       *    primitive, and depends on the primitive topology."
+       *
+       * Using the first vertex is the Direct3D convention; OpenGL defaults
+       * to the last one.
+       */
+      config.direct3d_provoking_vertex = true;
+
+      config.blend_enable = pipeline->blend.enables != 0;
+
+      if (depth_test_enabled) {
+         config.z_updates_enable = ds_info->depthWriteEnable;
+         config.depth_test_function = ds_info->depthCompareOp;
+      } else {
+         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
+      }
+
+      /* EZ state will be updated at draw time based on bound pipeline state */
+      config.early_z_updates_enable = false;
+      config.early_z_enable = false;
+
+      config.stencil_enable = stencil_test_enabled;
+
+      /* Keep a copy of the packed value outside of the packet */
+      pipeline->z_updates_enable = config.z_updates_enable;
+   }
+}
+
+/* Translates a Vulkan stencil operation into the matching
+ * V3D_STENCIL_OP_* value used when packing STENCIL_CFG.
+ *
+ * The parameter is a VkStencilOp: every case label below is a
+ * VK_STENCIL_OP_* enumerant and callers pass VkStencilOpState fields.
+ * Any other value is treated as unreachable.
+ */
+static uint32_t
+translate_stencil_op(VkStencilOp op)
+{
+   switch (op) {
+   case VK_STENCIL_OP_KEEP:
+      return V3D_STENCIL_OP_KEEP;
+   case VK_STENCIL_OP_ZERO:
+      return V3D_STENCIL_OP_ZERO;
+   case VK_STENCIL_OP_REPLACE:
+      return V3D_STENCIL_OP_REPLACE;
+   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+      return V3D_STENCIL_OP_INCR;
+   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+      return V3D_STENCIL_OP_DECR;
+   case VK_STENCIL_OP_INVERT:
+      return V3D_STENCIL_OP_INVERT;
+   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+      return V3D_STENCIL_OP_INCWRAP;
+   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+      return V3D_STENCIL_OP_DECWRAP;
+   default:
+      unreachable("bad stencil op");
+   }
+}
+
+/* Prepacks one STENCIL_CFG packet into stencil_cfg from stencil_state.
+ *
+ * is_front and is_back are written to the packet's front_config and
+ * back_config fields. Values that the pipeline declares as dynamic
+ * (write mask, compare mask, reference) are packed as 0.
+ */
+static void
+pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
+                        uint8_t *stencil_cfg,
+                        bool is_front,
+                        bool is_back,
+                        const VkStencilOpState *stencil_state)
+{
+   /* From the Vulkan spec:
+    *
+    *   "Reference is an integer reference value that is used in the unsigned
+    *    stencil comparison. The reference value used by stencil comparison
+    *    must be within the range [0,2^s-1] , where s is the number of bits in
+    *    the stencil framebuffer attachment, otherwise the reference value is
+    *    considered undefined."
+    *
+    * In our case, 's' is always 8, so we mask the values to 8 bits to
+    * prevent our packing functions from asserting in debug mode if they see
+    * larger values.
+    *
+    * If we have dynamic state we need to make sure we set the corresponding
+    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
+    * the old.
+    */
+   const uint8_t write_mask =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
+         0 : stencil_state->writeMask & 0xff;
+
+   const uint8_t compare_mask =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
+         0 : stencil_state->compareMask & 0xff;
+
+   /* The reference value has its own dynamic state bit: it must be checked
+    * against V3DV_DYNAMIC_STENCIL_REFERENCE, not the compare mask bit.
+    */
+   const uint8_t reference =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
+         0 : stencil_state->reference & 0xff;
+
+   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
+      config.front_config = is_front;
+      config.back_config = is_back;
+      config.stencil_write_mask = write_mask;
+      config.stencil_test_mask = compare_mask;
+      config.stencil_test_function = stencil_state->compareOp;
+      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
+      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
+      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
+      config.stencil_ref_value = reference;
+   }
+}
+
+/* Prepacks the STENCIL_CFG packet(s) of the pipeline into
+ * pipeline->stencil_cfg and flags in pipeline->emit_stencil_cfg which of
+ * the two slots have been packed.
+ *
+ * Nothing is packed if ds_info is NULL, the stencil test is disabled or the
+ * subpass has no depth/stencil attachment.
+ */
+static void
+pack_stencil_cfg(struct v3dv_pipeline *pipeline,
+                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
+{
+   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
+
+   if (!ds_info || !ds_info->stencilTestEnable ||
+       pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
+      return;
+   }
+
+   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
+                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
+                                           V3DV_DYNAMIC_STENCIL_REFERENCE;
+
+   /* A single packet can only cover both faces when front == back and none
+    * of the stencil state is dynamic.
+    */
+   const bool separate_faces =
+      (pipeline->dynamic_state.mask & dynamic_stencil_states) ||
+      memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front));
+
+   pipeline->emit_stencil_cfg[0] = true;
+
+   if (separate_faces) {
+      /* One packet per face: slot 0 is the front, slot 1 the back */
+      pipeline->emit_stencil_cfg[1] = true;
+      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+                              true, false, &ds_info->front);
+      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
+                              false, true, &ds_info->back);
+      return;
+   }
+
+   /* Same configuration for both faces: emit both with a single packet */
+   pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+                           true, true, &ds_info->front);
+}
+
+/* Prepacks the pipeline state derived from the color blend, depth/stencil,
+ * rasterization and multisample create infos.
+ *
+ * The order of the calls matters: pack_cfg_bits() reads blend state that
+ * pack_blend() stores in the pipeline, so pack_blend() has to run first.
+ */
+void
+v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
+                          const VkPipelineColorBlendStateCreateInfo *cb_info,
+                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
+                          const VkPipelineRasterizationStateCreateInfo *rs_info,
+                          const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   pack_blend(pipeline, cb_info);
+   pack_cfg_bits(pipeline, ds_info, rs_info, ms_info);
+   pack_stencil_cfg(pipeline, ds_info);
+}
+
+/* Partially prepacks the GL_SHADER_STATE_RECORD packet into
+ * pipeline->shader_state_record, using the prog_data of the fragment,
+ * vertex and vertex-bin shader variants plus the pipeline VPM
+ * configuration.
+ *
+ * Code, uniform and default attribute values addresses are left unset here.
+ */
+static void
+pack_shader_state_record(struct v3dv_pipeline *pipeline)
+{
+   assert(sizeof(pipeline->shader_state_record) ==
+          cl_packet_length(GL_SHADER_STATE_RECORD));
+
+   const struct v3d_fs_prog_data *fs =
+      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
+   const struct v3d_vs_prog_data *vs =
+      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
+   const struct v3d_vs_prog_data *vs_bin =
+      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
+
+   /* Note: addresses are not packed here, as that needs the job (see
+    * cl_pack_emit_reloc). Uniforms can't be filled in at this point either,
+    * as they depend on dynamic info that can be set after the pipeline is
+    * created (like the viewport). Those need to be filled in later, so this
+    * is only a partial prepacking.
+    */
+   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
+      shader.enable_clipping = true;
+
+      shader.point_size_in_shaded_vertex_data =
+         pipeline->topology == PIPE_PRIM_POINTS;
+
+      /* Must be set if the shader modifies Z, discards, or modifies
+       * the sample mask.  For any of these cases, the fragment
+       * shader needs to write the Z value (even just discards).
+       */
+      shader.fragment_shader_does_z_writes = fs->writes_z;
+
+      /* Set if the EZ test must be disabled (due to shader side
+       * effects and the early_z flag not being present in the
+       * shader).
+       */
+      shader.turn_off_early_z_test = fs->disable_ez;
+
+      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+         fs->uses_center_w;
+
+      /* The description for gl_SampleID states that if a fragment shader reads
+       * it, then we should automatically activate per-sample shading. However,
+       * the Vulkan spec also states that if a framebuffer has no attachments:
+       *
+       *    "The subpass continues to use the width, height, and layers of the
+       *     framebuffer to define the dimensions of the rendering area, and the
+       *     rasterizationSamples from each pipeline's
+       *     VkPipelineMultisampleStateCreateInfo to define the number of
+       *     samples used in rasterization multisample rasterization."
+       *
+       * So in this scenario, if the pipeline doesn't enable multiple samples
+       * but the fragment shader accesses gl_SampleID we would be requested
+       * to do per-sample shading in single sample rasterization mode, which
+       * is pointless, so just disable it in that case.
+       */
+      shader.enable_sample_rate_shading =
+         pipeline->sample_rate_shading ||
+         (pipeline->msaa && fs->force_per_sample_msaa);
+
+      shader.any_shader_reads_hardware_written_primitive_id = false;
+
+      shader.do_scoreboard_wait_on_first_thread_switch =
+         fs->lock_scoreboard_on_first_thrsw;
+      shader.disable_implicit_point_line_varyings =
+         !fs->uses_implicit_point_line_varyings;
+
+      shader.number_of_varyings_in_fragment_shader = fs->num_inputs;
+
+      shader.coordinate_shader_propagate_nans = true;
+      shader.vertex_shader_propagate_nans = true;
+      shader.fragment_shader_propagate_nans = true;
+
+      /* Not packed here (see the note about addresses above):
+       *    shader.coordinate_shader_code_address
+       *    shader.vertex_shader_code_address
+       *    shader.fragment_shader_code_address
+       */
+
+      /* FIXME: Use combined input/output size flag in the common case (also
+       * on v3d, see v3dx_draw).
+       */
+      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+         vs_bin->separate_segments;
+      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+         vs->separate_segments;
+
+      shader.coordinate_shader_input_vpm_segment_size =
+         vs_bin->separate_segments ? vs_bin->vpm_input_size : 1;
+      shader.vertex_shader_input_vpm_segment_size =
+         vs->separate_segments ? vs->vpm_input_size : 1;
+
+      shader.coordinate_shader_output_vpm_segment_size =
+         vs_bin->vpm_output_size;
+      shader.vertex_shader_output_vpm_segment_size =
+         vs->vpm_output_size;
+
+      /* Not packed here (see the note about addresses above):
+       *    shader.coordinate_shader_uniforms_address
+       *    shader.vertex_shader_uniforms_address
+       *    shader.fragment_shader_uniforms_address
+       */
+
+      shader.min_coord_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg_bin.As;
+      shader.min_vertex_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg.As;
+
+      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg_bin.Ve;
+      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg.Ve;
+
+      shader.coordinate_shader_4_way_threadable =
+         vs_bin->base.threads == 4;
+      shader.vertex_shader_4_way_threadable =
+         vs->base.threads == 4;
+      shader.fragment_shader_4_way_threadable =
+         fs->base.threads == 4;
+
+      shader.coordinate_shader_start_in_final_thread_section =
+         vs_bin->base.single_seg;
+      shader.vertex_shader_start_in_final_thread_section =
+         vs->base.single_seg;
+      shader.fragment_shader_start_in_final_thread_section =
+         fs->base.single_seg;
+
+      shader.vertex_id_read_by_coordinate_shader = vs_bin->uses_vid;
+      shader.base_instance_id_read_by_coordinate_shader = vs_bin->uses_biid;
+      shader.instance_id_read_by_coordinate_shader = vs_bin->uses_iid;
+
+      shader.vertex_id_read_by_vertex_shader = vs->uses_vid;
+      shader.base_instance_id_read_by_vertex_shader = vs->uses_biid;
+      shader.instance_id_read_by_vertex_shader = vs->uses_iid;
+
+      /* Not packed here (see the note about addresses above):
+       *    shader.address_of_default_attribute_values
+       */
+   }
+}
+
+/* Prepacks the VCM_CACHE_SIZE packet into pipeline->vcm_cache_size, taking
+ * the Vc value of the render (vpm_cfg) and binning (vpm_cfg_bin) VPM
+ * configurations stored in the pipeline.
+ */
+static void
+pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
+{
+   assert(sizeof(pipeline->vcm_cache_size) ==
+          cl_packet_length(VCM_CACHE_SIZE));
+
+   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
+      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
+      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
+   }
+}
+
+/* Returns the attribute type, as defined on the
+ * GL_SHADER_STATE_ATTRIBUTE_RECORD, for a vertex format. The decision is
+ * based on the type and size of the first channel of the format.
+ *
+ * Unsupported formats print a message to stderr and abort.
+ */
+static uint8_t
+get_attr_type(const struct util_format_description *desc)
+{
+   const uint32_t r_size = desc->channel[0].size;
+
+   switch (desc->channel[0].type) {
+   case UTIL_FORMAT_TYPE_FLOAT:
+      if (r_size == 32)
+         return ATTRIBUTE_FLOAT;
+
+      /* The only other float size expected here is 16 bits */
+      assert(r_size == 16);
+      return ATTRIBUTE_HALF_FLOAT;
+
+   case UTIL_FORMAT_TYPE_SIGNED:
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      switch (r_size) {
+      case 32:
+         return ATTRIBUTE_INT;
+      case 16:
+         return ATTRIBUTE_SHORT;
+      case 10:
+         return ATTRIBUTE_INT2_10_10_10;
+      case 8:
+         return ATTRIBUTE_BYTE;
+      default:
+         break;
+      }
+      break;
+
+   default:
+      break;
+   }
+
+   /* Anything that did not return above is not a supported format */
+   fprintf(stderr,
+           "format %s unsupported\n",
+           desc->name);
+   abort();
+}
+
+/* Prepacks the GL_SHADER_STATE_ATTRIBUTE_RECORD for one vertex attribute.
+ *
+ * The record is written into pipeline->vertex_attrs at the slot selected by
+ * index (records are stored back to back, one packet length each). Stride
+ * and instance divisor are read from pipeline->vb[] for the binding of the
+ * attribute, so that entry needs to be filled in before calling this.
+ */
+static void
+pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
+                                   uint32_t index,
+                                   const VkVertexInputAttributeDescription *vi_desc)
+{
+   const uint32_t record_size =
+      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+   const uint32_t record_offset = index * record_size;
+
+   const struct util_format_description *format =
+      vk_format_description(vi_desc->format);
+
+   const uint32_t vb_index = vi_desc->binding;
+
+   const bool is_signed = format->channel[0].type == UTIL_FORMAT_TYPE_SIGNED;
+
+   v3dvx_pack(&pipeline->vertex_attrs[record_offset],
+             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+      attr.type = get_attr_type(format);
+
+      /* vec_size == 0 means 4 */
+      attr.vec_size = format->nr_channels & 3;
+
+      attr.signed_int_type = is_signed;
+      attr.normalized_int_type = format->channel[0].normalized;
+      attr.read_as_int_uint = format->channel[0].pure_integer;
+
+      attr.stride = pipeline->vb[vb_index].stride;
+      attr.instance_divisor = MIN2(pipeline->vb[vb_index].instance_divisor,
+                                   0xffff);
+   }
+}
+
+/* Prepacks the pipeline state that depends on the compiled shaders: the
+ * shader state record, the VCM cache size and one attribute record for each
+ * vertex attribute that the vertex shader maps to a driver location.
+ *
+ * Also stores in the pipeline the vertex binding info (vb, vb_count) and
+ * vertex attribute info (va, va_count) taken from vi_info.
+ */
+void
+v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
+                                  const VkPipelineVertexInputStateCreateInfo *vi_info)
+{
+   pack_shader_state_record(pipeline);
+   pack_vcm_cache_size(pipeline);
+
+   /* Store the per-binding info first: the attribute records packed below
+    * read the stride and instance divisor from pipeline->vb[].
+    */
+   const uint32_t binding_count = vi_info->vertexBindingDescriptionCount;
+   pipeline->vb_count = binding_count;
+   for (uint32_t b = 0; b < binding_count; b++) {
+      const VkVertexInputBindingDescription *vb_desc =
+         &vi_info->pVertexBindingDescriptions[b];
+      const uint32_t binding = vb_desc->binding;
+
+      pipeline->vb[binding].stride = vb_desc->stride;
+      pipeline->vb[binding].instance_divisor = vb_desc->inputRate;
+   }
+
+   struct v3d_vs_prog_data *vs =
+      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
+
+   pipeline->va_count = 0;
+   for (uint32_t a = 0; a < vi_info->vertexAttributeDescriptionCount; a++) {
+      const VkVertexInputAttributeDescription *va_desc =
+         &vi_info->pVertexAttributeDescriptions[a];
+      const uint32_t location = va_desc->location + VERT_ATTRIB_GENERIC0;
+
+      /* We use a custom driver_location_map instead of
+       * nir_find_variable_with_location because if we were able to get the
+       * shader variant from the cache, we would not have the nir shader
+       * available.
+       */
+      const uint32_t driver_location = vs->driver_location_map[location];
+
+      /* Attributes without a driver location (-1 in the map) are skipped */
+      if (driver_location == -1)
+         continue;
+
+      assert(driver_location < MAX_VERTEX_ATTRIBS);
+      pipeline->va[driver_location].offset = va_desc->offset;
+      pipeline->va[driver_location].binding = va_desc->binding;
+      pipeline->va[driver_location].vk_format = va_desc->format;
+
+      pack_shader_state_attribute_record(pipeline, driver_location, va_desc);
+
+      pipeline->va_count++;
+   }
+}
index 436d251..b320682 100644 (file)
 #error This file is included by means other than v3dv_private.h
 #endif
 
+/* Used at v3dv_pipeline
+ *
+ * pipeline_pack_state: prepacks, in this order, the blend state, the
+ * CFG_BITS packet and the STENCIL_CFG packet(s) of the pipeline from the
+ * given create infos.
+ *
+ * pipeline_pack_compile_state: prepacks the shader state record, the VCM
+ * cache size and the vertex attribute records, and stores the vertex
+ * binding/attribute info from vi_info in the pipeline.
+ */
+void
+v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
+                          const VkPipelineColorBlendStateCreateInfo *cb_info,
+                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
+                          const VkPipelineRasterizationStateCreateInfo *rs_info,
+                          const VkPipelineMultisampleStateCreateInfo *ms_info);
+void
+v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
+                                  const VkPipelineVertexInputStateCreateInfo *vi_info);
+
 /* Used at v3dv_queue */
 void
 v3dX(job_emit_noop)(struct v3dv_job *job);