radv,aco: use pipe_format for static vertex input state
authorRhys Perry <pendingchaos02@gmail.com>
Fri, 29 Jul 2022 19:14:59 +0000 (20:14 +0100)
committerMarge Bot <emma+marge@anholt.net>
Tue, 30 Aug 2022 19:02:11 +0000 (19:02 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17894>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_shader.h

index 4e1e29b..2547833 100644 (file)
@@ -5386,11 +5386,10 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
 }
 
 bool
-check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
+check_vertex_fetch_size(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
                         unsigned binding_align, unsigned channels)
 {
-   unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
-   if (vtx_info->chan_byte_size != 4 && channels == 3)
+   if (!(vtx_info->has_hw_format & BITFIELD_BIT(channels - 1)))
       return false;
 
    /* Split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
@@ -5399,17 +5398,18 @@ check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info,
     * also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO
     * offset is 2 for R16G16B16A16_SNORM).
     */
+   unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
    return (ctx->options->gfx_level >= GFX7 && ctx->options->gfx_level <= GFX9) ||
           (offset % vertex_byte_size == 0 && MAX2(binding_align, 1) % vertex_byte_size == 0);
 }
 
 uint8_t
-get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
-                      unsigned* channels, unsigned max_channels, unsigned binding_align)
+get_fetch_format(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
+                 unsigned* channels, unsigned max_channels, unsigned binding_align)
 {
    if (!vtx_info->chan_byte_size) {
       *channels = vtx_info->num_channels;
-      return vtx_info->chan_format;
+      return vtx_info->hw_format[0];
    }
 
    unsigned num_channels = *channels;
@@ -5434,22 +5434,7 @@ get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, un
       num_channels = new_channels;
    }
 
-   switch (vtx_info->chan_format) {
-   case V_008F0C_BUF_DATA_FORMAT_8:
-      return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_8, V_008F0C_BUF_DATA_FORMAT_8_8,
-                                    V_008F0C_BUF_DATA_FORMAT_INVALID,
-                                    V_008F0C_BUF_DATA_FORMAT_8_8_8_8}[num_channels - 1];
-   case V_008F0C_BUF_DATA_FORMAT_16:
-      return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_16, V_008F0C_BUF_DATA_FORMAT_16_16,
-                                    V_008F0C_BUF_DATA_FORMAT_INVALID,
-                                    V_008F0C_BUF_DATA_FORMAT_16_16_16_16}[num_channels - 1];
-   case V_008F0C_BUF_DATA_FORMAT_32:
-      return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
-                                    V_008F0C_BUF_DATA_FORMAT_32_32_32,
-                                    V_008F0C_BUF_DATA_FORMAT_32_32_32_32}[num_channels - 1];
-   }
-   unreachable("shouldn't reach here");
-   return V_008F0C_BUF_DATA_FORMAT_INVALID;
+   return vtx_info->hw_format[num_channels - 1];
 }
 
 void
@@ -5503,12 +5488,12 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
       unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
       uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
       uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
-      unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location];
+      enum pipe_format attrib_format =
+         (enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location];
       unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
 
-      unsigned dfmt = attrib_format & 0xf;
-      unsigned nfmt = (attrib_format >> 4) & 0x7;
-      const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
+      const struct ac_vtx_format_info* vtx_info =
+         ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
 
       unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
       unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
@@ -5559,15 +5544,11 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
 
          /* use MUBUF when possible to avoid possible alignment issues */
          /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
-         bool use_mubuf =
-            (nfmt == V_008F0C_BUF_NUM_FORMAT_FLOAT || nfmt == V_008F0C_BUF_NUM_FORMAT_UINT ||
-             nfmt == V_008F0C_BUF_NUM_FORMAT_SINT) &&
-            vtx_info->chan_byte_size == 4 && bitsize != 16;
-         unsigned fetch_dfmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
+         bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16;
+         unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
          if (!use_mubuf) {
-            fetch_dfmt =
-               get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component,
-                                     vtx_info->num_channels - channel_start, binding_align);
+            fetch_fmt = get_fetch_format(ctx, vtx_info, fetch_offset, &fetch_component,
+                                         vtx_info->num_channels - channel_start, binding_align);
          } else {
             /* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */
             if (fetch_component == 3 && ctx->options->gfx_level == GFX6)
@@ -5644,8 +5625,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
                                     .instr;
             mubuf->mubuf().vtx_binding = attrib_binding + 1;
          } else {
+            unsigned dfmt = fetch_fmt & 0xf;
+            unsigned nfmt = fetch_fmt >> 4;
             Instruction* mtbuf = bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
-                                           soffset, fetch_dfmt, nfmt, fetch_offset, false, true)
+                                           soffset, dfmt, nfmt, fetch_offset, false, true)
                                     .instr;
             mtbuf->mtbuf().vtx_binding = attrib_binding + 1;
          }
@@ -5665,7 +5648,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
 
       if (!direct_fetch) {
          bool is_float =
-            nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
+            nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float;
 
          unsigned num_components = instr->dest.ssa.num_components;
 
index 702a583..68639c2 100644 (file)
@@ -416,30 +416,19 @@ init_context(isel_context* ctx, nir_shader* shader)
    ctx->ub_config.max_workgroup_size[1] = 2048;
    ctx->ub_config.max_workgroup_size[2] = 2048;
    for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
-      unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i];
-      unsigned dfmt = attrib_format & 0xf;
-      unsigned nfmt = (attrib_format >> 4) & 0x7;
+      pipe_format format = (pipe_format)ctx->options->key.vs.vertex_attribute_formats[i];
+      const struct util_format_description* desc = util_format_description(format);
 
-      uint32_t max = UINT32_MAX;
-      if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) {
+      uint32_t max;
+      if (desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED) {
+         max = UINT32_MAX;
+      } else if (desc->channel[0].normalized) {
          max = 0x3f800000u;
-      } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) {
-         bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED;
-         switch (dfmt) {
-         case V_008F0C_BUF_DATA_FORMAT_8:
-         case V_008F0C_BUF_DATA_FORMAT_8_8:
-         case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: max = uscaled ? 0x437f0000u : UINT8_MAX; break;
-         case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
-         case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: max = uscaled ? 0x447fc000u : 1023; break;
-         case V_008F0C_BUF_DATA_FORMAT_10_11_11:
-         case V_008F0C_BUF_DATA_FORMAT_11_11_10: max = uscaled ? 0x44ffe000u : 2047; break;
-         case V_008F0C_BUF_DATA_FORMAT_16:
-         case V_008F0C_BUF_DATA_FORMAT_16_16:
-         case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: max = uscaled ? 0x477fff00u : UINT16_MAX; break;
-         case V_008F0C_BUF_DATA_FORMAT_32:
-         case V_008F0C_BUF_DATA_FORMAT_32_32:
-         case V_008F0C_BUF_DATA_FORMAT_32_32_32:
-         case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: max = uscaled ? 0x4f800000u : UINT32_MAX; break;
+      } else {
+         max = 0;
+         for (unsigned j = 0; j < desc->nr_channels; j++) {
+            uint32_t chan_max = u_uintN_max(desc->channel[0].size);
+            max = MAX2(max, desc->channel[j].pure_integer ? chan_max : fui(chan_max));
          }
       }
       ctx->ub_config.vertex_attrib_max[i] = max;
index 34ee6bf..a79c25e 100644 (file)
@@ -394,11 +394,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
    LLVMValueRef input;
    LLVMValueRef buffer_index;
    unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0;
-   unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
-   unsigned data_format = attrib_format & 0x0f;
-   unsigned num_format = (attrib_format >> 4) & 0x07;
-   bool is_float =
-      num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
+   enum pipe_format attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
+   const struct util_format_description *desc = util_format_description(attrib_format);
+   bool is_float = !desc->channel[0].pure_integer;
    uint8_t input_usage_mask =
       ctx->shader_info->vs.input_usage_mask[driver_location];
    unsigned num_input_channels = util_last_bit(input_usage_mask);
@@ -424,7 +422,8 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
                                   ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
    }
 
-   const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
+   const struct ac_vtx_format_info *vtx_info =
+      ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
 
    /* Adjust the number of channels to load based on the vertex attribute format. */
    unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
@@ -432,6 +431,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
    unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
    unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
 
+   unsigned data_format = vtx_info->hw_format[num_channels - 1] & 0xf;
+   unsigned num_format = vtx_info->hw_format[0] >> 4;
+
    unsigned desc_index =
       ctx->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
    desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask &
@@ -444,8 +446,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
     * dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
     * VBO offset is 2 for R16G16B16A16_SNORM).
     */
-   if ((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) {
-      unsigned chan_format = vtx_info->chan_format;
+   if (((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) ||
+       !(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
+      unsigned chan_format = vtx_info->hw_format[0] & 0xf;
       LLVMValueRef values[4];
 
       for (unsigned chan = 0; chan < num_channels; chan++) {
index d0d153e..45e9e65 100644 (file)
@@ -3010,6 +3010,7 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
                                     const struct radv_blend_state *blend)
 {
    struct radv_device *device = pipeline->base.device;
+   const struct radv_physical_device *pdevice = device->physical_device;
    struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags);
 
    key.has_multiview_view_index = !!state->rp->view_mask;
@@ -3023,16 +3024,9 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
       u_foreach_bit(i, state->vi->attributes_valid) {
          uint32_t binding = state->vi->attributes[i].binding;
          uint32_t offset = state->vi->attributes[i].offset;
-         VkFormat format = state->vi->attributes[i].format;
-         const struct util_format_description *format_desc;
-         unsigned num_format, data_format;
-         bool post_shuffle;
-
-         format_desc = vk_format_description(format);
-         radv_translate_vertex_format(device->physical_device, format, format_desc, &data_format,
-                                      &num_format, &post_shuffle, &key.vs.vertex_alpha_adjust[i]);
+         enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
 
-         key.vs.vertex_attribute_formats[i] = data_format | (num_format << 4);
+         key.vs.vertex_attribute_formats[i] = format;
          key.vs.vertex_attribute_bindings[i] = binding;
          key.vs.vertex_attribute_offsets[i] = offset;
          key.vs.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
@@ -3056,13 +3050,10 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
             key.vs.instance_rate_inputs |= 1u << i;
          }
 
-         if (post_shuffle) {
-            key.vs.vertex_post_shuffle |= 1u << i;
-         }
-
-         const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format);
+         const struct ac_vtx_format_info *vtx_info =
+            ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format);
          unsigned attrib_align =
-            dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size;
+            vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
 
          /* If offset is misaligned, then the buffer offset must be too. Just skip updating
           * vertex_binding_align in this case.
@@ -3803,7 +3794,8 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alp
 }
 
 static bool
-radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
+radv_lower_vs_input(nir_shader *nir, const struct radv_physical_device *pdevice,
+                    const struct radv_pipeline_key *pipeline_key)
 {
    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
    bool progress = false;
@@ -3824,25 +3816,22 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
             continue;
 
          unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
-         enum ac_vs_input_alpha_adjust alpha_adjust =
-            pipeline_key->vs.vertex_alpha_adjust[location];
-         bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
 
          unsigned component = nir_intrinsic_component(intrin);
          unsigned num_components = intrin->dest.ssa.num_components;
 
-         unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
-         unsigned dfmt = attrib_format & 0xf;
-         unsigned nfmt = (attrib_format >> 4) & 0x7;
-         const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt);
+         enum pipe_format attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
+         const struct ac_vtx_format_info *desc = ac_get_vtx_format_info(
+            pdevice->rad_info.gfx_level, pdevice->rad_info.family, attrib_format);
          bool is_float =
-            nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
+            nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin)) == nir_type_float;
 
          unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
-         unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
+         unsigned num_channels = MIN2(util_last_bit(mask), desc->num_channels);
 
          static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
          static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
+         bool post_shuffle = G_008F0C_DST_SEL_X(desc->dst_sel) == V_008F0C_SQ_SEL_Z;
          const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
 
          b.cursor = nir_after_instr(instr);
@@ -3871,9 +3860,9 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
             }
          }
 
-         if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
+         if (desc->alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
             unsigned idx = num_components - 1;
-            channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
+            channels[idx] = radv_adjust_vertex_fetch_alpha(&b, desc->alpha_adjust, channels[idx]);
          }
 
          nir_ssa_def *new_dest = nir_vec(&b, channels, num_components);
@@ -4579,7 +4568,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
    }
 
    if (stages[MESA_SHADER_VERTEX].nir) {
-      NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key);
+      NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, device->physical_device,
+               pipeline_key);
    }
 
    if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) {
index 5ef418b..2b2d218 100644 (file)
@@ -71,8 +71,6 @@ struct radv_pipeline_key {
       uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
       uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
       uint8_t vertex_binding_align[MAX_VBS];
-      enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
-      uint32_t vertex_post_shuffle;
       uint32_t provoking_vtx_last : 1;
       uint32_t dynamic_input_state : 1;
       uint8_t topology;