ac: add ac_vtx_format_info
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 29 Jul 2022 18:34:47 +0000 (19:34 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 30 Aug 2022 19:02:11 +0000 (19:02 +0000)
This will be used by RADV and ACO.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17894>

src/amd/common/ac_shader_util.c
src/amd/common/ac_shader_util.h
src/amd/compiler/aco_instruction_selection.cpp
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_formats.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.h

index 791acbd..cba42f1 100644 (file)
@@ -421,6 +421,114 @@ const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
    return &data_format_table[dfmt];
 }
 
+/* Repeat an initializer v two, three or four times as a comma-separated list. */
+#define DUP2(v) v, v
+#define DUP3(v) v, v, v
+#define DUP4(v) v, v, v, v
+
+/* Each FMT* helper expands to two initializers of a table entry: the has_hw_format value
+ * followed by the four hw_format[] slots. In every helper below, bit i of the first value is
+ * set exactly when slot i is not HW_FMT_INVALID. HW_FMT and HW_FMT_INVALID are defined
+ * separately before each table is instantiated.
+ *
+ * FMT:    dfmt repeated 1, 2 and 4 times; the third slot is invalid (0xb).
+ * FMT_32: 32, 32_32, 32_32_32 and 32_32_32_32; every slot is populated (0xf).
+ * FMT_64: 32_32 and 32_32_32_32 in the first two slots; the rest are invalid (0x3).
+ * FMTP:   the same format in every slot; used for the packed formats (0xf).
+ */
+#define FMT(dfmt, nfmt) 0xb, {HW_FMT(dfmt, nfmt), HW_FMT(dfmt##_##dfmt, nfmt), HW_FMT_INVALID, HW_FMT(dfmt##_##dfmt##_##dfmt##_##dfmt, nfmt)}
+#define FMT_32(nfmt) 0xf, {HW_FMT(32, nfmt), HW_FMT(32_32, nfmt), HW_FMT(32_32_32, nfmt), HW_FMT(32_32_32_32, nfmt)}
+#define FMT_64(nfmt) 0x3, {HW_FMT(32_32, nfmt), HW_FMT(32_32_32_32, nfmt), DUP2(HW_FMT_INVALID)}
+#define FMTP(dfmt, nfmt) 0xf, {DUP4(HW_FMT(dfmt, nfmt))}
+
+/* Combine the four DST_SEL_X/Y/Z/W fields from V_008F0C_SQ_SEL_* suffixes.
+ * The format tables pass X, Y, Z, W, 0 or 1 for each argument.
+ */
+#define DST_SEL(x, y, z, w) \
+   (S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_##x) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_##y) | \
+    S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_##z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_##w))
+
+/* Table entries for the 8-bit and 16-bit per-channel formats with the numeric format nfmt.
+ * Initializer order: dst_sel, element_size, num_channels, chan_byte_size, followed by
+ * has_hw_format and hw_format[] from FMT(). Components the format does not provide use the
+ * 0 selector (y, z) or the 1 selector (w); the B8G8R8* variants swap X and Z in the swizzle.
+ */
+#define LIST_NFMT_8_16(nfmt) \
+   [(int)PIPE_FORMAT_R8_##nfmt] = {DST_SEL(X,0,0,1), 1, 1, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8_##nfmt] = {DST_SEL(X,Y,0,1), 2, 2, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8B8_##nfmt] = {DST_SEL(X,Y,Z,1), 3, 3, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_B8G8R8_##nfmt] = {DST_SEL(Z,Y,X,1), 3, 3, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8B8A8_##nfmt] = {DST_SEL(X,Y,Z,W), 4, 4, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_B8G8R8A8_##nfmt] = {DST_SEL(Z,Y,X,W), 4, 4, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R16_##nfmt] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16_##nfmt] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16B16_##nfmt] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16B16A16_##nfmt] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, nfmt)},
+
+/* Table entries for the 32-bit and 64-bit per-channel formats with the numeric format nfmt.
+ * Same initializer order as the 8/16-bit list. The 64-bit entries use FMT_64(), i.e. hardware
+ * formats built from 32-bit components, and only R64 uses a swizzle other than X,Y,Z,W.
+ */
+#define LIST_NFMT_32_64(nfmt) \
+   [(int)PIPE_FORMAT_R32_##nfmt] = {DST_SEL(X,0,0,1), 4, 1, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32_##nfmt] = {DST_SEL(X,Y,0,1), 8, 2, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32B32_##nfmt] = {DST_SEL(X,Y,Z,1), 12, 3, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32B32A32_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 4, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R64_##nfmt] = {DST_SEL(X,Y,0,0), 8, 1, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 2, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64B64_##nfmt] = {DST_SEL(X,Y,Z,W), 24, 3, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64B64A64_##nfmt] = {DST_SEL(X,Y,Z,W), 32, 4, 8, FMT_64(nfmt)}, \
+
+/* Designated-initializer list, indexed by enum pipe_format, of every vertex buffer format
+ * described in this file. It is expanded once per table, each time with different definitions
+ * of HW_FMT, HW_FMT_INVALID and AA. AA() wraps the alpha_adjust value of the signed
+ * 2_10_10_10 formats so that a table can either keep it or replace it.
+ * Packed formats (2_10_10_10, 10_11_11) use chan_byte_size 0 and FMTP().
+ */
+#define VB_FORMATS \
+   [(int)PIPE_FORMAT_NONE] = {DST_SEL(0,0,0,1), 0, 4, 0, 0xf, {DUP4(HW_FMT_INVALID)}}, \
+   LIST_NFMT_8_16(UNORM) \
+   LIST_NFMT_8_16(SNORM) \
+   LIST_NFMT_8_16(USCALED) \
+   LIST_NFMT_8_16(SSCALED) \
+   LIST_NFMT_8_16(UINT) \
+   LIST_NFMT_8_16(SINT) \
+   LIST_NFMT_32_64(UINT) \
+   LIST_NFMT_32_64(SINT) \
+   LIST_NFMT_32_64(FLOAT) \
+   [(int)PIPE_FORMAT_R16_FLOAT] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16_FLOAT] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16B16_FLOAT] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16B16A16_FLOAT] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_UNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
+                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_USCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SSCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
+                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_UINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
+                                          AA(AC_ALPHA_ADJUST_SINT)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_UNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
+                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_USCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SSCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
+                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_UINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
+                                          AA(AC_ALPHA_ADJUST_SINT)}, \
+   [(int)PIPE_FORMAT_R11G11B10_FLOAT] = {DST_SEL(X,Y,Z,W), 4, 3, 0, FMTP(10_11_11, FLOAT)}, \
+
+/* Instantiate VB_FORMATS once per hardware format encoding.
+ *
+ * Every table is sized with PIPE_FORMAT_COUNT so that any enum pipe_format value is a valid
+ * index: with an unsized array the length would only be "largest listed format + 1" and a
+ * lookup of a later format would read past the end. Formats not listed in VB_FORMATS are
+ * zero-initialized, i.e. has_hw_format == 0.
+ */
+
+/* dfmt/nfmt encoding: data format in bits [0:3], numeric format in bits [4:7]. */
+#define HW_FMT(dfmt, nfmt) (V_008F0C_BUF_DATA_FORMAT_##dfmt | (V_008F0C_BUF_NUM_FORMAT_##nfmt << 4))
+#define HW_FMT_INVALID (V_008F0C_BUF_DATA_FORMAT_INVALID | (V_008F0C_BUF_NUM_FORMAT_UNORM << 4))
+#define AA(v) v
+static const struct ac_vtx_format_info vb_formats_gfx6_alpha_adjust[PIPE_FORMAT_COUNT] = {
+   VB_FORMATS
+};
+#undef AA
+
+/* All remaining tables replace the alpha adjustment with AC_ALPHA_ADJUST_NONE. */
+#define AA(v) AC_ALPHA_ADJUST_NONE
+static const struct ac_vtx_format_info vb_formats_gfx6[PIPE_FORMAT_COUNT] = {
+   VB_FORMATS
+};
+#undef HW_FMT_INVALID
+#undef HW_FMT
+
+/* GFX10 format enumeration. */
+#define HW_FMT(dfmt, nfmt) V_008F0C_GFX10_FORMAT_##dfmt##_##nfmt
+#define HW_FMT_INVALID V_008F0C_GFX10_FORMAT_INVALID
+static const struct ac_vtx_format_info vb_formats_gfx10[PIPE_FORMAT_COUNT] = {
+   VB_FORMATS
+};
+#undef HW_FMT_INVALID
+#undef HW_FMT
+
+/* GFX11 format enumeration. */
+#define HW_FMT(dfmt, nfmt) V_008F0C_GFX11_FORMAT_##dfmt##_##nfmt
+#define HW_FMT_INVALID V_008F0C_GFX11_FORMAT_INVALID
+static const struct ac_vtx_format_info vb_formats_gfx11[PIPE_FORMAT_COUNT] = {
+   VB_FORMATS
+};
+/* Do not leak the per-table helpers into the rest of the file. */
+#undef HW_FMT_INVALID
+#undef HW_FMT
+#undef AA
+
+/* Return the vertex format table for the given gfx level and family.
+ *
+ * GFX11 and newer use the GFX11 table, GFX10 uses the GFX10 table and everything older uses
+ * one of the two gfx6 tables. The alpha-adjust variant is only returned for levels up to GFX8
+ * on families other than CHIP_STONEY.
+ */
+const struct ac_vtx_format_info *
+ac_get_vtx_format_info_table(enum amd_gfx_level level, enum radeon_family family)
+{
+   if (level >= GFX11)
+      return vb_formats_gfx11;
+
+   if (level >= GFX10)
+      return vb_formats_gfx10;
+
+   if (level <= GFX8 && family != CHIP_STONEY)
+      return vb_formats_gfx6_alpha_adjust;
+
+   return vb_formats_gfx6;
+}
+
+/* Return the table entry describing fmt for the given gfx level and family.
+ *
+ * fmt is used directly as an index into the selected table; no range check is performed here.
+ */
+const struct ac_vtx_format_info *
+ac_get_vtx_format_info(enum amd_gfx_level level, enum radeon_family family, enum pipe_format fmt)
+{
+   const struct ac_vtx_format_info *table = ac_get_vtx_format_info_table(level, family);
+
+   return table + fmt;
+}
+
 enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
                                      bool is_array)
 {
index ae40cb0..2d76c38 100644 (file)
@@ -28,6 +28,7 @@
 #include "amd_family.h"
 #include "compiler/nir/nir.h"
 #include "compiler/shader_enums.h"
+#include "util/format/u_format.h"
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -55,6 +56,29 @@ struct ac_data_format_info {
    uint8_t chan_format;
 };
 
+/* Fix-up to apply to the alpha channel of a fetched vertex input.
+ *
+ * The numeric values matter: users store the value in two bits (a low-bit and a high-bit
+ * mask per attribute) and rebuild it from them, so all values must stay within 0..3.
+ */
+enum ac_vs_input_alpha_adjust {
+   AC_ALPHA_ADJUST_NONE = 0,
+   AC_ALPHA_ADJUST_SNORM = 1,
+   AC_ALPHA_ADJUST_SSCALED = 2,
+   AC_ALPHA_ADJUST_SINT = 3,
+};
+
+/* Description of one vertex buffer format. The tables of these entries are indexed by
+ * enum pipe_format.
+ */
+struct ac_vtx_format_info {
+   /* Combined DST_SEL_X/Y/Z/W swizzle fields. */
+   uint16_t dst_sel;
+   /* In the tables: num_channels * chan_byte_size for unpacked formats, 4 for packed ones. */
+   uint8_t element_size;
+   uint8_t num_channels;
+   uint8_t chan_byte_size; /* 0 for packed formats */
+
+   /* These last three are dependent on the family. */
+
+   /* Bitmask over hw_format[]: the table helpers set bit i when hw_format[i] is populated. */
+   uint8_t has_hw_format;
+   /* Index is number of channels minus one. Use any index for packed formats.
+    * The pre-GFX10 tables store dfmt[0:3],nfmt[4:7].
+    */
+   uint8_t hw_format[4];
+   enum ac_vs_input_alpha_adjust alpha_adjust : 8;
+};
+
 struct ac_spi_color_formats {
    unsigned normal : 8;
    unsigned alpha : 8;
@@ -101,6 +125,13 @@ unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsi
 
 const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
 
+/* Returns the vertex format table for the given gfx level and family. The table is indexed
+ * by enum pipe_format.
+ */
+const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level,
+                                                              enum radeon_family family);
+
+/* Returns the entry for fmt from the table selected by level and family. */
+const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level,
+                                                        enum radeon_family family,
+                                                        enum pipe_format fmt);
+
 enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
                                      bool is_array);
 
index 81a479d..4e1e29b 100644 (file)
@@ -12353,7 +12353,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
       unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
       alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
 
-      if (alpha_adjust == ALPHA_ADJUST_SSCALED)
+      if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
          bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
 
       /* For the integer-like cases, do a natural sign extension.
@@ -12362,16 +12362,16 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
        * and happen to contain 0, 1, 2, 3 as the two LSBs of the
        * exponent.
        */
-      unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
+      unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
       bld.vop3(aco_opcode::v_bfe_i32, Definition(alpha, v1), Operand(alpha, v1),
                Operand::c32(offset), Operand::c32(2u));
 
       /* Convert back to the right type. */
-      if (alpha_adjust == ALPHA_ADJUST_SNORM) {
+      if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
          bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
          bld.vop2(aco_opcode::v_max_f32, Definition(alpha, v1), Operand::c32(0xbf800000u),
                   Operand(alpha, v1));
-      } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
+      } else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
          bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
       }
    }
index 2ad8c8d..d3dc79e 100644 (file)
@@ -5967,7 +5967,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
       if (!found) {
          unsigned nfmt, dfmt;
          bool post_shuffle;
-         enum radv_vs_input_alpha_adjust alpha_adjust;
+         enum ac_vs_input_alpha_adjust alpha_adjust;
          const struct util_format_description *format_desc = vk_format_description(attrib->format);
 
          found = util_dynarray_grow(&cmd_buffer->cached_vertex_formats,
index b8d88df..a58c479 100644 (file)
@@ -151,26 +151,26 @@ void
 radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
                              const struct util_format_description *desc, unsigned *dfmt,
                              unsigned *nfmt, bool *post_shuffle,
-                             enum radv_vs_input_alpha_adjust *alpha_adjust)
+                             enum ac_vs_input_alpha_adjust *alpha_adjust)
 {
    assert(desc->channel[0].type != UTIL_FORMAT_TYPE_VOID);
    *nfmt = radv_translate_buffer_numformat(desc, 0);
    *dfmt = radv_translate_buffer_dataformat(desc, 0);
 
-   *alpha_adjust = ALPHA_ADJUST_NONE;
+   *alpha_adjust = AC_ALPHA_ADJUST_NONE;
    if (pdevice->rad_info.gfx_level <= GFX8 && pdevice->rad_info.family != CHIP_STONEY) {
       switch (format) {
       case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
       case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SNORM;
+         *alpha_adjust = AC_ALPHA_ADJUST_SNORM;
          break;
       case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
       case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SSCALED;
+         *alpha_adjust = AC_ALPHA_ADJUST_SSCALED;
          break;
       case VK_FORMAT_A2R10G10B10_SINT_PACK32:
       case VK_FORMAT_A2B10G10R10_SINT_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SINT;
+         *alpha_adjust = AC_ALPHA_ADJUST_SINT;
          break;
       default:
          break;
index 2d9e89a..d0d153e 100644 (file)
@@ -3776,11 +3776,10 @@ radv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs,
 }
 
 static nir_ssa_def *
-radv_adjust_vertex_fetch_alpha(nir_builder *b,
-                               enum radv_vs_input_alpha_adjust alpha_adjust,
+radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
                                nir_ssa_def *alpha)
 {
-   if (alpha_adjust == ALPHA_ADJUST_SSCALED)
+   if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
       alpha = nir_f2u32(b, alpha);
 
    /* For the integer-like cases, do a natural sign extension.
@@ -3788,15 +3787,15 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b,
     * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as
     * the two LSBs of the exponent.
     */
-   unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
+   unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
 
    alpha = nir_ibfe_imm(b, alpha, offset, 2u);
 
    /* Convert back to the right type. */
-   if (alpha_adjust == ALPHA_ADJUST_SNORM) {
+   if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
       alpha = nir_i2f32(b, alpha);
       alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f));
-   } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
+   } else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
       alpha = nir_i2f32(b, alpha);
    }
 
@@ -3825,7 +3824,8 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
             continue;
 
          unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
-         enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
+         enum ac_vs_input_alpha_adjust alpha_adjust =
+            pipeline_key->vs.vertex_alpha_adjust[location];
          bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
 
          unsigned component = nir_intrinsic_component(intrin);
@@ -3871,7 +3871,7 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
             }
          }
 
-         if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
+         if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
             unsigned idx = num_components - 1;
             channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
          }
index e3711ab..10a7986 100644 (file)
@@ -2184,7 +2184,7 @@ bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
                                   const struct util_format_description *desc, unsigned *dfmt,
                                   unsigned *nfmt, bool *post_shuffle,
-                                  enum radv_vs_input_alpha_adjust *alpha_adjust);
+                                  enum ac_vs_input_alpha_adjust *alpha_adjust);
 uint32_t radv_translate_colorformat(VkFormat format);
 uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
                                         int first_non_void);
index 045013b..5ef418b 100644 (file)
@@ -52,13 +52,6 @@ struct radv_shader_args;
 struct radv_vs_input_state;
 struct radv_shader_args;
 
-enum radv_vs_input_alpha_adjust {
-   ALPHA_ADJUST_NONE = 0,
-   ALPHA_ADJUST_SNORM = 1,
-   ALPHA_ADJUST_SSCALED = 2,
-   ALPHA_ADJUST_SINT = 3,
-};
-
 struct radv_pipeline_key {
    uint32_t has_multiview_view_index : 1;
    uint32_t optimisations_disabled : 1;
@@ -78,7 +71,7 @@ struct radv_pipeline_key {
       uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
       uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
       uint8_t vertex_binding_align[MAX_VBS];
-      enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
+      enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
       uint32_t vertex_post_shuffle;
       uint32_t provoking_vtx_last : 1;
       uint32_t dynamic_input_state : 1;