From 6a2ada93b49b2317e25d433da1548843a14b25d7 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 29 Jul 2022 19:34:47 +0100
Subject: [PATCH] ac: add ac_vtx_format_info

This will be used by RADV and ACO.

Size the tables with PIPE_FORMAT_COUNT so that every pipe_format is a
valid index, and select constant 1 for the missing alpha channel of
R11G11B10_FLOAT like the other three-channel formats.

Signed-off-by: Rhys Perry
Reviewed-by: Samuel Pitoiset
Part-of:
---
 src/amd/common/ac_shader_util.c                | 117 +++++++++++++++++++++++++
 src/amd/common/ac_shader_util.h                |  31 +++++++
 src/amd/compiler/aco_instruction_selection.cpp |   8 +-
 src/amd/vulkan/radv_cmd_buffer.c               |   2 +-
 src/amd/vulkan/radv_formats.c                  |  10 +--
 src/amd/vulkan/radv_pipeline.c                 |  16 ++--
 src/amd/vulkan/radv_private.h                  |   2 +-
 src/amd/vulkan/radv_shader.h                   |   9 +--
 8 files changed, 168 insertions(+), 27 deletions(-)

diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 791acbd..cba42f1 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -421,6 +421,123 @@ const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
    return &data_format_table[dfmt];
 }
 
+#define DUP2(v) v, v
+#define DUP3(v) v, v, v
+#define DUP4(v) v, v, v, v
+
+/* FMT* expand to the has_hw_format mask followed by the hw_format[] list. */
+#define FMT(dfmt, nfmt) 0xb, {HW_FMT(dfmt, nfmt), HW_FMT(dfmt##_##dfmt, nfmt), HW_FMT_INVALID, HW_FMT(dfmt##_##dfmt##_##dfmt##_##dfmt, nfmt)}
+#define FMT_32(nfmt) 0xf, {HW_FMT(32, nfmt), HW_FMT(32_32, nfmt), HW_FMT(32_32_32, nfmt), HW_FMT(32_32_32_32, nfmt)}
+#define FMT_64(nfmt) 0x3, {HW_FMT(32_32, nfmt), HW_FMT(32_32_32_32, nfmt), DUP2(HW_FMT_INVALID)}
+#define FMTP(dfmt, nfmt) 0xf, {DUP4(HW_FMT(dfmt, nfmt))}
+
+#define DST_SEL(x, y, z, w) \
+   (S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_##x) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_##y) | \
+    S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_##z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_##w))
+
+#define LIST_NFMT_8_16(nfmt) \
+   [(int)PIPE_FORMAT_R8_##nfmt] = {DST_SEL(X,0,0,1), 1, 1, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8_##nfmt] = {DST_SEL(X,Y,0,1), 2, 2, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8B8_##nfmt] = {DST_SEL(X,Y,Z,1), 3, 3, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_B8G8R8_##nfmt] = {DST_SEL(Z,Y,X,1), 3, 3, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R8G8B8A8_##nfmt] = {DST_SEL(X,Y,Z,W), 4, 4, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_B8G8R8A8_##nfmt] = {DST_SEL(Z,Y,X,W), 4, 4, 1, FMT(8, nfmt)}, \
+   [(int)PIPE_FORMAT_R16_##nfmt] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16_##nfmt] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16B16_##nfmt] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, nfmt)}, \
+   [(int)PIPE_FORMAT_R16G16B16A16_##nfmt] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, nfmt)},
+
+#define LIST_NFMT_32_64(nfmt) \
+   [(int)PIPE_FORMAT_R32_##nfmt] = {DST_SEL(X,0,0,1), 4, 1, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32_##nfmt] = {DST_SEL(X,Y,0,1), 8, 2, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32B32_##nfmt] = {DST_SEL(X,Y,Z,1), 12, 3, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R32G32B32A32_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 4, 4, FMT_32(nfmt)}, \
+   [(int)PIPE_FORMAT_R64_##nfmt] = {DST_SEL(X,Y,0,0), 8, 1, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 2, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64B64_##nfmt] = {DST_SEL(X,Y,Z,W), 24, 3, 8, FMT_64(nfmt)}, \
+   [(int)PIPE_FORMAT_R64G64B64A64_##nfmt] = {DST_SEL(X,Y,Z,W), 32, 4, 8, FMT_64(nfmt)}, \
+
+#define VB_FORMATS \
+   [(int)PIPE_FORMAT_NONE] = {DST_SEL(0,0,0,1), 0, 4, 0, 0xf, {DUP4(HW_FMT_INVALID)}}, \
+   LIST_NFMT_8_16(UNORM) \
+   LIST_NFMT_8_16(SNORM) \
+   LIST_NFMT_8_16(USCALED) \
+   LIST_NFMT_8_16(SSCALED) \
+   LIST_NFMT_8_16(UINT) \
+   LIST_NFMT_8_16(SINT) \
+   LIST_NFMT_32_64(UINT) \
+   LIST_NFMT_32_64(SINT) \
+   LIST_NFMT_32_64(FLOAT) \
+   [(int)PIPE_FORMAT_R16_FLOAT] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16_FLOAT] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16B16_FLOAT] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_R16G16B16A16_FLOAT] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, FLOAT)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_UNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
+                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_USCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SSCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
+                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_UINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
+   [(int)PIPE_FORMAT_B10G10R10A2_SINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
+                                          AA(AC_ALPHA_ADJUST_SINT)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_UNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
+                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_USCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SSCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
+                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_UINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
+   [(int)PIPE_FORMAT_R10G10B10A2_SINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
+                                          AA(AC_ALPHA_ADJUST_SINT)}, \
+   [(int)PIPE_FORMAT_R11G11B10_FLOAT] = {DST_SEL(X,Y,Z,1), 4, 3, 0, FMTP(10_11_11, FLOAT)}, \
+
+/* Instantiate VB_FORMATS once per hardware encoding. Sizing every table to
+ * PIPE_FORMAT_COUNT makes any pipe_format a valid index; formats without an
+ * entry read back as zero-initialized records instead of past the array end.
+ */
+#define HW_FMT(dfmt, nfmt) (V_008F0C_BUF_DATA_FORMAT_##dfmt | (V_008F0C_BUF_NUM_FORMAT_##nfmt << 4))
+#define HW_FMT_INVALID (V_008F0C_BUF_DATA_FORMAT_INVALID | (V_008F0C_BUF_NUM_FORMAT_UNORM << 4))
+#define AA(v) v
+static const struct ac_vtx_format_info vb_formats_gfx6_alpha_adjust[PIPE_FORMAT_COUNT] = {VB_FORMATS};
+#undef AA
+
+#define AA(v) AC_ALPHA_ADJUST_NONE
+static const struct ac_vtx_format_info vb_formats_gfx6[PIPE_FORMAT_COUNT] = {VB_FORMATS};
+#undef HW_FMT_INVALID
+#undef HW_FMT
+
+#define HW_FMT(dfmt, nfmt) V_008F0C_GFX10_FORMAT_##dfmt##_##nfmt
+#define HW_FMT_INVALID V_008F0C_GFX10_FORMAT_INVALID
+static const struct ac_vtx_format_info vb_formats_gfx10[PIPE_FORMAT_COUNT] = {VB_FORMATS};
+#undef HW_FMT_INVALID
+#undef HW_FMT
+
+#define HW_FMT(dfmt, nfmt) V_008F0C_GFX11_FORMAT_##dfmt##_##nfmt
+#define HW_FMT_INVALID V_008F0C_GFX11_FORMAT_INVALID
+static const struct ac_vtx_format_info vb_formats_gfx11[PIPE_FORMAT_COUNT] = {VB_FORMATS};
+
+/* Return the vertex format table matching the GPU generation. Only GFX8 and
+ * older, excluding Stoney, get the table carrying alpha_adjust values.
+ */
+const struct ac_vtx_format_info *
+ac_get_vtx_format_info_table(enum amd_gfx_level level, enum radeon_family family)
+{
+   if (level >= GFX11)
+      return vb_formats_gfx11;
+   else if (level >= GFX10)
+      return vb_formats_gfx10;
+   bool alpha_adjust = level <= GFX8 && family != CHIP_STONEY;
+   return alpha_adjust ? vb_formats_gfx6_alpha_adjust : vb_formats_gfx6;
+}
+
+/* Return the entry for fmt from the table selected by level and family. */
+const struct ac_vtx_format_info *
+ac_get_vtx_format_info(enum amd_gfx_level level, enum radeon_family family, enum pipe_format fmt)
+{
+   return &ac_get_vtx_format_info_table(level, family)[fmt];
+}
+
 enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
                                      bool is_array)
 {
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index ae40cb0..2d76c38 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -28,6 +28,7 @@
 #include "amd_family.h"
 #include "compiler/nir/nir.h"
 #include "compiler/shader_enums.h"
+#include "util/format/u_format.h"
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -55,6 +56,29 @@ struct ac_data_format_info {
    uint8_t chan_format;
 };
 
+enum ac_vs_input_alpha_adjust {
+   AC_ALPHA_ADJUST_NONE = 0,
+   AC_ALPHA_ADJUST_SNORM = 1,
+   AC_ALPHA_ADJUST_SSCALED = 2,
+   AC_ALPHA_ADJUST_SINT = 3,
+};
+
+struct ac_vtx_format_info {
+   uint16_t dst_sel;
+   uint8_t element_size;
+   uint8_t num_channels;
+   uint8_t chan_byte_size; /* 0 for packed formats */
+
+   /* These last three are dependent on the family. */
+
+   uint8_t has_hw_format;
+   /* Index is number of channels minus one. Use any index for packed formats.
+    * GFX6-8 is dfmt[0:3],nfmt[4:7].
+    */
+   uint8_t hw_format[4];
+   enum ac_vs_input_alpha_adjust alpha_adjust : 8;
+};
+
 struct ac_spi_color_formats {
    unsigned normal : 8;
    unsigned alpha : 8;
@@ -101,6 +125,13 @@ unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsi
 
 const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
 
+const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level,
+                                                              enum radeon_family family);
+
+const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level,
+                                                        enum radeon_family family,
+                                                        enum pipe_format fmt);
+
 enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
                                      bool is_array);
 
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 81a479d..4e1e29b 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -12353,7 +12353,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
             unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
             alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
 
-            if (alpha_adjust == ALPHA_ADJUST_SSCALED)
+            if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
                bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
 
             /* For the integer-like cases, do a natural sign extension.
@@ -12362,16 +12362,16 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
              * and happen to contain 0, 1, 2, 3 as the two LSBs of the
              * exponent.
              */
-            unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
+            unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
             bld.vop3(aco_opcode::v_bfe_i32, Definition(alpha, v1), Operand(alpha, v1),
                      Operand::c32(offset), Operand::c32(2u));
 
             /* Convert back to the right type. */
-            if (alpha_adjust == ALPHA_ADJUST_SNORM) {
+            if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
                bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
                bld.vop2(aco_opcode::v_max_f32, Definition(alpha, v1), Operand::c32(0xbf800000u),
                         Operand(alpha, v1));
-            } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
+            } else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
                bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
             }
          }
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 2ad8c8d..d3dc79e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5967,7 +5967,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
          if (!found) {
             unsigned nfmt, dfmt;
             bool post_shuffle;
-            enum radv_vs_input_alpha_adjust alpha_adjust;
+            enum ac_vs_input_alpha_adjust alpha_adjust;
             const struct util_format_description *format_desc = vk_format_description(attrib->format);
 
             found = util_dynarray_grow(&cmd_buffer->cached_vertex_formats,
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index b8d88df..a58c479 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -151,26 +151,26 @@ void
 radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
                              const struct util_format_description *desc, unsigned *dfmt,
                              unsigned *nfmt, bool *post_shuffle,
-                             enum radv_vs_input_alpha_adjust *alpha_adjust)
+                             enum ac_vs_input_alpha_adjust *alpha_adjust)
 {
    assert(desc->channel[0].type != UTIL_FORMAT_TYPE_VOID);
    *nfmt = radv_translate_buffer_numformat(desc, 0);
    *dfmt = radv_translate_buffer_dataformat(desc, 0);
 
-   *alpha_adjust = ALPHA_ADJUST_NONE;
+   *alpha_adjust = AC_ALPHA_ADJUST_NONE;
    if (pdevice->rad_info.gfx_level <= GFX8 && pdevice->rad_info.family != CHIP_STONEY) {
       switch (format) {
       case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
       case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SNORM;
+         *alpha_adjust = AC_ALPHA_ADJUST_SNORM;
          break;
       case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
       case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SSCALED;
+         *alpha_adjust = AC_ALPHA_ADJUST_SSCALED;
          break;
       case VK_FORMAT_A2R10G10B10_SINT_PACK32:
       case VK_FORMAT_A2B10G10R10_SINT_PACK32:
-         *alpha_adjust = ALPHA_ADJUST_SINT;
+         *alpha_adjust = AC_ALPHA_ADJUST_SINT;
          break;
       default:
          break;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 2d9e89a..d0d153e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3776,11 +3776,10 @@ radv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs,
 }
 
 static nir_ssa_def *
-radv_adjust_vertex_fetch_alpha(nir_builder *b,
-                               enum radv_vs_input_alpha_adjust alpha_adjust,
+radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
                                nir_ssa_def *alpha)
 {
-   if (alpha_adjust == ALPHA_ADJUST_SSCALED)
+   if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
       alpha = nir_f2u32(b, alpha);
 
    /* For the integer-like cases, do a natural sign extension.
@@ -3788,15 +3787,15 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b,
     * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as
     * the two LSBs of the exponent.
     */
-   unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
+   unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
 
    alpha = nir_ibfe_imm(b, alpha, offset, 2u);
 
    /* Convert back to the right type. */
-   if (alpha_adjust == ALPHA_ADJUST_SNORM) {
+   if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
       alpha = nir_i2f32(b, alpha);
       alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f));
-   } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
+   } else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
       alpha = nir_i2f32(b, alpha);
    }
 
@@ -3825,7 +3824,8 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
                continue;
 
             unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
-            enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
+            enum ac_vs_input_alpha_adjust alpha_adjust =
+               pipeline_key->vs.vertex_alpha_adjust[location];
             bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
 
             unsigned component = nir_intrinsic_component(intrin);
@@ -3871,7 +3871,7 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
                }
             }
 
-            if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
+            if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
                unsigned idx = num_components - 1;
                channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
             }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e3711ab..10a7986 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2184,7 +2184,7 @@ bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
                                   const struct util_format_description *desc, unsigned *dfmt,
                                   unsigned *nfmt, bool *post_shuffle,
-                                  enum radv_vs_input_alpha_adjust *alpha_adjust);
+                                  enum ac_vs_input_alpha_adjust *alpha_adjust);
 uint32_t radv_translate_colorformat(VkFormat format);
 uint32_t radv_translate_color_numformat(VkFormat format,
                                         const struct util_format_description *desc, int first_non_void);
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 045013b..5ef418b 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -52,13 +52,6 @@ struct radv_shader_args;
 struct radv_vs_input_state;
 struct radv_shader_args;
 
-enum radv_vs_input_alpha_adjust {
-   ALPHA_ADJUST_NONE = 0,
-   ALPHA_ADJUST_SNORM = 1,
-   ALPHA_ADJUST_SSCALED = 2,
-   ALPHA_ADJUST_SINT = 3,
-};
-
 struct radv_pipeline_key {
    uint32_t has_multiview_view_index : 1;
    uint32_t optimisations_disabled : 1;
@@ -78,7 +71,7 @@ struct radv_pipeline_key {
       uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
       uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
       uint8_t vertex_binding_align[MAX_VBS];
-      enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
+      enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
       uint32_t vertex_post_shuffle;
       uint32_t provoking_vtx_last : 1;
       uint32_t dynamic_input_state : 1;
-- 
2.7.4