From: Simon Perretta Date: Sat, 11 Feb 2023 22:34:05 +0000 (+0000) Subject: pvr: Add support for generating transfer fragment programs X-Git-Tag: upstream/23.3.3~9834 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f0b47cfd658194965b2f40d6550e5fb6de508976;p=platform%2Fupstream%2Fmesa.git pvr: Add support for generating transfer fragment programs Co-authored-by: Karmjit Mahil Co-authored-by: Rajnesh Kanwal Signed-off-by: Karmjit Mahil Signed-off-by: Rajnesh Kanwal Signed-off-by: Simon Perretta Acked-by: Frank Binns Part-of: --- diff --git a/src/imagination/rogue/rogue.c b/src/imagination/rogue/rogue.c index e4ad5e0..2f21e29 100644 --- a/src/imagination/rogue/rogue.c +++ b/src/imagination/rogue/rogue.c @@ -371,6 +371,12 @@ rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index) } PUBLIC +rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index) +{ + return rogue_reg_cached(shader, ROGUE_REG_CLASS_SPECIAL, index); +} + +PUBLIC rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index) { return rogue_reg_cached(shader, ROGUE_REG_CLASS_VTXIN, index); @@ -586,6 +592,16 @@ rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index) } PUBLIC +rogue_regarray * +rogue_shared_regarray(rogue_shader *shader, unsigned size, unsigned start_index) +{ + return rogue_regarray_cached(shader, + size, + ROGUE_REG_CLASS_SHARED, + start_index); +} + +PUBLIC rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader, unsigned size, unsigned start_index, diff --git a/src/imagination/rogue/rogue.h b/src/imagination/rogue/rogue.h index e4c02fe..de59bb3 100644 --- a/src/imagination/rogue/rogue.h +++ b/src/imagination/rogue/rogue.h @@ -1887,6 +1887,8 @@ rogue_reg *rogue_const_reg(rogue_shader *shader, unsigned index); rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index); +rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index); + rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index); rogue_reg 
*rogue_vtxout_reg(rogue_shader *shader, unsigned index); @@ -1905,6 +1907,10 @@ rogue_temp_regarray(rogue_shader *shader, unsigned size, unsigned start_index); rogue_regarray * rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index); +rogue_regarray *rogue_shared_regarray(rogue_shader *shader, + unsigned size, + unsigned start_index); + rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader, unsigned size, unsigned start_index, diff --git a/src/imagination/rogue/rogue_compile.c b/src/imagination/rogue/rogue_compile.c index 1beeccb..143015d 100644 --- a/src/imagination/rogue/rogue_compile.c +++ b/src/imagination/rogue/rogue_compile.c @@ -682,11 +682,9 @@ static inline void rogue_feedback_used_regs(rogue_build_ctx *ctx, { /* TODO NEXT: Use this counting method elsewhere as well. */ ctx->common_data[shader->stage].temps = - __bitset_count(shader->regs_used[ROGUE_REG_CLASS_TEMP], - BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num)); - ctx->common_data[shader->stage].internals = __bitset_count( - shader->regs_used[ROGUE_REG_CLASS_INTERNAL], - BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_INTERNAL].num)); + rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP); + ctx->common_data[shader->stage].internals = + rogue_count_used_regs(shader, ROGUE_REG_CLASS_INTERNAL); } static bool ssa_def_cb(nir_ssa_def *ssa, void *state) diff --git a/src/imagination/rogue/rogue_encode.c b/src/imagination/rogue/rogue_encode.c index 0ab4ff5..b2d0ec1 100644 --- a/src/imagination/rogue/rogue_encode.c +++ b/src/imagination/rogue/rogue_encode.c @@ -1304,14 +1304,14 @@ static void rogue_encode_instr_group(rogue_instr_group *group, } PUBLIC -void rogue_encode_shader(UNUSED rogue_build_ctx *ctx, +void rogue_encode_shader(rogue_build_ctx *ctx, rogue_shader *shader, struct util_dynarray *binary) { if (!shader->is_grouped) unreachable("Can't encode shader with ungrouped instructions."); - util_dynarray_init(binary, shader); + util_dynarray_init(binary, ctx); 
rogue_foreach_instr_group_in_shader (group, shader) rogue_encode_instr_group(group, binary); diff --git a/src/imagination/rogue/rogue_info.c b/src/imagination/rogue/rogue_info.c index 3c0bebd..fd5db06 100644 --- a/src/imagination/rogue/rogue_info.c +++ b/src/imagination/rogue/rogue_info.c @@ -581,7 +581,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = { }, .supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), }, .supported_src_types = { - [0] = T(REG), + [0] = T(REG) | T(REGARRAY), }, }, [ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2, @@ -685,7 +685,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = { [ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1, .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, .supported_src_types = { - [0] = T(REG) | T(IMM), + [0] = T(REG) | T(REGARRAY) | T(IMM), }, }, [ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3, diff --git a/src/imagination/rogue/rogue_print.c b/src/imagination/rogue/rogue_print.c index b8aa46b..5762800 100644 --- a/src/imagination/rogue/rogue_print.c +++ b/src/imagination/rogue/rogue_print.c @@ -643,10 +643,15 @@ void rogue_print_shader(FILE *fp, const rogue_shader *shader) { fputs("/*", fp); + if (shader->stage == MESA_SHADER_NONE) + fputs(" USC program", fp); + else + fprintf(fp, " %s shader", _mesa_shader_stage_to_string(shader->stage)); + if (shader->name) - fprintf(fp, " \"%s\":", shader->name); + fprintf(fp, " - %s", shader->name); - fprintf(fp, " %s shader */\n", _mesa_shader_stage_to_string(shader->stage)); + fputs(" */\n", fp); rogue_foreach_block (block, shader) rogue_print_block(fp, block); diff --git a/src/imagination/vulkan/meson.build b/src/imagination/vulkan/meson.build index 6a36b51..1cb990d 100644 --- a/src/imagination/vulkan/meson.build +++ b/src/imagination/vulkan/meson.build @@ -67,6 +67,7 @@ pvr_files = files( 'pvr_wsi.c', 'usc/pvr_uscgen.c', + 'usc/pvr_uscgen_tq.c', ) pvr_includes = [ 
diff --git a/src/imagination/vulkan/pvr_common.h b/src/imagination/vulkan/pvr_common.h index 36ac805..b6d683d 100644 --- a/src/imagination/vulkan/pvr_common.h +++ b/src/imagination/vulkan/pvr_common.h @@ -57,6 +57,11 @@ #define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \ (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U) +#define PVR_TRANSFER_MAX_LAYERS 1U +#define PVR_TRANSFER_MAX_LOADS 4U +#define PVR_TRANSFER_MAX_IMAGES \ + (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS) + /* TODO: move into a common surface library? */ enum pvr_memlayout { PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */ @@ -141,6 +146,30 @@ enum pvr_stage_allocation { PVR_STAGE_ALLOCATION_COUNT }; +enum pvr_resolve_op { + PVR_RESOLVE_BLEND, + PVR_RESOLVE_MIN, + PVR_RESOLVE_MAX, + PVR_RESOLVE_SAMPLE0, + PVR_RESOLVE_SAMPLE1, + PVR_RESOLVE_SAMPLE2, + PVR_RESOLVE_SAMPLE3, + PVR_RESOLVE_SAMPLE4, + PVR_RESOLVE_SAMPLE5, + PVR_RESOLVE_SAMPLE6, + PVR_RESOLVE_SAMPLE7, +}; + +enum pvr_alpha_type { + PVR_ALPHA_NONE, + PVR_ALPHA_SOURCE, + PVR_ALPHA_PREMUL_SOURCE, + PVR_ALPHA_GLOBAL, + PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL, + PVR_ALPHA_CUSTOM, + PVR_ALPHA_AATEXT, +}; + enum pvr_event_state { PVR_EVENT_STATE_SET_BY_HOST, PVR_EVENT_STATE_RESET_BY_HOST, diff --git a/src/imagination/vulkan/pvr_formats.c b/src/imagination/vulkan/pvr_formats.c index 0aba239..e99656f 100644 --- a/src/imagination/vulkan/pvr_formats.c +++ b/src/imagination/vulkan/pvr_formats.c @@ -28,6 +28,7 @@ #include #include "hwdef/rogue_hw_utils.h" +#include "pvr_common.h" #include "pvr_formats.h" #include "pvr_private.h" #include "util/bitpack_helpers.h" @@ -1031,3 +1032,76 @@ bool pvr_format_is_pbe_downscalable(VkFormat vk_format) return false; } } + +uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format, + uint32_t alpha_type) +{ + switch (alpha_type) { + default: + case PVR_ALPHA_NONE: + break; + case PVR_ALPHA_SOURCE: + case PVR_ALPHA_PREMUL_SOURCE: + case 
PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL: + case PVR_ALPHA_GLOBAL: + return 2U; + } + + switch (pbe_format) { + case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888: + case PVR_TRANSFER_PBE_PIXEL_SRC_US8888: + case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16: + case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16: + case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888: + case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888: + case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16: + case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16: + case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102: + case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102: + case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102: + case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102: + case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32: + case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32: + case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32: + case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32: + case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16: + case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM: + case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM: + case PVR_TRANSFER_PBE_PIXEL_SRC_F32: + case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2: + case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4: + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32: + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64: + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: + case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8: + case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB: + case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45: + case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24: + case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8: + + case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32: + case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F: + case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8: + + case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8: + 
case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8: + case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED: + case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V: + case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED: + case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED: + return 1U; + + case PVR_TRANSFER_PBE_PIXEL_SRC_NUM: + default: + return 0U; + } +} diff --git a/src/imagination/vulkan/pvr_formats.h b/src/imagination/vulkan/pvr_formats.h index f7d011b..bdd1a4c 100644 --- a/src/imagination/vulkan/pvr_formats.h +++ b/src/imagination/vulkan/pvr_formats.h @@ -57,6 +57,156 @@ enum pvr_pbe_accum_format { PVR_PBE_ACCUM_FORMAT_U24, }; +/** + * Pixel related shader selector. The logic selecting the shader has to take + * into account the pixel related properties (controlling the conversion path in + * the shader) and the geometry related properties (controlling the sample + * position calcs). These two can be orthogonal. + * + * integer format conversions, bit depth : 8, 16, 32 per ch formats : signed, + * unsigned. Strategy: convert everything to U32 or S32 then USC pack. PBE just + * pass through. + * + * fixed point format conversions, bit depth 565, 1555, 555 etc. Strategy: + * fcnorm to 4 F32, then USC pack to F16F16. PBE converts to destination + * + * float/fixed format conversions + * strategy: fcnorm, then pack to f16 _when_ destination is not f32. 
+ * fmt | unorm | flt | + * 8 | x | | + * 16 | x | x | + * 32 | x | x | + * + * + * non-merge type DS blit table + * ********************************************** + * * * S8 D16 D24S8 D32 D32S8 * + * ********************************************** + * * S8 * cpy i i i i * + * * D16 * i cpy i - i * + * * D24S8 * swiz - cpy (1) - * + * * D32 * i - i cpy i * + * * D32S8 * (2) - - cpy cpy * + * ********************************************** + * + * merge with stencil pick type DS blit table + * ********************************************** + * * * S8 D16 D24S8 D32 D32S8 * + * ********************************************** + * * S8 * i i (1) i (2) * + * * D16 * i i i i i * + * * D24S8 * i i (3) i (4) * + * * D32 * i i i i i * + * * D32S8 * i i (5) i (6) * + * ********************************************** + * + * merge with depth pick type DS blit table + * ********************************************** + * * * S8 D16 D24S8 D32 D32S8 * + * ********************************************** + * * S8 * i i i i i * + * * D16 * - - - - - * + * * D24S8 * - - (s) - - * + * * D32 * - - (1) - (2) * + * * D32S8 * - - - - (s) * + * ********************************************** + * + * D formats are unpacked into a single register according to their format + * S formats are unpacked into a single register in U8 + * D24S8 is in a single 32 bit register (as the PBE can't read it from + * unpacked.) + * + * Swizzles are applied on the TPU not the PBE because of potential + * accumulation i.e. a non-iterated shader doesn't know if it writes the output + * buffer for PBE emit or a second pass blend. 
+ */ +enum pvr_transfer_pbe_pixel_src { + PVR_TRANSFER_PBE_PIXEL_SRC_UU8888 = 0, + PVR_TRANSFER_PBE_PIXEL_SRC_US8888 = 1, + PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16 = 2, + PVR_TRANSFER_PBE_PIXEL_SRC_US16S16 = 3, + PVR_TRANSFER_PBE_PIXEL_SRC_SU8888 = 4, + PVR_TRANSFER_PBE_PIXEL_SRC_SS8888 = 5, + PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16 = 6, + PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16 = 7, + + PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102 = 8, + PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102 = 9, + PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 = 10, + PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102 = 11, + + PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 = 12, + PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32 = 13, + PVR_TRANSFER_PBE_PIXEL_SRC_US32S32 = 14, + PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32 = 15, + + PVR_TRANSFER_PBE_PIXEL_SRC_F16F16 = 16, + PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM = 17, + PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM = 18, + + PVR_TRANSFER_PBE_PIXEL_SRC_F32X4 = 19, + PVR_TRANSFER_PBE_PIXEL_SRC_F32X2 = 20, + PVR_TRANSFER_PBE_PIXEL_SRC_F32 = 21, + + PVR_TRANSFER_PBE_PIXEL_SRC_RAW32 = 22, + PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 = 23, + PVR_TRANSFER_PBE_PIXEL_SRC_RAW128 = 24, + + /* f16 to U8 conversion in shader. 
*/ + PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8 = 25, + + PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB = 26, + PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45 = 27, + + PVR_TRANSFER_PBE_PIXEL_SRC_D24S8 = 28, + PVR_TRANSFER_PBE_PIXEL_SRC_S8D24 = 29, + PVR_TRANSFER_PBE_PIXEL_SRC_D32S8 = 30, + + /* D: D32_S8 */ + PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8 = 31, + PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8 = 32, + PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8 = 33, + PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8 = 34, + + /* D: D32 */ + PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32 = 35, + PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F = 36, + + /* D : D24_S8 */ + PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8 = 37, + PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8 = 38, + PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8 = 39, + PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8 = 40, + PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8 = 41, + PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8 = 42, + + /* ob0 holds Y and ob0 holds U or V. */ + PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED = 43, + + /* ob0 holds Y, ob1 holds U, ob2 holds V. */ + PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V = 44, + + PVR_TRANSFER_PBE_PIXEL_SRC_MASK16 = 45, + PVR_TRANSFER_PBE_PIXEL_SRC_MASK32 = 46, + PVR_TRANSFER_PBE_PIXEL_SRC_MASK48 = 47, + PVR_TRANSFER_PBE_PIXEL_SRC_MASK64 = 48, + PVR_TRANSFER_PBE_PIXEL_SRC_MASK96 = 49, + PVR_TRANSFER_PBE_PIXEL_SRC_MASK128 = 50, + + PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8 = 51, + + /* ob0 holds Y and ob0 holds V or U. */ + PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED = 52, + + /* ob0 holds Y, ob1 holds UV interleaved. */ + PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED = 53, + + /* FIXME: This changes for other BVNC's which may change the hashing logic + * in pvr_hash_shader. 
+ */ + PVR_TRANSFER_PBE_PIXEL_SRC_NUM = 54, +}; + const uint8_t *pvr_get_format_swizzle(VkFormat vk_format); uint32_t pvr_get_tex_format(VkFormat vk_format); uint32_t pvr_get_tex_format_aspect(VkFormat vk_format, @@ -70,4 +220,13 @@ void pvr_get_hw_clear_color(VkFormat vk_format, VkClearColorValue value, uint32_t packed_out[static const 4]); +/* TODO: alpha_type is of 'enum pvr_alpha_type' type. See if we can + * move that in here. It's currently in pvr_common.h and it doesn't seem + * appropriate including that in here. Also moving the definition in here would + * make pvr_common.h include this which would mean that the compiler would be + * pulling in vulkan specific format stuff. + */ +uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format, + uint32_t alpha_type); + #endif /* PVR_FORMATS_H */ diff --git a/src/imagination/vulkan/usc/pvr_uscgen.h b/src/imagination/vulkan/usc/pvr_uscgen.h index 396d3f3..9b7e85f 100644 --- a/src/imagination/vulkan/usc/pvr_uscgen.h +++ b/src/imagination/vulkan/usc/pvr_uscgen.h @@ -24,9 +24,119 @@ #ifndef PVR_USCGEN_H #define PVR_USCGEN_H +#include +#include + +#include "pvr_common.h" +#include "pvr_formats.h" #include "util/u_dynarray.h" -#include +enum pvr_int_coord_set_floats { + PVR_INT_COORD_SET_FLOATS_0 = 0, + PVR_INT_COORD_SET_FLOATS_4 = 1, + /* For rate changes to 0 base screen space. */ + PVR_INT_COORD_SET_FLOATS_6 = 2, + PVR_INT_COORD_SET_FLOATS_NUM = 3 +}; + +struct pvr_tq_shader_properties { + /* Controls whether this is an iterated shader. */ + bool iterated; + + /* Controls whether this is meant to be running at full rate. */ + bool full_rate; + + /* Sample specific channel of pixel. */ + bool pick_component; + + /* Alpha type from transfer API. */ + uint32_t alpha_type; + + struct pvr_tq_layer_properties { + /* Controls whether we need to send the sample count to the TPU. */ + bool msaa; + + /* In case we run pixel rate, to do an USC resolve - but still in MSAA TPU + * samples. 
+ */ + uint32_t sample_count; + + enum pvr_resolve_op resolve_op; + + /* Selects the pixel conversion that we have to perform. */ + enum pvr_transfer_pbe_pixel_src pbe_format; + + /* Sampling from a 3D texture with a constant Z position. */ + bool sample; + + /* Number of float coefficients to get from screen space to texture space. + */ + enum pvr_int_coord_set_floats layer_floats; + + /* Unaligned texture address in bytes. */ + uint32_t byte_unwind; + + /* Enable bilinear filter in shader. */ + bool linear; + } layer_props; +}; + +/* All offsets are in dwords. */ +/* Devices may have more than 256 sh regs but we're expecting to use very few so + * let's use uint8_t. + */ +struct pvr_tq_frag_sh_reg_layout { + struct { + /* How many image sampler descriptors are present. */ + uint8_t count; + /* TODO: See if we ever need more than one combined image sampler + * descriptor. If this is linked to the amount of layers used, we only + * ever use one layer so this wouldn't need to be an array. + */ + struct { + uint8_t image; + uint8_t sampler; + } offsets[PVR_TRANSFER_MAX_IMAGES]; + } combined_image_samplers; + + /* TODO: Dynamic consts are used for various things so do this properly by + * having an actual layout instead of chucking them all together using an + * implicit layout. + */ + struct { + /* How many dynamic consts regs have been allocated. */ + uint8_t count; + uint8_t offset; + } dynamic_consts; + + /* Total sh regs allocated by the driver. It does not include the regs + * necessary for compiler_out. + */ + uint8_t driver_total; + + /* Provided by the compiler to the driver to be appended to the shareds. */ + /* No offset field since these will be appended at the end so driver_total + * can be used instead. + */ + struct { + struct { + /* TODO: Remove this count and just use `compiler_out_total`? Or remove + * that one and use this one? 
+ */ + uint8_t count; + /* TODO: The array size is chosen arbitrarily based on the max + * constants currently produced by the compiler. Make this dynamic? + */ + /* Values to fill in into each shared reg used for usc constants. */ + uint32_t values[10]; + } usc_constants; + } compiler_out; + + /* Total extra sh regs needed by the compiler that need to be appended to the + * shareds by the driver. + */ + uint8_t compiler_out_total; +}; /* TODO: Shader caching (not pipeline caching) support. */ @@ -37,4 +147,9 @@ void pvr_uscgen_per_job_eot(uint32_t emit_count, void pvr_uscgen_nop(struct util_dynarray *binary); +void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props, + struct pvr_tq_frag_sh_reg_layout *sh_reg_layout, + unsigned *temps_used, + struct util_dynarray *binary); + #endif /* PVR_USCGEN_H */ diff --git a/src/imagination/vulkan/usc/pvr_uscgen_tq.c b/src/imagination/vulkan/usc/pvr_uscgen_tq.c new file mode 100644 index 0000000..64f2f32 --- /dev/null +++ b/src/imagination/vulkan/usc/pvr_uscgen_tq.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2023 Imagination Technologies Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "pvr_uscgen.h" +#include "rogue/rogue.h" +#include "rogue/rogue_builder.h" +#include "util/u_dynarray.h" + +void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props, + struct pvr_tq_frag_sh_reg_layout *sh_reg_layout, + unsigned *temps_used, + struct util_dynarray *binary) +{ + rogue_builder b; + rogue_shader *shader = rogue_shader_create(NULL, MESA_SHADER_NONE); + + unsigned smp_coord_size = 2; + unsigned smp_coord_idx = 0; + rogue_regarray *smp_coords; + + unsigned channels = 0; + unsigned output_idx = 1; + rogue_regarray *outputs = NULL; + + unsigned image_state_size = 4; + unsigned image_state_idx; + rogue_regarray *image_state; + + unsigned smp_state_size = 4; + unsigned smp_state_idx; + rogue_regarray *smp_state; + + rogue_set_shader_name(shader, "TQ (fragment)"); + rogue_builder_init(&b, shader); + rogue_push_block(&b); + + smp_coords = + rogue_ssa_vec_regarray(b.shader, smp_coord_size, smp_coord_idx, 0); + + /* TODO: Unrestrict. */ + assert(shader_props->full_rate == false); + assert(shader_props->pick_component == false); + assert(shader_props->alpha_type == 0); + + const struct pvr_tq_layer_properties *layer_props = + &shader_props->layer_props; + uint32_t loads; + + /* TODO: Unrestrict. 
*/ + assert(layer_props->msaa == false); + assert(layer_props->sample_count == 1U); + assert(layer_props->resolve_op == PVR_RESOLVE_BLEND); + assert(layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 || + layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128); + assert(layer_props->sample == false); + assert(layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_0); + assert(layer_props->byte_unwind == 0); + assert(layer_props->linear == false); + + loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format, + shader_props->alpha_type); + for (uint32_t load = 0; load < loads; ++load) { + if (shader_props->iterated) { + /* TODO: feed{back,forward} the coeff index to/from shader_info. */ + unsigned coeff_index = 0; + rogue_regarray *coeffs = + rogue_coeff_regarray(b.shader, smp_coord_size * 4, coeff_index); + + rogue_instr *instr = &rogue_FITR_PIXEL(&b, + rogue_ref_regarray(smp_coords), + rogue_ref_drc(0), + rogue_ref_regarray(coeffs), + rogue_ref_val(smp_coord_size)) + ->instr; + rogue_add_instr_comment(instr, "load_iterated"); + } else { + rogue_instr *instr; + rogue_regarray *smp_coord_x = + rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 0); + rogue_regarray *smp_coord_y = + rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 1); + + /* (X,Y).P, pixel (X,Y) coordinates, pixel mode. */ + rogue_reg *in_x = rogue_special_reg(b.shader, 97); + rogue_reg *in_y = rogue_special_reg(b.shader, 100); + + instr = + &rogue_MOV(&b, rogue_ref_regarray(smp_coord_x), rogue_ref_reg(in_x)) + ->instr; + rogue_add_instr_comment(instr, "load_x"); + + instr = + &rogue_MOV(&b, rogue_ref_regarray(smp_coord_y), rogue_ref_reg(in_y)) + ->instr; + rogue_add_instr_comment(instr, "load_y"); + } + + if (!layer_props->msaa) { + } else { + unreachable("Unsupported layer property (MSAA)."); + } + } + + /* Source conversion. 
*/ + switch (layer_props->pbe_format) { + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64: + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: + break; + + default: + unreachable("Unsupported layer property (format)."); + } + + /* TODO: Select the texture_regs index appropriately. */ + assert(sh_reg_layout->combined_image_samplers.count == 1); + image_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].image; + image_state = + rogue_shared_regarray(b.shader, image_state_size, image_state_idx); + + smp_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].sampler; + smp_state = rogue_shared_regarray(b.shader, smp_state_size, smp_state_idx); + + /* Pack/blend phase. */ + rogue_backend_instr *smp2d; + + switch (layer_props->pbe_format) { + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64: + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: { + switch (layer_props->pbe_format) { + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64: + channels = 2; + break; + + case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: + channels = 4; + break; + + default: + unreachable("Unsupported layer property (format)."); + } + + outputs = rogue_ssa_vec_regarray(b.shader, channels, output_idx, 0); + + smp2d = rogue_SMP2D(&b, + rogue_ref_regarray(outputs), + rogue_ref_drc(0), + rogue_ref_regarray(image_state), + rogue_ref_regarray(smp_coords), + rogue_ref_regarray(smp_state), + rogue_ref_io(ROGUE_IO_NONE), + rogue_ref_val(channels)); + rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_SLCWRITEBACK); + rogue_add_instr_comment(&smp2d->instr, "pack/blend"); + + if (!shader_props->iterated) + rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_NNCOORDS); + break; + } + + default: + unreachable("Unsupported layer property (format)."); + } + + assert(channels && outputs); + + /* Copy outputs. 
*/ + for (unsigned u = 0; u < channels; ++u) { + rogue_regarray *output_elem = + rogue_ssa_vec_regarray(b.shader, 1, output_idx, u); + rogue_reg *pixout_elem = rogue_pixout_reg(b.shader, u); + rogue_MOV(&b, + rogue_ref_reg(pixout_elem), + rogue_ref_regarray(output_elem)); + } + + rogue_END(&b); + + rogue_shader_passes(shader); + rogue_encode_shader(NULL, shader, binary); + + *temps_used = rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP); + + sh_reg_layout->compiler_out.usc_constants.count = 0; + sh_reg_layout->compiler_out_total = 0; + + ralloc_free(shader); +}