pvr: Add support for generating transfer fragment programs
author  Simon Perretta <simon.perretta@imgtec.com>
Sat, 11 Feb 2023 22:34:05 +0000 (22:34 +0000)
committer  Marge Bot <emma+marge@anholt.net>
Wed, 19 Apr 2023 11:01:05 +0000 (11:01 +0000)
Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Co-authored-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21550>

12 files changed:
src/imagination/rogue/rogue.c
src/imagination/rogue/rogue.h
src/imagination/rogue/rogue_compile.c
src/imagination/rogue/rogue_encode.c
src/imagination/rogue/rogue_info.c
src/imagination/rogue/rogue_print.c
src/imagination/vulkan/meson.build
src/imagination/vulkan/pvr_common.h
src/imagination/vulkan/pvr_formats.c
src/imagination/vulkan/pvr_formats.h
src/imagination/vulkan/usc/pvr_uscgen.h
src/imagination/vulkan/usc/pvr_uscgen_tq.c [new file with mode: 0644]

index e4ad5e0..2f21e29 100644 (file)
@@ -371,6 +371,12 @@ rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index)
 }
 
 PUBLIC
+rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index)
+{
+   return rogue_reg_cached(shader, ROGUE_REG_CLASS_SPECIAL, index);
+}
+
+PUBLIC
 rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index)
 {
    return rogue_reg_cached(shader, ROGUE_REG_CLASS_VTXIN, index);
@@ -586,6 +592,16 @@ rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
 }
 
 PUBLIC
+rogue_regarray *
+rogue_shared_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
+{
+   return rogue_regarray_cached(shader,
+                                size,
+                                ROGUE_REG_CLASS_SHARED,
+                                start_index);
+}
+
+PUBLIC
 rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
                                        unsigned size,
                                        unsigned start_index,
index e4c02fe..de59bb3 100644 (file)
@@ -1887,6 +1887,8 @@ rogue_reg *rogue_const_reg(rogue_shader *shader, unsigned index);
 
 rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index);
 
+rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index);
+
 rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index);
 
 rogue_reg *rogue_vtxout_reg(rogue_shader *shader, unsigned index);
@@ -1905,6 +1907,10 @@ rogue_temp_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
 rogue_regarray *
 rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
 
+rogue_regarray *rogue_shared_regarray(rogue_shader *shader,
+                                      unsigned size,
+                                      unsigned start_index);
+
 rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
                                        unsigned size,
                                        unsigned start_index,
index 1beeccb..143015d 100644 (file)
@@ -682,11 +682,9 @@ static inline void rogue_feedback_used_regs(rogue_build_ctx *ctx,
 {
    /* TODO NEXT: Use this counting method elsewhere as well. */
    ctx->common_data[shader->stage].temps =
-      __bitset_count(shader->regs_used[ROGUE_REG_CLASS_TEMP],
-                     BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num));
-   ctx->common_data[shader->stage].internals = __bitset_count(
-      shader->regs_used[ROGUE_REG_CLASS_INTERNAL],
-      BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_INTERNAL].num));
+      rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
+   ctx->common_data[shader->stage].internals =
+      rogue_count_used_regs(shader, ROGUE_REG_CLASS_INTERNAL);
 }
 
 static bool ssa_def_cb(nir_ssa_def *ssa, void *state)
index 0ab4ff5..b2d0ec1 100644 (file)
@@ -1304,14 +1304,14 @@ static void rogue_encode_instr_group(rogue_instr_group *group,
 }
 
 PUBLIC
-void rogue_encode_shader(UNUSED rogue_build_ctx *ctx,
+void rogue_encode_shader(rogue_build_ctx *ctx,
                          rogue_shader *shader,
                          struct util_dynarray *binary)
 {
    if (!shader->is_grouped)
       unreachable("Can't encode shader with ungrouped instructions.");
 
-   util_dynarray_init(binary, shader);
+   util_dynarray_init(binary, ctx);
 
    rogue_foreach_instr_group_in_shader (group, shader)
       rogue_encode_instr_group(group, binary);
index 3c0bebd..fd5db06 100644 (file)
@@ -581,7 +581,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
       },
       .supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), },
       .supported_src_types = {
-         [0] = T(REG),
+         [0] = T(REG) | T(REGARRAY),
       },
    },
    [ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2,
@@ -685,7 +685,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
    [ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1,
       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
       .supported_src_types = {
-         [0] = T(REG) | T(IMM),
+         [0] = T(REG) | T(REGARRAY) | T(IMM),
       },
    },
    [ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3,
index b8aa46b..5762800 100644 (file)
@@ -643,10 +643,15 @@ void rogue_print_shader(FILE *fp, const rogue_shader *shader)
 {
    fputs("/*", fp);
 
+   if (shader->stage == MESA_SHADER_NONE)
+      fputs(" USC program", fp);
+   else
+      fprintf(fp, " %s shader", _mesa_shader_stage_to_string(shader->stage));
+
    if (shader->name)
-      fprintf(fp, " \"%s\":", shader->name);
+      fprintf(fp, " - %s", shader->name);
 
-   fprintf(fp, " %s shader */\n", _mesa_shader_stage_to_string(shader->stage));
+   fputs(" */\n", fp);
 
    rogue_foreach_block (block, shader)
       rogue_print_block(fp, block);
index 6a36b51..1cb990d 100644 (file)
@@ -67,6 +67,7 @@ pvr_files = files(
   'pvr_wsi.c',
 
   'usc/pvr_uscgen.c',
+  'usc/pvr_uscgen_tq.c',
 )
 
 pvr_includes = [
index 36ac805..b6d683d 100644 (file)
 #define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
    (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
 
+#define PVR_TRANSFER_MAX_LAYERS 1U
+#define PVR_TRANSFER_MAX_LOADS 4U
+#define PVR_TRANSFER_MAX_IMAGES \
+   (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
+
 /* TODO: move into a common surface library? */
 enum pvr_memlayout {
    PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
@@ -141,6 +146,30 @@ enum pvr_stage_allocation {
    PVR_STAGE_ALLOCATION_COUNT
 };
 
+enum pvr_resolve_op {
+   PVR_RESOLVE_BLEND,
+   PVR_RESOLVE_MIN,
+   PVR_RESOLVE_MAX,
+   PVR_RESOLVE_SAMPLE0,
+   PVR_RESOLVE_SAMPLE1,
+   PVR_RESOLVE_SAMPLE2,
+   PVR_RESOLVE_SAMPLE3,
+   PVR_RESOLVE_SAMPLE4,
+   PVR_RESOLVE_SAMPLE5,
+   PVR_RESOLVE_SAMPLE6,
+   PVR_RESOLVE_SAMPLE7,
+};
+
+enum pvr_alpha_type {
+   PVR_ALPHA_NONE,
+   PVR_ALPHA_SOURCE,
+   PVR_ALPHA_PREMUL_SOURCE,
+   PVR_ALPHA_GLOBAL,
+   PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL,
+   PVR_ALPHA_CUSTOM,
+   PVR_ALPHA_AATEXT,
+};
+
 enum pvr_event_state {
    PVR_EVENT_STATE_SET_BY_HOST,
    PVR_EVENT_STATE_RESET_BY_HOST,
index 0aba239..e99656f 100644 (file)
@@ -28,6 +28,7 @@
 #include <vulkan/vulkan.h>
 
 #include "hwdef/rogue_hw_utils.h"
+#include "pvr_common.h"
 #include "pvr_formats.h"
 #include "pvr_private.h"
 #include "util/bitpack_helpers.h"
@@ -1031,3 +1032,76 @@ bool pvr_format_is_pbe_downscalable(VkFormat vk_format)
       return false;
    }
 }
+
+uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
+                                 uint32_t alpha_type)
+{
+   switch (alpha_type) {
+   default:
+   case PVR_ALPHA_NONE:
+      break;
+   case PVR_ALPHA_SOURCE:
+   case PVR_ALPHA_PREMUL_SOURCE:
+   case PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL:
+   case PVR_ALPHA_GLOBAL:
+      return 2U;
+   }
+
+   switch (pbe_format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
+      return 1U;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
+   default:
+      return 0U;
+   }
+}
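
For reference, the new helper collapses the switch above into a per-layer load count: any source-alpha blend type returns early with two loads, the listed copy/conversion formats need one, and anything else (including the PVR_TRANSFER_PBE_PIXEL_SRC_NUM sentinel) yields zero. A minimal illustrative sketch of what callers can expect (not part of the patch):

    uint32_t loads;

    /* Plain RAW64 copy, no alpha blending: a single load. */
    loads = pvr_pbe_pixel_num_loads(PVR_TRANSFER_PBE_PIXEL_SRC_RAW64,
                                    PVR_ALPHA_NONE);      /* -> 1 */

    /* Any source-alpha blend type returns early with two loads. */
    loads = pvr_pbe_pixel_num_loads(PVR_TRANSFER_PBE_PIXEL_SRC_UU8888,
                                    PVR_ALPHA_SOURCE);    /* -> 2 */

    /* Unknown/sentinel formats fall through to zero. */
    loads = pvr_pbe_pixel_num_loads(PVR_TRANSFER_PBE_PIXEL_SRC_NUM,
                                    PVR_ALPHA_NONE);      /* -> 0 */
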
index f7d011b..bdd1a4c 100644 (file)
@@ -57,6 +57,156 @@ enum pvr_pbe_accum_format {
    PVR_PBE_ACCUM_FORMAT_U24,
 };
 
+/**
+ * Pixel-related shader selector. The logic selecting the shader has to take
+ * into account the pixel-related properties (controlling the conversion path
+ * in the shader) and the geometry-related properties (controlling the sample
+ * position calculations). These two can be orthogonal.
+ *
+ * Integer format conversions (8, 16 or 32 bits per channel; signed and
+ * unsigned). Strategy: convert everything to U32 or S32, then pack in the
+ * USC. The PBE just passes the result through.
+ *
+ * Fixed-point format conversions (565, 1555, 555 etc.). Strategy: fcnorm to
+ * 4x F32, then USC pack to F16F16. The PBE converts to the destination.
+ *
+ * Float/fixed format conversions. Strategy: fcnorm, then pack to f16 _when_
+ * the destination is not f32.
+ *      fmt | unorm | flt |
+ *        8 |     x |     |
+ *       16 |     x |   x |
+ *       32 |     x |   x |
+ *
+ *
+ * non-merge type DS blit table
+ * **********************************************
+ * *        *  S8    D16   D24S8  D32    D32S8  *
+ * **********************************************
+ * * S8     *  cpy   i     i      i      i      *
+ * * D16    *  i     cpy   i      -      i      *
+ * * D24S8  *  swiz  -     cpy    (1)    -      *
+ * * D32    *  i     -     i      cpy    i      *
+ * * D32S8  *  (2)   -     -      cpy    cpy    *
+ * **********************************************
+ *
+ * merge with stencil pick type DS blit table
+ * **********************************************
+ * *        *  S8    D16   D24S8  D32    D32S8  *
+ * **********************************************
+ * * S8     *  i     i     (1)    i      (2)    *
+ * * D16    *  i     i     i      i      i      *
+ * * D24S8  *  i     i     (3)    i      (4)    *
+ * * D32    *  i     i     i      i      i      *
+ * * D32S8  *  i     i     (5)    i      (6)    *
+ * **********************************************
+ *
+ * merge with depth pick type DS blit table
+ * **********************************************
+ * *        *  S8    D16   D24S8  D32    D32S8  *
+ * **********************************************
+ * * S8     *  i     i     i      i      i      *
+ * * D16    *  -     -     -      -      -      *
+ * * D24S8  *  -     -     (s)    -      -      *
+ * * D32    *  -     -     (1)    -      (2)    *
+ * * D32S8  *  -     -     -      -      (s)    *
+ * **********************************************
+ *
+ * D formats are unpacked into a single register according to their format.
+ * S formats are unpacked into a single register as U8.
+ * D24S8 is kept in a single 32-bit register (as the PBE can't read it
+ * unpacked).
+ *
+ * Swizzles are applied on the TPU, not the PBE, because of potential
+ * accumulation, i.e. a non-iterated shader doesn't know whether it writes the
+ * output buffer for a PBE emit or for a second-pass blend.
+ */
+enum pvr_transfer_pbe_pixel_src {
+   PVR_TRANSFER_PBE_PIXEL_SRC_UU8888 = 0,
+   PVR_TRANSFER_PBE_PIXEL_SRC_US8888 = 1,
+   PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16 = 2,
+   PVR_TRANSFER_PBE_PIXEL_SRC_US16S16 = 3,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SU8888 = 4,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SS8888 = 5,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16 = 6,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16 = 7,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102 = 8,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102 = 9,
+   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 = 10,
+   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102 = 11,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 = 12,
+   PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32 = 13,
+   PVR_TRANSFER_PBE_PIXEL_SRC_US32S32 = 14,
+   PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32 = 15,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_F16F16 = 16,
+   PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM = 17,
+   PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM = 18,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_F32X4 = 19,
+   PVR_TRANSFER_PBE_PIXEL_SRC_F32X2 = 20,
+   PVR_TRANSFER_PBE_PIXEL_SRC_F32 = 21,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_RAW32 = 22,
+   PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 = 23,
+   PVR_TRANSFER_PBE_PIXEL_SRC_RAW128 = 24,
+
+   /* f16 to U8 conversion in shader. */
+   PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8 = 25,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB = 26,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45 = 27,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_D24S8 = 28,
+   PVR_TRANSFER_PBE_PIXEL_SRC_S8D24 = 29,
+   PVR_TRANSFER_PBE_PIXEL_SRC_D32S8 = 30,
+
+   /* D: D32_S8 */
+   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8 = 31,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8 = 32,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8 = 33,
+   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8 = 34,
+
+   /* D: D32 */
+   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32 = 35,
+   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F = 36,
+
+   /* D : D24_S8 */
+   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8 = 37,
+   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8 = 38,
+   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8 = 39,
+   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8 = 40,
+   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8 = 41,
+   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8 = 42,
+
+   /* ob0 holds Y and ob1 holds U or V. */
+   PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED = 43,
+
+   /* ob0 holds Y, ob1 holds U, ob2 holds V. */
+   PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V = 44,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK16 = 45,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK32 = 46,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK48 = 47,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK64 = 48,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK96 = 49,
+   PVR_TRANSFER_PBE_PIXEL_SRC_MASK128 = 50,
+
+   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8 = 51,
+
+   /* ob0 holds Y and ob1 holds V or U. */
+   PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED = 52,
+
+   /* ob0 holds Y, ob1 holds UV interleaved. */
+   PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED = 53,
+
+   /* FIXME: This changes for other BVNCs, which may change the hashing logic
+    * in pvr_hash_shader.
+    */
+   PVR_TRANSFER_PBE_PIXEL_SRC_NUM = 54,
+};
+
 const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
 uint32_t pvr_get_tex_format(VkFormat vk_format);
 uint32_t pvr_get_tex_format_aspect(VkFormat vk_format,
@@ -70,4 +220,13 @@ void pvr_get_hw_clear_color(VkFormat vk_format,
                             VkClearColorValue value,
                             uint32_t packed_out[static const 4]);
 
+/* TODO: The alpha_type parameter is really of 'enum pvr_alpha_type' type. See
+ * if we can move that enum in here. It's currently in pvr_common.h and it
+ * doesn't seem appropriate to include that header here. Also, moving the
+ * definition in here would make pvr_common.h include this header, which would
+ * mean the compiler pulling in Vulkan-specific format code.
+ */
+uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
+                                 uint32_t alpha_type);
+
 #endif /* PVR_FORMATS_H */
index 396d3f3..9b7e85f 100644 (file)
 #ifndef PVR_USCGEN_H
 #define PVR_USCGEN_H
 
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_common.h"
+#include "pvr_formats.h"
 #include "util/u_dynarray.h"
 
-#include <stdint.h>
+enum pvr_int_coord_set_floats {
+   PVR_INT_COORD_SET_FLOATS_0 = 0,
+   PVR_INT_COORD_SET_FLOATS_4 = 1,
+   /* For rate changes to 0 base screen space. */
+   PVR_INT_COORD_SET_FLOATS_6 = 2,
+   PVR_INT_COORD_SET_FLOATS_NUM = 3
+};
+
+struct pvr_tq_shader_properties {
+   /* Controls whether this is an iterated shader. */
+   bool iterated;
+
+   /* Controls whether this is meant to be running at full rate. */
+   bool full_rate;
+
+   /* Sample specific channel of pixel. */
+   bool pick_component;
+
+   /* Alpha type from transfer API. */
+   uint32_t alpha_type;
+
+   struct pvr_tq_layer_properties {
+      /* Controls whether we need to send the sample count to the TPU. */
+      bool msaa;
+
+      /* In case we run at pixel rate and do a USC resolve - but the TPU still
+       * samples in MSAA.
+       */
+      uint32_t sample_count;
+
+      enum pvr_resolve_op resolve_op;
+
+      /* Selects the pixel conversion that we have to perform. */
+      enum pvr_transfer_pbe_pixel_src pbe_format;
+
+      /* Sampling from a 3D texture with a constant Z position. */
+      bool sample;
+
+      /* Number of float coefficients to get from screen space to texture space.
+       */
+      enum pvr_int_coord_set_floats layer_floats;
+
+      /* Unaligned texture address in bytes. */
+      uint32_t byte_unwind;
+
+      /* Enable bilinear filter in shader. */
+      bool linear;
+   } layer_props;
+};
+
+/* All offsets are in dwords. */
+/* Devices may have more than 256 sh regs, but we're expecting to use very few,
+ * so let's use uint8_t.
+ */
+struct pvr_tq_frag_sh_reg_layout {
+   struct {
+      /* How many image sampler descriptors are present. */
+      uint8_t count;
+      /* TODO: See if we ever need more than one combined image sampler
+       * descriptor. If this is linked to the number of layers used, we only
+       * ever use one layer, so this wouldn't need to be an array.
+       */
+      struct {
+         uint8_t image;
+         uint8_t sampler;
+      } offsets[PVR_TRANSFER_MAX_IMAGES];
+   } combined_image_samplers;
+
+   /* TODO: Dynamic consts are used for various things so do this properly by
+    * having an actual layout instead of chucking them all together using an
+    * implicit layout.
+    */
+   struct {
+      /* How many dynamic consts regs have been allocated. */
+      uint8_t count;
+      uint8_t offset;
+   } dynamic_consts;
+
+   /* Total sh regs allocated by the driver. It does not include the regs
+    * necessary for compiler_out.
+    */
+   uint8_t driver_total;
+
+   /* Provided by the compiler to the driver, to be appended to the shareds.
+    * There is no offset field since these will be appended at the end, so
+    * driver_total can be used instead.
+    */
+   struct {
+      struct {
+         /* TODO: Remove this count and just use `compiler_out_total`? Or remove
+          * that one and use this one?
+          */
+         uint8_t count;
+         /* TODO: The array size is chosen arbitrarily based on the max
+          * constants currently produced by the compiler. Make this dynamic?
+          */
+         /* Values to fill into each shared reg used for usc constants. */
+         uint32_t values[10];
+      } usc_constants;
+   } compiler_out;
+
+   /* Total extra sh regs needed by the compiler that need to be appended to the
+    * shareds by the driver.
+    */
+   uint8_t compiler_out_total;
+};
 
 /* TODO: Shader caching (not pipeline caching) support. */
 
@@ -37,4 +147,9 @@ void pvr_uscgen_per_job_eot(uint32_t emit_count,
 
 void pvr_uscgen_nop(struct util_dynarray *binary);
 
+void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
+                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+                        unsigned *temps_used,
+                        struct util_dynarray *binary);
+
 #endif /* PVR_USCGEN_H */
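
Tying the new interface together, here is a minimal sketch (not part of the patch) of how a caller might fill in the new structures and invoke pvr_uscgen_tq_frag() for the raw-copy case the generator currently supports. The shared-register offsets and driver_total below are arbitrary example values, not anything the driver actually allocates here:

    #include "usc/pvr_uscgen.h"
    #include "util/u_dynarray.h"

    static void example_build_tq_frag_program(struct util_dynarray *binary,
                                              unsigned *temps_used)
    {
       /* RAW64 copy, no iteration/MSAA/alpha - matches the cases the generator
        * currently asserts on.
        */
       const struct pvr_tq_shader_properties shader_props = {
          .iterated = false,
          .full_rate = false,
          .pick_component = false,
          .alpha_type = PVR_ALPHA_NONE,
          .layer_props = {
             .msaa = false,
             .sample_count = 1,
             .resolve_op = PVR_RESOLVE_BLEND,
             .pbe_format = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64,
             .sample = false,
             .layer_floats = PVR_INT_COORD_SET_FLOATS_0,
             .byte_unwind = 0,
             .linear = false,
          },
       };

       /* One combined image+sampler; offsets and driver_total are example
        * values only.
        */
       struct pvr_tq_frag_sh_reg_layout sh_reg_layout = {
          .combined_image_samplers = {
             .count = 1,
             .offsets[0] = { .image = 0, .sampler = 4 },
          },
          .driver_total = 8,
       };

       pvr_uscgen_tq_frag(&shader_props, &sh_reg_layout, temps_used, binary);
       /* binary now holds the encoded USC program; *temps_used and
        * sh_reg_layout.compiler_out report what the compiler used.
        */
    }
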
diff --git a/src/imagination/vulkan/usc/pvr_uscgen_tq.c b/src/imagination/vulkan/usc/pvr_uscgen_tq.c
new file mode 100644 (file)
index 0000000..64f2f32
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2023 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "pvr_uscgen.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_builder.h"
+#include "util/u_dynarray.h"
+
+void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
+                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+                        unsigned *temps_used,
+                        struct util_dynarray *binary)
+{
+   rogue_builder b;
+   rogue_shader *shader = rogue_shader_create(NULL, MESA_SHADER_NONE);
+
+   unsigned smp_coord_size = 2;
+   unsigned smp_coord_idx = 0;
+   rogue_regarray *smp_coords;
+
+   unsigned channels = 0;
+   unsigned output_idx = 1;
+   rogue_regarray *outputs = NULL;
+
+   unsigned image_state_size = 4;
+   unsigned image_state_idx;
+   rogue_regarray *image_state;
+
+   unsigned smp_state_size = 4;
+   unsigned smp_state_idx;
+   rogue_regarray *smp_state;
+
+   rogue_set_shader_name(shader, "TQ (fragment)");
+   rogue_builder_init(&b, shader);
+   rogue_push_block(&b);
+
+   smp_coords =
+      rogue_ssa_vec_regarray(b.shader, smp_coord_size, smp_coord_idx, 0);
+
+   /* TODO: Unrestrict. */
+   assert(shader_props->full_rate == false);
+   assert(shader_props->pick_component == false);
+   assert(shader_props->alpha_type == 0);
+
+   const struct pvr_tq_layer_properties *layer_props =
+      &shader_props->layer_props;
+   uint32_t loads;
+
+   /* TODO: Unrestrict. */
+   assert(layer_props->msaa == false);
+   assert(layer_props->sample_count == 1U);
+   assert(layer_props->resolve_op == PVR_RESOLVE_BLEND);
+   assert(layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 ||
+          layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128);
+   assert(layer_props->sample == false);
+   assert(layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_0);
+   assert(layer_props->byte_unwind == 0);
+   assert(layer_props->linear == false);
+
+   loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format,
+                                   shader_props->alpha_type);
+   for (uint32_t load = 0; load < loads; ++load) {
+      if (shader_props->iterated) {
+         /* TODO: feed{back,forward} the coeff index to/from shader_info. */
+         unsigned coeff_index = 0;
+         rogue_regarray *coeffs =
+            rogue_coeff_regarray(b.shader, smp_coord_size * 4, coeff_index);
+
+         rogue_instr *instr = &rogue_FITR_PIXEL(&b,
+                                                rogue_ref_regarray(smp_coords),
+                                                rogue_ref_drc(0),
+                                                rogue_ref_regarray(coeffs),
+                                                rogue_ref_val(smp_coord_size))
+                                  ->instr;
+         rogue_add_instr_comment(instr, "load_iterated");
+      } else {
+         rogue_instr *instr;
+         rogue_regarray *smp_coord_x =
+            rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 0);
+         rogue_regarray *smp_coord_y =
+            rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 1);
+
+         /* (X,Y).P, pixel (X,Y) coordinates, pixel mode. */
+         rogue_reg *in_x = rogue_special_reg(b.shader, 97);
+         rogue_reg *in_y = rogue_special_reg(b.shader, 100);
+
+         instr =
+            &rogue_MOV(&b, rogue_ref_regarray(smp_coord_x), rogue_ref_reg(in_x))
+                ->instr;
+         rogue_add_instr_comment(instr, "load_x");
+
+         instr =
+            &rogue_MOV(&b, rogue_ref_regarray(smp_coord_y), rogue_ref_reg(in_y))
+                ->instr;
+         rogue_add_instr_comment(instr, "load_y");
+      }
+
+      /* Non-MSAA needs no extra sample-coordinate setup here; MSAA layers
+       * aren't supported yet (see the asserts above). */
+      if (layer_props->msaa)
+         unreachable("Unsupported layer property (MSAA).");
+   }
+
+   /* Source conversion. */
+   switch (layer_props->pbe_format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+      break;
+
+   default:
+      unreachable("Unsupported layer property (format).");
+   }
+
+   /* TODO: Select the texture_regs index appropriately. */
+   assert(sh_reg_layout->combined_image_samplers.count == 1);
+   image_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].image;
+   image_state =
+      rogue_shared_regarray(b.shader, image_state_size, image_state_idx);
+
+   smp_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].sampler;
+   smp_state = rogue_shared_regarray(b.shader, smp_state_size, smp_state_idx);
+
+   /* Pack/blend phase. */
+   rogue_backend_instr *smp2d;
+
+   switch (layer_props->pbe_format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: {
+      switch (layer_props->pbe_format) {
+      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+         channels = 2;
+         break;
+
+      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+         channels = 4;
+         break;
+
+      default:
+         unreachable("Unsupported layer property (format).");
+      }
+
+      outputs = rogue_ssa_vec_regarray(b.shader, channels, output_idx, 0);
+
+      smp2d = rogue_SMP2D(&b,
+                          rogue_ref_regarray(outputs),
+                          rogue_ref_drc(0),
+                          rogue_ref_regarray(image_state),
+                          rogue_ref_regarray(smp_coords),
+                          rogue_ref_regarray(smp_state),
+                          rogue_ref_io(ROGUE_IO_NONE),
+                          rogue_ref_val(channels));
+      rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_SLCWRITEBACK);
+      rogue_add_instr_comment(&smp2d->instr, "pack/blend");
+
+      if (!shader_props->iterated)
+         rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_NNCOORDS);
+      break;
+   }
+
+   default:
+      unreachable("Unsupported layer property (format).");
+   }
+
+   assert(channels && outputs);
+
+   /* Copy outputs. */
+   for (unsigned u = 0; u < channels; ++u) {
+      rogue_regarray *output_elem =
+         rogue_ssa_vec_regarray(b.shader, 1, output_idx, u);
+      rogue_reg *pixout_elem = rogue_pixout_reg(b.shader, u);
+      rogue_MOV(&b,
+                rogue_ref_reg(pixout_elem),
+                rogue_ref_regarray(output_elem));
+   }
+
+   rogue_END(&b);
+
+   rogue_shader_passes(shader);
+   rogue_encode_shader(NULL, shader, binary);
+
+   *temps_used = rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
+
+   sh_reg_layout->compiler_out.usc_constants.count = 0;
+   sh_reg_layout->compiler_out_total = 0;
+
+   ralloc_free(shader);
+}