}
PUBLIC
+rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index)
+{
+ return rogue_reg_cached(shader, ROGUE_REG_CLASS_SPECIAL, index);
+}
+
+PUBLIC
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index)
{
return rogue_reg_cached(shader, ROGUE_REG_CLASS_VTXIN, index);
}
PUBLIC
+rogue_regarray *
+rogue_shared_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
+{
+ return rogue_regarray_cached(shader,
+ size,
+ ROGUE_REG_CLASS_SHARED,
+ start_index);
+}
+
+PUBLIC
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
unsigned size,
unsigned start_index,
rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index);
+rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index);
+
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index);
rogue_reg *rogue_vtxout_reg(rogue_shader *shader, unsigned index);
rogue_regarray *
rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
+rogue_regarray *rogue_shared_regarray(rogue_shader *shader,
+ unsigned size,
+ unsigned start_index);
+
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
unsigned size,
unsigned start_index,
{
/* TODO NEXT: Use this counting method elsewhere as well. */
ctx->common_data[shader->stage].temps =
- __bitset_count(shader->regs_used[ROGUE_REG_CLASS_TEMP],
- BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num));
- ctx->common_data[shader->stage].internals = __bitset_count(
- shader->regs_used[ROGUE_REG_CLASS_INTERNAL],
- BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_INTERNAL].num));
+ rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
+ ctx->common_data[shader->stage].internals =
+ rogue_count_used_regs(shader, ROGUE_REG_CLASS_INTERNAL);
}
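For context, a minimal sketch of what rogue_count_used_regs() presumably wraps, reconstructed from the __bitset_count() calls it replaces above; the actual definition is not part of this hunk and the exact signature is an assumption:

static inline unsigned rogue_count_used_regs(const rogue_shader *shader,
                                             enum rogue_reg_class reg_class)
{
   /* Count the bits set in the "registers used" bitset for this class. */
   return __bitset_count(shader->regs_used[reg_class],
                         BITSET_WORDS(rogue_reg_infos[reg_class].num));
}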
static bool ssa_def_cb(nir_ssa_def *ssa, void *state)
}
PUBLIC
-void rogue_encode_shader(UNUSED rogue_build_ctx *ctx,
+void rogue_encode_shader(rogue_build_ctx *ctx,
rogue_shader *shader,
struct util_dynarray *binary)
{
if (!shader->is_grouped)
unreachable("Can't encode shader with ungrouped instructions.");
- util_dynarray_init(binary, shader);
+ util_dynarray_init(binary, ctx);
rogue_foreach_instr_group_in_shader (group, shader)
rogue_encode_instr_group(group, binary);
},
.supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), },
.supported_src_types = {
- [0] = T(REG),
+ [0] = T(REG) | T(REGARRAY),
},
},
[ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2,
[ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1,
.supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
.supported_src_types = {
- [0] = T(REG) | T(IMM),
+ [0] = T(REG) | T(REGARRAY) | T(IMM),
},
},
[ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3,
{
fputs("/*", fp);
+ if (shader->stage == MESA_SHADER_NONE)
+ fputs(" USC program", fp);
+ else
+ fprintf(fp, " %s shader", _mesa_shader_stage_to_string(shader->stage));
+
if (shader->name)
- fprintf(fp, " \"%s\":", shader->name);
+ fprintf(fp, " - %s", shader->name);
- fprintf(fp, " %s shader */\n", _mesa_shader_stage_to_string(shader->stage));
+ fputs(" */\n", fp);
rogue_foreach_block (block, shader)
rogue_print_block(fp, block);
'pvr_wsi.c',
'usc/pvr_uscgen.c',
+ 'usc/pvr_uscgen_tq.c',
)
pvr_includes = [
#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
(uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
+#define PVR_TRANSFER_MAX_LAYERS 1U
+#define PVR_TRANSFER_MAX_LOADS 4U
+#define PVR_TRANSFER_MAX_IMAGES \
+ (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
+
/* TODO: move into a common surface library? */
enum pvr_memlayout {
PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
PVR_STAGE_ALLOCATION_COUNT
};
+enum pvr_resolve_op {
+ PVR_RESOLVE_BLEND,
+ PVR_RESOLVE_MIN,
+ PVR_RESOLVE_MAX,
+ PVR_RESOLVE_SAMPLE0,
+ PVR_RESOLVE_SAMPLE1,
+ PVR_RESOLVE_SAMPLE2,
+ PVR_RESOLVE_SAMPLE3,
+ PVR_RESOLVE_SAMPLE4,
+ PVR_RESOLVE_SAMPLE5,
+ PVR_RESOLVE_SAMPLE6,
+ PVR_RESOLVE_SAMPLE7,
+};
+
+enum pvr_alpha_type {
+ PVR_ALPHA_NONE,
+ PVR_ALPHA_SOURCE,
+ PVR_ALPHA_PREMUL_SOURCE,
+ PVR_ALPHA_GLOBAL,
+ PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL,
+ PVR_ALPHA_CUSTOM,
+ PVR_ALPHA_AATEXT,
+};
+
enum pvr_event_state {
PVR_EVENT_STATE_SET_BY_HOST,
PVR_EVENT_STATE_RESET_BY_HOST,
#include <vulkan/vulkan.h>
#include "hwdef/rogue_hw_utils.h"
+#include "pvr_common.h"
#include "pvr_formats.h"
#include "pvr_private.h"
#include "util/bitpack_helpers.h"
return false;
}
}
+
+uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
+ uint32_t alpha_type)
+{
+ switch (alpha_type) {
+ default:
+ case PVR_ALPHA_NONE:
+ break;
+ case PVR_ALPHA_SOURCE:
+ case PVR_ALPHA_PREMUL_SOURCE:
+ case PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL:
+ case PVR_ALPHA_GLOBAL:
+ return 2U;
+ }
+
+ switch (pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
+
+ case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
+ return 1U;
+
+ case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
+ default:
+ return 0U;
+ }
+}
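Usage note: the load count returned here sizes the per-layer sampling loop in the transfer fragment shader generator (see pvr_uscgen_tq_frag() below); any source/global alpha blend forces two loads because both source and destination have to be fetched. A minimal sketch of the call site:

uint32_t loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format,
                                         shader_props->alpha_type);

for (uint32_t load = 0; load < loads; ++load) {
   /* Coordinate setup (iterated coefficients or pixel X/Y) is emitted once
    * per load.
    */
}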
PVR_PBE_ACCUM_FORMAT_U24,
};
+/**
+ * Pixel-related shader selector. The logic selecting the shader has to take
+ * into account the pixel-related properties (controlling the conversion path
+ * in the shader) and the geometry-related properties (controlling the sample
+ * position calculations). These two can be orthogonal.
+ *
+ * Integer format conversions: bit depths of 8, 16, 32 per channel; signed and
+ * unsigned formats. Strategy: convert everything to U32 or S32, then pack in
+ * the USC. The PBE just passes it through.
+ *
+ * Fixed point format conversions: bit depths of 565, 1555, 555 etc. Strategy:
+ * fcnorm to 4x F32, then USC pack to F16F16. The PBE converts to the
+ * destination format.
+ *
+ * Float/fixed format conversions.
+ * Strategy: fcnorm, then pack to F16 _when_ the destination is not F32.
+ * fmt | unorm | flt |
+ * 8 | x | |
+ * 16 | x | x |
+ * 32 | x | x |
+ *
+ *
+ * non-merge type DS blit table
+ * **********************************************
+ * * * S8 D16 D24S8 D32 D32S8 *
+ * **********************************************
+ * * S8 * cpy i i i i *
+ * * D16 * i cpy i - i *
+ * * D24S8 * swiz - cpy (1) - *
+ * * D32 * i - i cpy i *
+ * * D32S8 * (2) - - cpy cpy *
+ * **********************************************
+ *
+ * merge with stencil pick type DS blit table
+ * **********************************************
+ * * * S8 D16 D24S8 D32 D32S8 *
+ * **********************************************
+ * * S8 * i i (1) i (2) *
+ * * D16 * i i i i i *
+ * * D24S8 * i i (3) i (4) *
+ * * D32 * i i i i i *
+ * * D32S8 * i i (5) i (6) *
+ * **********************************************
+ *
+ * merge with depth pick type DS blit table
+ * **********************************************
+ * * * S8 D16 D24S8 D32 D32S8 *
+ * **********************************************
+ * * S8 * i i i i i *
+ * * D16 * - - - - - *
+ * * D24S8 * - - (s) - - *
+ * * D32 * - - (1) - (2) *
+ * * D32S8 * - - - - (s) *
+ * **********************************************
+ *
+ * D formats are unpacked into a single register according to their format.
+ * S formats are unpacked into a single register in U8.
+ * D24S8 stays in a single 32-bit register (as the PBE can't read it in
+ * unpacked form).
+ *
+ * Swizzles are applied on the TPU, not the PBE, because of potential
+ * accumulation; i.e. a non-iterated shader doesn't know whether it writes the
+ * output buffer for a PBE emit or for a second-pass blend.
+ */
+enum pvr_transfer_pbe_pixel_src {
+ PVR_TRANSFER_PBE_PIXEL_SRC_UU8888 = 0,
+ PVR_TRANSFER_PBE_PIXEL_SRC_US8888 = 1,
+ PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16 = 2,
+ PVR_TRANSFER_PBE_PIXEL_SRC_US16S16 = 3,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SU8888 = 4,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SS8888 = 5,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16 = 6,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16 = 7,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102 = 8,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102 = 9,
+ PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 = 10,
+ PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102 = 11,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 = 12,
+ PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32 = 13,
+ PVR_TRANSFER_PBE_PIXEL_SRC_US32S32 = 14,
+ PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32 = 15,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_F16F16 = 16,
+ PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM = 17,
+ PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM = 18,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_F32X4 = 19,
+ PVR_TRANSFER_PBE_PIXEL_SRC_F32X2 = 20,
+ PVR_TRANSFER_PBE_PIXEL_SRC_F32 = 21,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_RAW32 = 22,
+ PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 = 23,
+ PVR_TRANSFER_PBE_PIXEL_SRC_RAW128 = 24,
+
+ /* f16 to U8 conversion in shader. */
+ PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8 = 25,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB = 26,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45 = 27,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_D24S8 = 28,
+ PVR_TRANSFER_PBE_PIXEL_SRC_S8D24 = 29,
+ PVR_TRANSFER_PBE_PIXEL_SRC_D32S8 = 30,
+
+ /* D: D32_S8 */
+ PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8 = 31,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8 = 32,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8 = 33,
+ PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8 = 34,
+
+ /* D: D32 */
+ PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32 = 35,
+ PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F = 36,
+
+ /* D: D24_S8 */
+ PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8 = 37,
+ PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8 = 38,
+ PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8 = 39,
+ PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8 = 40,
+ PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8 = 41,
+ PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8 = 42,
+
+ /* ob0 holds Y packed together with U or V. */
+ PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED = 43,
+
+ /* ob0 holds Y, ob1 holds U, ob2 holds V. */
+ PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V = 44,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK16 = 45,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK32 = 46,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK48 = 47,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK64 = 48,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK96 = 49,
+ PVR_TRANSFER_PBE_PIXEL_SRC_MASK128 = 50,
+
+ PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8 = 51,
+
+ /* ob0 holds Y packed together with V or U. */
+ PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED = 52,
+
+ /* ob0 holds Y, ob1 holds UV interleaved. */
+ PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED = 53,
+
+ /* FIXME: This changes for other BVNCs, which may change the hashing logic
+ * in pvr_hash_shader.
+ */
+ PVR_TRANSFER_PBE_PIXEL_SRC_NUM = 54,
+};
+
const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
uint32_t pvr_get_tex_format(VkFormat vk_format);
uint32_t pvr_get_tex_format_aspect(VkFormat vk_format,
VkClearColorValue value,
uint32_t packed_out[static const 4]);
+/* TODO: alpha_type is of 'enum pvr_alpha_type' type. See if we can move that
+ * in here. It's currently in pvr_common.h and it doesn't seem appropriate to
+ * include that header here. Also, moving the definition in here would make
+ * pvr_common.h include this header, which would pull Vulkan-specific format
+ * code into it.
+ */
+uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
+ uint32_t alpha_type);
+
#endif /* PVR_FORMATS_H */
#ifndef PVR_USCGEN_H
#define PVR_USCGEN_H
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_common.h"
+#include "pvr_formats.h"
#include "util/u_dynarray.h"
-#include <stdint.h>
+enum pvr_int_coord_set_floats {
+ PVR_INT_COORD_SET_FLOATS_0 = 0,
+ PVR_INT_COORD_SET_FLOATS_4 = 1,
+ /* For rate changes to 0-based screen space. */
+ PVR_INT_COORD_SET_FLOATS_6 = 2,
+ PVR_INT_COORD_SET_FLOATS_NUM = 3
+};
+
+struct pvr_tq_shader_properties {
+ /* Controls whether this is an iterated shader. */
+ bool iterated;
+
+ /* Controls whether this is meant to be running at full rate. */
+ bool full_rate;
+
+ /* Sample a specific channel of the pixel. */
+ bool pick_component;
+
+ /* Alpha type from transfer API. */
+ uint32_t alpha_type;
+
+ struct pvr_tq_layer_properties {
+ /* Controls whether we need to send the sample count to the TPU. */
+ bool msaa;
+
+ /* In case we run at pixel rate to do a USC resolve, the TPU still samples
+ * MSAA.
+ */
+ uint32_t sample_count;
+
+ enum pvr_resolve_op resolve_op;
+
+ /* Selects the pixel conversion that we have to perform. */
+ enum pvr_transfer_pbe_pixel_src pbe_format;
+
+ /* Sampling from a 3D texture with a constant Z position. */
+ bool sample;
+
+ /* Number of float coefficients to get from screen space to texture space.
+ */
+ enum pvr_int_coord_set_floats layer_floats;
+
+ /* Unaligned texture address in bytes. */
+ uint32_t byte_unwind;
+
+ /* Enable bilinear filter in shader. */
+ bool linear;
+ } layer_props;
+};
+
+/* All offsets are in dwords. */
+/* Devices may have more than 256 sh regs but we're expecting to use very few,
+ * so let's use uint8_t.
+ */
+struct pvr_tq_frag_sh_reg_layout {
+ struct {
+ /* How many image sampler descriptors are present. */
+ uint8_t count;
+ /* TODO: See if we ever need more than one combined image sampler
+ * descriptor. If this is linked to the number of layers used, we only
+ * ever use one layer, so this wouldn't need to be an array.
+ */
+ struct {
+ uint8_t image;
+ uint8_t sampler;
+ } offsets[PVR_TRANSFER_MAX_IMAGES];
+ } combined_image_samplers;
+
+ /* TODO: Dynamic consts are used for various things so do this properly by
+ * having an actual layout instead of chucking them all together using an
+ * implicit layout.
+ */
+ struct {
+ /* How many dynamic const regs have been allocated. */
+ uint8_t count;
+ uint8_t offset;
+ } dynamic_consts;
+
+ /* Total sh regs allocated by the driver. It does not include the regs
+ * necessary for compiler_out.
+ */
+ uint8_t driver_total;
+
+ /* Provided by the compiler to the driver to be appended to the shareds. */
+ /* No offset field since these will be appended at the end so driver_total
+ * can be used instead.
+ */
+ struct {
+ struct {
+ /* TODO: Remove this count and just use `compiler_out_total`? Or remove
+ * that one and use this one?
+ */
+ uint8_t count;
+ /* TODO: The array size is chosen arbitrarily based on the max
+ * constants currently produced by the compiler. Make this dynamic?
+ */
+ /* Values to fill into each shared reg used for USC constants. */
+ uint32_t values[10];
+ } usc_constants;
+ } compiler_out;
+
+ /* Total extra sh regs needed by the compiler, to be appended to the shareds
+ * by the driver.
+ */
+ uint8_t compiler_out_total;
+};
/* TODO: Shader caching (not pipeline caching) support. */
void pvr_uscgen_nop(struct util_dynarray *binary);
+void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
+ struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ unsigned *temps_used,
+ struct util_dynarray *binary);
+
#endif /* PVR_USCGEN_H */
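A usage sketch of the new entry point: a hypothetical caller building the fragment program for a non-iterated raw 64-bit copy blit, matching the restrictions currently asserted in pvr_uscgen_tq_frag(); the shared register offsets and driver_total below are illustrative values, not taken from the driver:

#include "pvr_uscgen.h"
#include "util/u_dynarray.h"

static void example_build_tq_frag(struct util_dynarray *binary,
                                  unsigned *temps_used)
{
   const struct pvr_tq_shader_properties shader_props = {
      .iterated = false,
      .full_rate = false,
      .pick_component = false,
      .alpha_type = PVR_ALPHA_NONE,
      .layer_props = {
         .msaa = false,
         .sample_count = 1,
         .resolve_op = PVR_RESOLVE_BLEND,
         .pbe_format = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64,
         .sample = false,
         .layer_floats = PVR_INT_COORD_SET_FLOATS_0,
         .byte_unwind = 0,
         .linear = false,
      },
   };

   /* Illustrative shared register layout: image state at sh0, sampler state
    * at sh4, no dynamic consts.
    */
   struct pvr_tq_frag_sh_reg_layout sh_reg_layout = {
      .combined_image_samplers = {
         .count = 1,
         .offsets = { [0] = { .image = 0, .sampler = 4 } },
      },
      .driver_total = 8,
   };

   /* The binary is initialised inside rogue_encode_shader(). */
   pvr_uscgen_tq_frag(&shader_props, &sh_reg_layout, temps_used, binary);
}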
--- /dev/null
+/*
+ * Copyright © 2023 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "pvr_uscgen.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_builder.h"
+#include "util/u_dynarray.h"
+
+void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
+ struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ unsigned *temps_used,
+ struct util_dynarray *binary)
+{
+ rogue_builder b;
+ rogue_shader *shader = rogue_shader_create(NULL, MESA_SHADER_NONE);
+
+ unsigned smp_coord_size = 2;
+ unsigned smp_coord_idx = 0;
+ rogue_regarray *smp_coords;
+
+ unsigned channels = 0;
+ unsigned output_idx = 1;
+ rogue_regarray *outputs = NULL;
+
+ unsigned image_state_size = 4;
+ unsigned image_state_idx;
+ rogue_regarray *image_state;
+
+ unsigned smp_state_size = 4;
+ unsigned smp_state_idx;
+ rogue_regarray *smp_state;
+
+ rogue_set_shader_name(shader, "TQ (fragment)");
+ rogue_builder_init(&b, shader);
+ rogue_push_block(&b);
+
+ smp_coords =
+ rogue_ssa_vec_regarray(b.shader, smp_coord_size, smp_coord_idx, 0);
+
+ /* TODO: Unrestrict. */
+ assert(shader_props->full_rate == false);
+ assert(shader_props->pick_component == false);
+ assert(shader_props->alpha_type == 0);
+
+ const struct pvr_tq_layer_properties *layer_props =
+ &shader_props->layer_props;
+ uint32_t loads;
+
+ /* TODO: Unrestrict. */
+ assert(layer_props->msaa == false);
+ assert(layer_props->sample_count == 1U);
+ assert(layer_props->resolve_op == PVR_RESOLVE_BLEND);
+ assert(layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 ||
+ layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128);
+ assert(layer_props->sample == false);
+ assert(layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_0);
+ assert(layer_props->byte_unwind == 0);
+ assert(layer_props->linear == false);
+
+ loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format,
+ shader_props->alpha_type);
+ for (uint32_t load = 0; load < loads; ++load) {
+ if (shader_props->iterated) {
+ /* TODO: feed{back,forward} the coeff index to/from shader_info. */
+ unsigned coeff_index = 0;
+ rogue_regarray *coeffs =
+ rogue_coeff_regarray(b.shader, smp_coord_size * 4, coeff_index);
+
+ rogue_instr *instr = &rogue_FITR_PIXEL(&b,
+ rogue_ref_regarray(smp_coords),
+ rogue_ref_drc(0),
+ rogue_ref_regarray(coeffs),
+ rogue_ref_val(smp_coord_size))
+ ->instr;
+ rogue_add_instr_comment(instr, "load_iterated");
+ } else {
+ rogue_instr *instr;
+ rogue_regarray *smp_coord_x =
+ rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 0);
+ rogue_regarray *smp_coord_y =
+ rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 1);
+
+ /* (X,Y).P, pixel (X,Y) coordinates, pixel mode. */
+ rogue_reg *in_x = rogue_special_reg(b.shader, 97);
+ rogue_reg *in_y = rogue_special_reg(b.shader, 100);
+
+ instr =
+ &rogue_MOV(&b, rogue_ref_regarray(smp_coord_x), rogue_ref_reg(in_x))
+ ->instr;
+ rogue_add_instr_comment(instr, "load_x");
+
+ instr =
+ &rogue_MOV(&b, rogue_ref_regarray(smp_coord_y), rogue_ref_reg(in_y))
+ ->instr;
+ rogue_add_instr_comment(instr, "load_y");
+ }
+
+      if (layer_props->msaa)
+         unreachable("Unsupported layer property (MSAA).");
+ }
+
+ /* Source conversion. */
+ switch (layer_props->pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+ break;
+
+ default:
+ unreachable("Unsupported layer property (format).");
+ }
+
+ /* TODO: Select the texture_regs index appropriately. */
+ assert(sh_reg_layout->combined_image_samplers.count == 1);
+ image_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].image;
+ image_state =
+ rogue_shared_regarray(b.shader, image_state_size, image_state_idx);
+
+ smp_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].sampler;
+ smp_state = rogue_shared_regarray(b.shader, smp_state_size, smp_state_idx);
+
+ /* Pack/blend phase. */
+ rogue_backend_instr *smp2d;
+
+ switch (layer_props->pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: {
+ switch (layer_props->pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+ channels = 2;
+ break;
+
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+ channels = 4;
+ break;
+
+ default:
+ unreachable("Unsupported layer property (format).");
+ }
+
+ outputs = rogue_ssa_vec_regarray(b.shader, channels, output_idx, 0);
+
+ smp2d = rogue_SMP2D(&b,
+ rogue_ref_regarray(outputs),
+ rogue_ref_drc(0),
+ rogue_ref_regarray(image_state),
+ rogue_ref_regarray(smp_coords),
+ rogue_ref_regarray(smp_state),
+ rogue_ref_io(ROGUE_IO_NONE),
+ rogue_ref_val(channels));
+ rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_SLCWRITEBACK);
+ rogue_add_instr_comment(&smp2d->instr, "pack/blend");
+
+ if (!shader_props->iterated)
+ rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_NNCOORDS);
+ break;
+ }
+
+ default:
+ unreachable("Unsupported layer property (format).");
+ }
+
+ assert(channels && outputs);
+
+ /* Copy outputs. */
+ for (unsigned u = 0; u < channels; ++u) {
+ rogue_regarray *output_elem =
+ rogue_ssa_vec_regarray(b.shader, 1, output_idx, u);
+ rogue_reg *pixout_elem = rogue_pixout_reg(b.shader, u);
+ rogue_MOV(&b,
+ rogue_ref_reg(pixout_elem),
+ rogue_ref_regarray(output_elem));
+ }
+
+ rogue_END(&b);
+
+ rogue_shader_passes(shader);
+ rogue_encode_shader(NULL, shader, binary);
+
+ *temps_used = rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
+
+ sh_reg_layout->compiler_out.usc_constants.count = 0;
+ sh_reg_layout->compiler_out_total = 0;
+
+ ralloc_free(shader);
+}