* SOFTWARE.
*/
+#include <assert.h>
#include <stdbool.h>
-#include <stdint.h>
#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
#include <vulkan/vulkan.h>
+#include "pvr_csb.h"
+#include "pvr_csb_enum_helpers.h"
+#include "pvr_formats.h"
#include "pvr_job_common.h"
#include "pvr_job_context.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "pvr_transfer_frag_store.h"
+#include "pvr_types.h"
+#include "pvr_uscgen.h"
+#include "pvr_util.h"
#include "pvr_winsys.h"
+#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
+#include "util/u_math.h"
+#include "util/xxhash.h"
+#include "vk_format.h"
+#include "vk_log.h"
#include "vk_sync.h"
-/* FIXME: Implement gpu based transfer support. */
-VkResult pvr_transfer_job_submit(struct pvr_device *device,
- struct pvr_transfer_ctx *ctx,
- struct pvr_sub_cmd_transfer *sub_cmd,
- struct vk_sync *wait_sync,
- struct vk_sync *signal_sync)
+#define PVR_TRANSFER_MAX_PASSES 10U
+#define PVR_TRANSFER_MAX_CLIP_RECTS 4U
+#define PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT 16U
+
+/* Number of triangles sent to the TSP per raster. */
+#define PVR_TRANSFER_NUM_LAYERS 1U
+
+#define PVR_MAX_WIDTH 16384
+#define PVR_MAX_HEIGHT 16384
+
+#define PVR_MAX_CLIP_SIZE(dev_info) \
+ (PVR_HAS_FEATURE(dev_info, screen_size8K) ? 8192U : 16384U)
+
+/* Tile pairing mode selected when the core exposes the paired_tiles
+ * feature (see pvr_pbe_setup()).
+ */
+enum pvr_paired_tiles {
+   PVR_PAIRED_TILES_NONE,
+   PVR_PAIRED_TILES_X,
+   PVR_PAIRED_TILES_Y
+};
+
+/* Per-pass state of a custom-mapped transfer: rect remappings plus the clip
+ * rectangles rendered in that pass.
+ */
+struct pvr_transfer_pass {
+   uint32_t dst_offset;
+
+   uint32_t src_offset;
+   uint32_t mapping_count;
+   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
+   bool extend_height;
+   bool byte_unwind;
+
+   uint32_t clip_rects_count;
+   VkRect2D clip_rects[PVR_TRANSFER_MAX_CLIP_RECTS];
+};
+
+/* Description of how a transfer is split into one or more passes when the
+ * surfaces need remapped (custom) addressing. NOTE(review): the previous
+ * comment here ("Structure representing a layer iteration.") belongs to
+ * pvr_transfer_3d_iteration below.
+ */
+struct pvr_transfer_custom_mapping {
+   bool double_stride;
+   uint32_t texel_unwind_src;
+   uint32_t texel_unwind_dst;
+   uint32_t texel_extend_src;
+   uint32_t texel_extend_dst;
+   uint32_t pass_count;
+   struct pvr_transfer_pass passes[PVR_TRANSFER_MAX_PASSES];
+   uint32_t max_clip_rects;
+   int32_t max_clip_size;
+   uint32_t byte_unwind_src;
+};
+
+/* Structure representing a layer iteration. */
+struct pvr_transfer_3d_iteration {
+   uint32_t texture_coords[12];
+};
+
+/* All state computed while preparing a single 3D (blit) transfer. */
+struct pvr_transfer_3d_state {
+   /* Register values submitted with the transfer kick. */
+   struct pvr_winsys_transfer_regs regs;
+
+   /* Destination rectangle is empty / fully clipped away. */
+   bool empty_dst;
+   /* Down-scaling (resolve) blit — presumably src has more samples than
+    * dst; set outside this chunk, TODO confirm.
+    */
+   bool down_scale;
+   /* Write all channels present in the dst from the USC even if those are
+    * constants.
+    */
+   bool dont_force_pbe;
+
+   /* The rate of the shader. */
+   uint32_t msaa_multiplier;
+   /* Top left corner of the render in ISP tiles. */
+   uint32_t origin_x_in_tiles;
+   /* Top left corner of the render in ISP tiles. */
+   uint32_t origin_y_in_tiles;
+   /* Width of the render in ISP tiles. */
+   uint32_t width_in_tiles;
+   /* Height of the render in ISP tiles. */
+   uint32_t height_in_tiles;
+
+   /* Width of a sample in registers (pixel partition width). */
+   uint32_t usc_pixel_width;
+
+   /* Properties of the USC shader. */
+   struct pvr_tq_shader_properties shader_props;
+
+   /* TODO: Use pvr_dev_addr_t of an offset type for these. */
+   uint32_t pds_shader_task_offset;
+   uint32_t tex_state_data_offset;
+   uint32_t uni_tex_code_offset;
+
+   uint32_t uniform_data_size;
+   uint32_t tex_state_data_size;
+   uint32_t usc_coeff_regs;
+
+   /* Pointer into the common store. */
+   uint32_t common_ptr;
+   /* Pointer into the dynamic constant reg buffer. */
+   uint32_t dynamic_const_reg_ptr;
+   /* Pointer into the USC constant reg buffer. */
+   uint32_t usc_const_reg_ptr;
+
+   uint32_t pds_coeff_task_offset;
+   uint32_t coeff_data_size;
+
+   /* Number of temporary 32bit registers used by PDS. */
+   uint32_t pds_temps;
+
+   /* Multi-pass remapping state and the index of the pass in flight. */
+   struct pvr_transfer_custom_mapping custom_mapping;
+   uint32_t pass_idx;
+
+   enum pvr_filter filter;
+   bool custom_filter;
+
+   enum pvr_paired_tiles pair_tiles;
+};
+
+/* One prepared transfer: command flags plus the full 3D setup state. */
+struct pvr_transfer_prep_data {
+   uint32_t flags;
+   struct pvr_transfer_3d_state state;
+};
+
+/* A batch of prepares submitted together as one transfer kick. */
+struct pvr_transfer_submit {
+   uint32_t prep_count;
+   struct pvr_transfer_prep_data
+      prep_array[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
+};
+
+/* Map a format's block size in bits onto the matching raw PBE source
+ * width (32/64/128 bit).
+ */
+static enum pvr_transfer_pbe_pixel_src pvr_pbe_src_format_raw(VkFormat format)
+{
+   uint32_t bpp = vk_format_get_blocksizebits(format);
+
+   if (bpp <= 32U)
+      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
+   else if (bpp <= 64U)
+      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
+
+   return PVR_TRANSFER_PBE_PIXEL_SRC_RAW128;
+}
+
+/**
+ * How the PBE expects the output buffer for an RGBA space conversion.
+ *
+ * Picks the USC/PBE pixel source format for a blit given the source and
+ * destination formats. Returns VK_ERROR_FORMAT_NOT_SUPPORTED for
+ * combinations the transfer path cannot convert.
+ */
+static VkResult
+pvr_pbe_src_format_normal(VkFormat src_format,
+                          VkFormat dst_format,
+                          bool down_scale,
+                          bool dont_force_pbe,
+                          enum pvr_transfer_pbe_pixel_src *src_format_out)
+{
+   /* Snorm destinations need the signed variants too: checking sint alone
+    * would make the normalized path below treat snorm dsts as unsigned.
+    */
+   bool dst_signed = vk_format_is_sint(dst_format) ||
+                     vk_format_is_snorm(dst_format);
+
+   if (vk_format_is_int(dst_format)) {
+      uint32_t red_width;
+      bool src_signed;
+      uint32_t count;
+
+      if (!vk_format_is_int(src_format))
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      src_signed = vk_format_is_sint(src_format);
+
+      red_width = vk_format_get_component_bits(dst_format,
+                                               UTIL_FORMAT_COLORSPACE_RGB,
+                                               0);
+
+      switch (red_width) {
+      case 8:
+         if (!src_signed && !dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU8888;
+         else if (src_signed && !dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU8888;
+         else if (!src_signed && dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US8888;
+         else
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS8888;
+
+         break;
+
+      case 10:
+         switch (dst_format) {
+         case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+            *src_format_out = src_signed ? PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102
+                                         : PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102;
+            break;
+
+         case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+            /* R/B swapped variant of the packed 1010102 layout. */
+            *src_format_out = src_signed
+                                 ? PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102
+                                 : PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102;
+            break;
+
+         default:
+            return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+         }
+         break;
+
+      case 16:
+         if (!src_signed && !dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
+         else if (src_signed && !dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16;
+         else if (!src_signed && dst_signed)
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US16S16;
+         else
+            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16;
+
+         break;
+
+      case 32:
+         if (dont_force_pbe) {
+            count = vk_format_get_blocksizebits(dst_format) / 32U;
+         } else {
+            count =
+               vk_format_get_common_color_channel_count(src_format, dst_format);
+         }
+
+         /* Same signedness needs no conversion, so raw copies are used. */
+         if (!src_signed && !dst_signed) {
+            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
+                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
+         } else if (src_signed && !dst_signed) {
+            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32
+                                           : PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32;
+         } else if (!src_signed && dst_signed) {
+            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32
+                                           : PVR_TRANSFER_PBE_PIXEL_SRC_US32S32;
+         } else {
+            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
+                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
+         }
+         break;
+
+      default:
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+   } else if (vk_format_is_float(dst_format) ||
+              vk_format_is_normalized(dst_format)) {
+      bool is_float = true;
+
+      if (!vk_format_is_float(src_format) &&
+          !vk_format_is_normalized(src_format)) {
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+      if (vk_format_is_normalized(dst_format)) {
+         uint32_t chan_width;
+
+         is_float = false;
+
+         /* Alpha only. */
+         switch (dst_format) {
+         case VK_FORMAT_D16_UNORM:
+            chan_width = 16;
+            break;
+
+         default:
+            chan_width =
+               vk_format_get_component_bits(dst_format,
+                                            UTIL_FORMAT_COLORSPACE_RGB,
+                                            0U);
+            break;
+         }
+
+         if (src_format == dst_format) {
+            switch (chan_width) {
+            case 16U:
+               if (down_scale) {
+                  *src_format_out = dst_signed
+                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
+                                       : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
+               } else {
+                  *src_format_out = dst_signed
+                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16
+                                       : PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
+               }
+               break;
+
+            case 32U:
+               *src_format_out = pvr_pbe_src_format_raw(dst_format);
+               break;
+            default:
+               is_float = true;
+               break;
+            }
+         } else {
+            switch (chan_width) {
+            case 16U:
+               *src_format_out = dst_signed
+                                    ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
+                                    : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
+               break;
+            default:
+               is_float = true;
+               break;
+            }
+         }
+      }
+
+      if (is_float) {
+         if (vk_format_has_32bit_component(dst_format)) {
+            uint32_t count;
+
+            if (dont_force_pbe) {
+               count = vk_format_get_blocksizebits(dst_format) / 32U;
+            } else {
+               count = vk_format_get_common_color_channel_count(src_format,
+                                                                dst_format);
+            }
+
+            switch (count) {
+            case 1U:
+               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;
+               break;
+            case 2U:
+               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X2;
+               break;
+            default:
+               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X4;
+               break;
+            }
+         } else {
+            /* 8-bit UNORM dsts are written as U8 straight from F16. */
+            if (dst_format == VK_FORMAT_B8G8R8A8_UNORM ||
+                dst_format == VK_FORMAT_R8G8B8A8_UNORM) {
+               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8;
+            } else {
+               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16F16;
+            }
+         }
+      }
+   } else {
+      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* FAST2D blits ignore the per-command flags; all other blits pass them
+ * through unchanged.
+ */
+static inline uint32_t
+pvr_get_blit_flags(const struct pvr_transfer_cmd *transfer_cmd)
+{
+   return transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D
+             ? 0
+             : transfer_cmd->flags;
+}
+
+/* Select the layer's PBE pixel source format for a transfer command, based
+ * on the source/destination surface formats and resolve mode.
+ */
+static VkResult pvr_pbe_src_format(struct pvr_transfer_cmd *transfer_cmd,
+                                   struct pvr_transfer_3d_state *state,
+                                   struct pvr_tq_shader_properties *prop)
+{
+   struct pvr_tq_layer_properties *layer = &prop->layer_props;
+   VkFormat dst_format = transfer_cmd->dst.vk_format;
+   VkFormat src_format;
+   bool down_scale;
+
+   if (transfer_cmd->src_present) {
+      src_format = transfer_cmd->src.vk_format;
+      /* Down-scale only for a blend resolve from a multisampled src to a
+       * single-sampled dst.
+       */
+      down_scale = transfer_cmd->resolve_op == PVR_RESOLVE_BLEND &&
+                   transfer_cmd->src.sample_count > 1U &&
+                   transfer_cmd->dst.sample_count <= 1U;
+   } else {
+      src_format = dst_format;
+      down_scale = false;
+   }
+
+   /* This has to come before the rest as S8 for instance is integer and
+    * signedness check fails on D24S8.
+    */
+   if (!down_scale &&
+       (vk_format_is_depth_or_stencil(src_format) ||
+        vk_format_is_depth_or_stencil(dst_format) ||
+        pvr_get_blit_flags(transfer_cmd) & PVR_TRANSFER_CMD_FLAGS_DSMERGE)) {
+      pvr_finishme("Complete pvr_pbe_src_format().");
+   }
+
+   return pvr_pbe_src_format_normal(src_format,
+                                    dst_format,
+                                    down_scale,
+                                    state->dont_force_pbe,
+                                    &layer->pbe_format);
+}
+
+/* Pack the background-object PDS registers (CR_PDS_BGRND0/1/3) that point
+ * at the PDS shader task, texture unified code and texture state data.
+ */
+static inline void pvr_setup_hwbg_object(const struct pvr_device_info *dev_info,
+                                         struct pvr_transfer_3d_state *state)
+{
+   struct pvr_winsys_transfer_regs *regs = &state->regs;
+
+   pvr_csb_pack (&regs->pds_bgnd0_base, CR_PDS_BGRND0_BASE, reg) {
+      reg.shader_addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
+      assert(pvr_dev_addr_is_aligned(
+         reg.shader_addr,
+         PVRX(CR_PDS_BGRND0_BASE_SHADER_ADDR_ALIGNMENT)));
+      reg.texunicode_addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
+      assert(pvr_dev_addr_is_aligned(
+         reg.texunicode_addr,
+         PVRX(CR_PDS_BGRND0_BASE_TEXUNICODE_ADDR_ALIGNMENT)));
+   }
+
+   pvr_csb_pack (&regs->pds_bgnd1_base, CR_PDS_BGRND1_BASE, reg) {
+      reg.texturedata_addr = PVR_DEV_ADDR(state->tex_state_data_offset);
+      assert(pvr_dev_addr_is_aligned(
+         reg.texturedata_addr,
+         PVRX(CR_PDS_BGRND1_BASE_TEXTUREDATA_ADDR_ALIGNMENT)));
+   }
+
+   /* BGRND 2 not needed, background object PDS doesn't use uniform program. */
+
+   pvr_csb_pack (&regs->pds_bgnd3_sizeinfo, CR_PDS_BGRND3_SIZEINFO, reg) {
+      reg.usc_sharedsize =
+         DIV_ROUND_UP(state->common_ptr,
+                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
+
+      /* Sizes below must be exact multiples of their unit sizes. */
+      assert(!(state->uniform_data_size &
+               (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE) - 1)));
+      reg.pds_uniformsize =
+         state->uniform_data_size /
+         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE);
+
+      assert(
+         !(state->tex_state_data_size &
+           (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE) - 1)));
+      reg.pds_texturestatesize =
+         state->tex_state_data_size /
+         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE);
+
+      reg.pds_tempsize =
+         DIV_ROUND_UP(state->pds_temps,
+                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
+   }
+}
+
+/* Check a surface base address against the alignment required by the TPU
+ * (is_input) or the PBE (output), plus the bpp-derived granularity.
+ */
+static inline bool
+pvr_is_surface_aligned(pvr_dev_addr_t dev_addr, bool is_input, uint32_t bpp)
{
+   /* 96 bpp is 32 bit granular. */
+   if (bpp == 64U || bpp == 128U) {
+      uint64_t mask = (uint64_t)((bpp >> 3U) - 1U);
+
+      if ((dev_addr.addr & mask) != 0ULL)
+         return false;
+   }
+
+   if (is_input) {
+      if ((dev_addr.addr &
+           (PVRX(TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT) - 1U)) !=
+          0ULL) {
+         return false;
+      }
+   } else {
+      if ((dev_addr.addr &
+           (PVRX(PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT) - 1U)) != 0ULL) {
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Validate a transfer surface's memory layout and derive its width, height,
+ * stride, layout and base address. NOTE(review): the load parameter is not
+ * used in this body — presumably reserved for callers; confirm.
+ */
+static inline VkResult
+pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface *surface,
+                    uint32_t load,
+                    bool is_input,
+                    uint32_t *width_out,
+                    uint32_t *height_out,
+                    uint32_t *stride_out,
+                    enum pvr_memlayout *mem_layout_out,
+                    pvr_dev_addr_t *dev_addr_out)
+{
+   const uint32_t bpp = vk_format_get_blocksizebits(surface->vk_format);
+   uint32_t unsigned_stride;
+
+   *mem_layout_out = surface->mem_layout;
+   *height_out = surface->height;
+   *width_out = surface->width;
+   *stride_out = surface->stride;
+   *dev_addr_out = surface->dev_addr;
+
+   if (surface->mem_layout != PVR_MEMLAYOUT_LINEAR &&
+       !pvr_is_surface_aligned(*dev_addr_out, is_input, bpp)) {
+      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+   }
+
+   switch (surface->mem_layout) {
+   case PVR_MEMLAYOUT_LINEAR:
+      if (surface->stride == 0U)
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      unsigned_stride = *stride_out;
+
+      if (!pvr_is_surface_aligned(*dev_addr_out, is_input, bpp))
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      if (unsigned_stride < *width_out)
+         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      if (!is_input) {
+         if (unsigned_stride == 1U) {
+            /* Change the setup to twiddling as that doesn't hit the stride
+             * limit and twiddled == strided when 1px stride.
+             */
+            *mem_layout_out = PVR_MEMLAYOUT_TWIDDLED;
+         }
+      }
+
+      *stride_out = unsigned_stride;
+      break;
+
+   case PVR_MEMLAYOUT_TWIDDLED:
+   case PVR_MEMLAYOUT_3DTWIDDLED:
+      if (surface->stride != 0U)
+         mesa_logi("Ignoring stride value for twiddled/tiled surface!");
+
+      /* Twiddled surfaces are addressed by width, not by a stride. */
+      *stride_out = *width_out;
+      break;
+
+   default:
+      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Fill in default PBE surface and render parameters for the first (and
+ * usually only) render target of a transfer, including clip rectangles and
+ * the render extent in ISP tiles.
+ */
+static VkResult
+pvr_pbe_setup_codegen_defaults(const struct pvr_device_info *dev_info,
+                               const struct pvr_transfer_cmd *transfer_cmd,
+                               struct pvr_transfer_3d_state *state,
+                               struct pvr_pbe_surf_params *surface_params,
+                               struct pvr_pbe_render_params *render_params)
+{
+   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
+   const uint8_t *swizzle;
+   VkFormat format;
   VkResult result;

-   result = vk_sync_wait(&device->vk,
-                         wait_sync,
-                         0U,
-                         VK_SYNC_WAIT_COMPLETE,
-                         UINT64_MAX);
+   /* Packed 24-bit depth destinations are written as raw R32. */
+   switch (dst->vk_format) {
+   case VK_FORMAT_D24_UNORM_S8_UINT:
+   case VK_FORMAT_X8_D24_UNORM_PACK32:
+      format = VK_FORMAT_R32_UINT;
+      break;
+
+   default:
+      format = dst->vk_format;
+      break;
+   }
+
+   swizzle = pvr_get_format_swizzle(format);
+   memcpy(surface_params->swizzle, swizzle, sizeof(surface_params->swizzle));
+
+   pvr_pbe_get_src_format_and_gamma(format,
+                                    PVR_PBE_GAMMA_NONE,
+                                    false,
+                                    &surface_params->source_format,
+                                    &surface_params->gamma);
+
+   surface_params->is_normalized = vk_format_is_normalized(format);
+   surface_params->pbe_packmode = pvr_get_pbe_packmode(format);
+   surface_params->nr_components = vk_format_get_nr_components(format);
+
+   result = pvr_mem_layout_spec(dst,
+                                0U,
+                                false,
+                                &surface_params->width,
+                                &surface_params->height,
+                                &surface_params->stride,
+                                &surface_params->mem_layout,
+                                &surface_params->addr);
   if (result != VK_SUCCESS)
      return result;

-   list_for_each_entry_safe (struct pvr_transfer_cmd,
-                             transfer_cmd,
-                             &sub_cmd->transfer_cmds,
-                             link) {
-      bool src_mapped = false;
-      bool dst_mapped = false;
-      void *src_addr;
-      void *dst_addr;
-      void *ret_ptr;
+   surface_params->z_only_render = false;
+   surface_params->gamma = PVR_PBE_GAMMA_NONE;
+   surface_params->depth = dst->depth;
+   surface_params->down_scale = state->down_scale;
+
+   if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+      render_params->slice = (uint32_t)MAX2(dst->z_position, 0.0f);
+   else
+      render_params->slice = 0U;
+
+   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
+   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
+
+   /* If the rectangle happens to be empty / off-screen we clip away
+    * everything.
+    */
+   if (state->empty_dst) {
+      render_params->min_x_clip = 2U * tile_size_x;
+      render_params->max_x_clip = 3U * tile_size_x;
+      render_params->min_y_clip = 2U * tile_size_y;
+      render_params->max_y_clip = 3U * tile_size_y;
+      state->origin_x_in_tiles = 0U;
+      state->origin_y_in_tiles = 0U;
+      state->height_in_tiles = 1U;
+      state->width_in_tiles = 1U;
+   } else {
+      const VkRect2D *scissor = &transfer_cmd->scissor;
+
+      /* Clamp */
+      render_params->min_x_clip =
+         MAX2(MIN2(scissor->offset.x, (int32_t)surface_params->width), 0U);
+      render_params->max_x_clip =
+         MAX2(MIN2(scissor->offset.x + scissor->extent.width,
+                   (int32_t)surface_params->width),
+              0U) -
+         1U;
+
+      /* NOTE(review): unlike the x path, height is not cast to int32_t
+       * here, so a negative offset.y compares as unsigned — confirm
+       * intended.
+       */
+      render_params->min_y_clip =
+         MAX2(MIN2(scissor->offset.y, surface_params->height), 0U);
+      render_params->max_y_clip =
+         MAX2(MIN2(scissor->offset.y + scissor->extent.height,
+                   surface_params->height),
+              0U) -
+         1U;
+
+      if (state->custom_mapping.pass_count > 0U) {
+         struct pvr_transfer_pass *pass =
+            &state->custom_mapping.passes[state->pass_idx];
+
+         render_params->min_x_clip = (uint32_t)pass->clip_rects[0U].offset.x;
+         render_params->max_x_clip =
+            (uint32_t)(pass->clip_rects[0U].offset.x +
+                       pass->clip_rects[0U].extent.width) -
+            1U;
+         render_params->min_y_clip = (uint32_t)pass->clip_rects[0U].offset.y;
+         render_params->max_y_clip =
+            (uint32_t)(pass->clip_rects[0U].offset.y +
+                       pass->clip_rects[0U].extent.height) -
+            1U;
+      }
+
+      state->origin_x_in_tiles = render_params->min_x_clip / tile_size_x;
+      state->origin_y_in_tiles = render_params->min_y_clip / tile_size_y;
+      state->width_in_tiles =
+         (render_params->max_x_clip + tile_size_x) / tile_size_x;
+      state->height_in_tiles =
+         (render_params->max_y_clip + tile_size_y) / tile_size_y;
+
+      /* Be careful here as this isn't the same as ((max_x_clip -
+       * min_x_clip) + tile_size_x) >> tile_size_x.
+       */
+      state->width_in_tiles -= state->origin_x_in_tiles;
+      state->height_in_tiles -= state->origin_y_in_tiles;
+   }
+
+   render_params->source_start = PVR_PBE_STARTPOS_BIT0;
+   render_params->mrt_index = 0U;
+
+   return VK_SUCCESS;
+}
+
+/* Adjust PBE parameters for render targets beyond the first (rt_idx > 0).
+ * Currently unimplemented; returns success as a placeholder.
+ */
+static VkResult
+pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface *dst,
+                              struct pvr_transfer_3d_state *state,
+                              uint32_t rt_idx,
+                              struct pvr_pbe_surf_params *surf_params,
+                              struct pvr_pbe_render_params *render_params)
+{
+   pvr_finishme("Implement pvr_pbe_setup_modify_defaults().");
+   return VK_SUCCESS;
+}
+
+/* Size of one pixel, in 32-bit USC output-buffer registers, for a given PBE
+ * pixel source format. Returns 0 for unhandled formats.
+ */
+static uint32_t
+pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)
+{
+   switch (pixel_format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+      return 1U;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+      return 2U;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+      return 4U;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
+   default:
+      break;
+   }
+
+   return 0U;
+}
- /* Map if bo is not mapped. */
- if (!transfer_cmd->src->vma->bo->map) {
- src_mapped = true;
- ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
- if (!ret_ptr)
- return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+/* Fix up the PBE swizzle and source format for cases the generic per-format
+ * swizzle does not cover: alpha-only dsts, 8-bit normalized dsts, fills and
+ * 32-bit multi-channel dsts.
+ */
+static void pvr_pbe_setup_swizzle(const struct pvr_transfer_cmd *transfer_cmd,
+                                  struct pvr_transfer_3d_state *state,
+                                  struct pvr_pbe_surf_params *surf_params)
+{
+   bool color_fill = !!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
+   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
+
+   const uint32_t pixel_size =
+      pvr_pbe_get_pixel_size(state->shader_props.layer_props.pbe_format);
+
+   state->usc_pixel_width = MAX2(pixel_size, 1U);
+
+   switch (dst->vk_format) {
+   case VK_FORMAT_X8_D24_UNORM_PACK32:
+   case VK_FORMAT_D24_UNORM_S8_UINT:
+      unreachable("Handle depth stencil format swizzle.");
+      break;
+
+   default: {
+      const uint32_t red_width =
+         vk_format_get_component_bits(dst->vk_format,
+                                      UTIL_FORMAT_COLORSPACE_RGB,
+                                      0U);
+
+      if (transfer_cmd->src_present && vk_format_is_alpha(dst->vk_format)) {
+         if (vk_format_has_alpha(transfer_cmd->src.vk_format)) {
+            /* Modify the destination format swizzle to always source from
+             * src0.
+             */
+            surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
+            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
+            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
+            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
+            break;
+         }
+
+         /* Source format having no alpha channel still allocates 4 output
+          * buffer registers.
+          */
+      }
+
+      if (vk_format_is_normalized(dst->vk_format)) {
+         if (color_fill && (dst->vk_format == VK_FORMAT_B8G8R8A8_UNORM ||
+                            dst->vk_format == VK_FORMAT_R8G8B8A8_UNORM)) {
+            surf_params->source_format =
+               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+         } else if (state->shader_props.layer_props.pbe_format ==
+                    PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8) {
+            surf_params->source_format =
+               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+         } else if (red_width <= 8U) {
+            surf_params->source_format =
+               PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+         }
+      } else if (red_width == 32U && !state->dont_force_pbe) {
+         uint32_t count = 0U;
+
+         if (transfer_cmd->src_present) {
+            VkFormat src_format = transfer_cmd->src.vk_format;
+            count = vk_format_get_common_color_channel_count(src_format,
+                                                             dst->vk_format);
+         }
+
+         /* Zero / saturate channels the source does not provide. */
+         switch (count) {
+         case 1U:
+            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
+            FALLTHROUGH;
+         case 2U:
+            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
+            FALLTHROUGH;
+         case 3U:
+            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
+            break;
+
+         case 4U:
+         default:
+            break;
+         }
+      } else {
+         unreachable("Invalid case in pvr_pbe_setup_swizzle.");
+      }
+      break;
+   }
+   }
+}
+
+/**
+ * Calculates the required PBE byte mask based on the incoming transfer command.
+ *
+ * @param dev_info device info; the core must have ERN 42064
+ * @param transfer_cmd the transfer command
+ * @return the bytemask (active high disable mask)
+ */
+
+static uint64_t pvr_pbe_byte_mask(const struct pvr_device_info *dev_info,
+                                  const struct pvr_transfer_cmd *transfer_cmd)
+{
+   uint32_t flags = pvr_get_blit_flags(transfer_cmd);
+
+   assert(PVR_HAS_ERN(dev_info, 42064));
+
+   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
+      uint32_t mask = 0U;
+
+      /* Select the depth bytes of the packed depth/stencil format. */
+      switch (transfer_cmd->dst.vk_format) {
+      case VK_FORMAT_D32_SFLOAT_S8_UINT:
+         mask = 0xF0F0F0F0U;
+         break;
+      case VK_FORMAT_D24_UNORM_S8_UINT:
+         mask = 0x88888888U;
+         break;
+      default:
+         break;
      }

-      if (!transfer_cmd->dst->vma->bo->map) {
-         dst_mapped = true;
-         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
-         if (!ret_ptr)
-            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+      /* Without PICKD the stencil bytes are the ones kept instead. */
+      if ((flags & PVR_TRANSFER_CMD_FLAGS_PICKD) == 0U)
+         mask = ~mask;
+
+      return mask;
+   }
+
+   /* The mask is as it was inactive on cores without the ERN. This keeps the
+    * firmware agnostic to the feature.
+    */
+   return 0U;
+}
+
+/* Upload the end-of-tile PDS pixel-event program for rt_count render
+ * targets and pack the CR_EVENT_PIXEL_PDS_* registers that reference it.
+ */
+static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
+                                   struct pvr_transfer_ctx *ctx,
+                                   struct pvr_transfer_3d_state *state,
+                                   uint32_t rt_count,
+                                   uint32_t *pbe_setup_words)
+{
+   struct pvr_device *const device = ctx->device;
+   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+   struct pvr_winsys_transfer_regs *regs = &state->regs;
+   struct pvr_pds_event_program program = {
+      .emit_words = pbe_setup_words,
+      .num_emit_word_pairs = rt_count,
+   };
+   struct pvr_pds_upload pds_upload;
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   /* Precondition, make sure to use a valid index for ctx->usc_eot_bos. */
+   assert(rt_count <= ARRAY_SIZE(ctx->usc_eot_bos));
+   assert(rt_count > 0U);
+
+   /* USC-heap-relative address of the EOT shader for this rt_count. */
+   addr.addr = ctx->usc_eot_bos[rt_count - 1U]->vma->dev_addr.addr -
+               device->heaps.usc_heap->base_addr.addr;
+
+   pvr_pds_setup_doutu(&program.task_control,
+                       addr.addr,
+                       0U,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   pvr_pds_set_sizes_pixel_event(&program, dev_info);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc(&device->vk.alloc,
+                             staging_buffer_size,
+                             8U,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pvr_pds_generate_pixel_event_data_segment(&program,
+                                             staging_buffer,
+                                             dev_info);
+
+   /* TODO: We can save some memory by generating a code segment for each
+    * rt_count, which at the time of writing is a maximum of 3, in
+    * pvr_setup_transfer_eot_shaders() when we setup the corresponding EOT
+    * USC programs.
+    */
+   pvr_pds_generate_pixel_event_code_segment(&program,
+                                             staging_buffer + program.data_size,
+                                             dev_info);
+
+   result =
+      pvr_cmd_buffer_upload_pds(transfer_cmd->cmd_buffer,
+                                staging_buffer,
+                                program.data_size,
+                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
+                                staging_buffer + program.data_size,
+                                program.code_size,
+                                PVRX(CR_EVENT_PIXEL_PDS_CODE_ADDR_ALIGNMENT),
+                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
+                                &pds_upload);
+   /* Staging copy is consumed by the upload either way. */
+   vk_free(&device->vk.alloc, staging_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pvr_csb_pack (&regs->event_pixel_pds_info, CR_EVENT_PIXEL_PDS_INFO, reg) {
+      reg.temp_stride = 0U;
+      reg.const_size =
+         DIV_ROUND_UP(program.data_size,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
+      reg.usc_sr_size =
+         DIV_ROUND_UP(rt_count * PVR_STATE_PBE_DWORDS,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
+   }
+
+   pvr_csb_pack (&regs->event_pixel_pds_data, CR_EVENT_PIXEL_PDS_DATA, reg) {
+      reg.addr = PVR_DEV_ADDR(pds_upload.data_offset);
+   }
+
+   pvr_csb_pack (&regs->event_pixel_pds_code, CR_EVENT_PIXEL_PDS_CODE, reg) {
+      reg.addr = PVR_DEV_ADDR(pds_upload.code_offset);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Run PBE state setup for every render target of the transfer, pack the PBE
+ * registers/emit words and upload the matching end-of-tile program.
+ */
+static VkResult pvr_pbe_setup(const struct pvr_transfer_cmd *transfer_cmd,
+                              struct pvr_transfer_ctx *ctx,
+                              struct pvr_transfer_3d_state *state)
+{
+   struct pvr_device *const device = ctx->device;
+   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
+   uint32_t num_rts = vk_format_get_plane_count(dst->vk_format);
+   uint32_t pbe_setup_words[PVR_TRANSFER_MAX_RENDER_TARGETS *
+                            ROGUE_NUM_PBESTATE_STATE_WORDS];
+   struct pvr_pbe_render_params render_params;
+   struct pvr_pbe_surf_params surf_params;
+   VkResult result;
+
+   /* Custom mappings emit one rt per clip rect of the current pass. */
+   if (state->custom_mapping.pass_count > 0U)
+      num_rts = state->custom_mapping.passes[state->pass_idx].clip_rects_count;
+
+   if (PVR_HAS_FEATURE(dev_info, paired_tiles))
+      state->pair_tiles = PVR_PAIRED_TILES_NONE;
+
+   for (uint32_t i = 0U; i < num_rts; i++) {
+      uint64_t *pbe_regs;
+      uint32_t *pbe_words;
+
+      /* Ensure the access into the pbe_wordx_mrty is made within its bounds. */
+      assert(i * ROGUE_NUM_PBESTATE_REG_WORDS <
+             ARRAY_SIZE(state->regs.pbe_wordx_mrty));
+      /* Ensure the access into pbe_setup_words is made within its bounds. */
+      assert(i * ROGUE_NUM_PBESTATE_STATE_WORDS < ARRAY_SIZE(pbe_setup_words));
+
+      pbe_regs = &state->regs.pbe_wordx_mrty[i * ROGUE_NUM_PBESTATE_REG_WORDS];
+      pbe_words = &pbe_setup_words[i * ROGUE_NUM_PBESTATE_STATE_WORDS];
+
+      if (PVR_HAS_ERN(dev_info, 42064))
+         pbe_regs[2U] = 0UL;
+
+      if (i == 0U) {
+         result = pvr_pbe_setup_codegen_defaults(dev_info,
+                                                 transfer_cmd,
+                                                 state,
+                                                 &surf_params,
+                                                 &render_params);
+         if (result != VK_SUCCESS)
+            return result;
+      } else {
+         result = pvr_pbe_setup_modify_defaults(dst,
+                                                state,
+                                                i,
+                                                &surf_params,
+                                                &render_params);
+         if (result != VK_SUCCESS)
+            return result;
      }

-      src_addr =
-         transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
-      dst_addr =
-         transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;
+      pvr_pbe_setup_swizzle(transfer_cmd, state, &surf_params);

-      for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
-         VkBufferCopy2 *region = &transfer_cmd->regions[i];
+      pvr_pbe_pack_state(dev_info,
+                         &surf_params,
+                         &render_params,
+                         pbe_words,
+                         pbe_regs);

-         memcpy(dst_addr + region->dstOffset,
-                src_addr + region->srcOffset,
-                region->size);
+      if (PVR_HAS_ERN(dev_info, 42064)) {
+         uint64_t temp_reg;
+
+         pvr_csb_pack (&temp_reg, PBESTATE_REG_WORD2, reg) {
+            reg.sw_bytemask = pvr_pbe_byte_mask(dev_info, transfer_cmd);
+         }
+
+         pbe_regs[2U] |= temp_reg;
+      }
+
+      /* Record the pairing direction chosen by the PBE packing. */
+      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
+         if (pbe_regs[2U] &
+             (1ULL << PVRX(PBESTATE_REG_WORD2_PAIR_TILES_SHIFT))) {
+            if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED)
+               state->pair_tiles = PVR_PAIRED_TILES_Y;
+            else
+               state->pair_tiles = PVR_PAIRED_TILES_X;
+         }
      }
   }

+   result =
+      pvr_pbe_setup_emit(transfer_cmd, ctx, state, num_rts, pbe_setup_words);
+   if (result != VK_SUCCESS)
+      return result;

-   if (src_mapped)
-      device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+   /* Adjust tile origin and width to include all emits. */
+   if (state->custom_mapping.pass_count > 0U) {
+      const uint32_t tile_size_x =
+         PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
+      const uint32_t tile_size_y =
+         PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
+      struct pvr_transfer_pass *pass =
+         &state->custom_mapping.passes[state->pass_idx];
+      VkOffset2D offset = { 0U, 0U };
+      VkOffset2D end = { 0U, 0U };

-   if (dst_mapped)
-      device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
+      for (uint32_t i = 0U; i < pass->clip_rects_count; i++) {
+         VkRect2D *rect = &pass->clip_rects[i];
+
+         offset.x = MIN2(offset.x, rect->offset.x);
+         offset.y = MIN2(offset.y, rect->offset.y);
+         end.x = MAX2(end.x, rect->offset.x + rect->extent.width);
+         end.y = MAX2(end.y, rect->offset.y + rect->extent.height);
+      }
+
+      state->origin_x_in_tiles = (uint32_t)offset.x / tile_size_x;
+      state->origin_y_in_tiles = (uint32_t)offset.y / tile_size_y;
+      state->width_in_tiles =
+         DIV_ROUND_UP((uint32_t)end.x, tile_size_x) - state->origin_x_in_tiles;
+      state->height_in_tiles =
+         DIV_ROUND_UP((uint32_t)end.y, tile_size_y) - state->origin_y_in_tiles;
   }

-   /* Given we are doing CPU based copy, completion fence should always be
-    * signaled. This should be fixed when GPU based copy is implemented.
+   return VK_SUCCESS;
+}
+
+/**
+ * Writes the ISP tile registers according to the MSAA state. Sets up the USC
+ * pixel partition allocations and the number of tiles in flight.
+ *
+ * Reads the tile-space origin/extent previously computed into \p state,
+ * rescales them for the MSAA mode, and packs CR_ISP_MTILE_SIZE,
+ * CR_ISP_RENDER_ORIGIN, CR_ISP_CTL and USC_PIXEL_OUTPUT_CTRL into
+ * \p state->regs. Always returns VK_SUCCESS.
+ */
+static VkResult pvr_isp_tiles(const struct pvr_device *device,
+ struct pvr_transfer_3d_state *state)
+{
+ const struct pvr_device_runtime_info *dev_runtime_info =
+ &device->pdevice->dev_runtime_info;
+ const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+ const uint32_t isp_samples =
+ PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1U);
+ uint32_t origin_x = state->origin_x_in_tiles;
+ uint32_t origin_y = state->origin_y_in_tiles;
+ uint32_t width = state->width_in_tiles;
+ uint32_t height = state->height_in_tiles;
+ uint32_t isp_tiles_in_flight;
+
+ /* msaa_multiplier is calculated by sample_count & ~1U. Given sample
+ * count is always in powers of two, we can get the sample count from
+ * msaa_multiplier using the following logic.
+ */
+ const uint32_t samples = MAX2(state->msaa_multiplier, 1U);
+
+ /* isp_samples_per_pixel feature is also know as "2x/4x for free", when
+ * this is present SAMPLES_PER_PIXEL is 2/4, otherwise 1. The following
+ * logic should end up with these numbers:
+ *
+ * |---------------------------------|
+ * | 4 SAMPLES / ISP PIXEL |
+ * |-----------------------+----+----|
+ * | MSAA | X* | Y* |
+ * | 2X | 1 | 1 |
+ * | 4X | 1 | 1 |
+ * |---------------------------------|
+ * | 2 SAMPLES / ISP PIXEL |
+ * |-----------------------+----+----|
+ * | MSAA | X* | Y* |
+ * | 2X | 1 | 1 |
+ * | 4X | 1 | 2 |
+ * | 8X | 2 | 2 |
+ * |-----------------------+----+----|
+ * | 1 SAMPLE / ISP PIXEL |
+ * |-----------------------+----+----|
+ * | MSAA | X* | Y* |
+ * | 2X | 1 | 2 |
+ * | 4X | 2 | 2 |
+ * |-----------------------+----+----|
+ */
+
+ /* Shift amounts below select the X*/Y* doubling factors from the table
+ * above out of the power-of-two msaa_multiplier.
+ */
+ origin_x <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
+ origin_y <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
+ (state->msaa_multiplier >> isp_samples)) &
+ 1U;
+ width <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
+ height <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
+ (state->msaa_multiplier >> isp_samples)) &
+ 1U;
+
+ /* Paired-tile rendering consumes tiles two at a time, so the extent must
+ * be even in both dimensions.
+ */
+ if (PVR_HAS_FEATURE(dev_info, paired_tiles) &&
+ state->pair_tiles != PVR_PAIRED_TILES_NONE) {
+ width = ALIGN_POT(width, 2U);
+ height = ALIGN_POT(height, 2U);
+ }
+
+ pvr_csb_pack (&state->regs.isp_mtile_size, CR_ISP_MTILE_SIZE, reg) {
+ reg.x = width;
+ reg.y = height;
+ }
+
+ pvr_csb_pack (&state->regs.isp_render_origin, CR_ISP_RENDER_ORIGIN, reg) {
+ reg.x = origin_x;
+ reg.y = origin_y;
+ }
+
+ pvr_setup_tiles_in_flight(dev_info,
+ dev_runtime_info,
+ pvr_cr_isp_aa_mode_type(samples),
+ state->usc_pixel_width,
+ state->pair_tiles != PVR_PAIRED_TILES_NONE,
+ 0,
+ &isp_tiles_in_flight,
+ &state->regs.usc_pixel_output_ctrl);
+
+ pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, reg) {
+ reg.process_empty_tiles = true;
+
+ if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
+ if (state->pair_tiles == PVR_PAIRED_TILES_X) {
+ reg.pair_tiles = true;
+ } else if (state->pair_tiles == PVR_PAIRED_TILES_Y) {
+ reg.pair_tiles = true;
+ reg.pair_tiles_vert = true;
+ }
+ }
+ }
+
+ /* NOTE(review): isp_tiles_in_flight is OR-ed in raw — presumably
+ * pvr_setup_tiles_in_flight() returns the field already shifted into its
+ * CR_ISP_CTL position; confirm against that helper.
+ */
+ state->regs.isp_ctl |= isp_tiles_in_flight;
+
+ return VK_SUCCESS;
+}
+
+/**
+ * Returns true when writing \p pbe_format changes the output rate relative to
+ * the destination, i.e. for planar/interleaved YUV formats on cores without
+ * the pbe_yuv feature.
+ */
+static bool
+pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info *dev_info,
+ enum pvr_transfer_pbe_pixel_src pbe_format)
+{
+ /* We don't emulate rate change from the USC with the pbe_yuv feature. */
+ if (!PVR_HAS_FEATURE(dev_info, pbe_yuv) &&
+ (pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED ||
+ pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V)) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Decides whether the destination rectangle is empty/fully clipped
+ * (state->empty_dst) and, if a source is present, how many per-layer float
+ * coordinates (layer_floats) the transfer shader needs: 0 for a 1:1 copy,
+ * 4 for scaled/filtered blits, 6 when the pixel format additionally forces
+ * a rate change in the USC.
+ */
+static void pvr_uv_space(const struct pvr_device_info *dev_info,
+ const struct pvr_transfer_cmd *transfer_cmd,
+ struct pvr_transfer_3d_state *state)
+{
+ const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
+ const VkRect2D *dst_rect = &transfer_cmd->scissor;
+
+ /* This also avoids division by 0 in pvr_dma_texture_floats(). */
+ /* NOTE(review): because extent fields are unsigned, the MAX2(...) < 0U
+ * comparisons are evaluated in unsigned arithmetic and can never be true;
+ * verify the intended "rect entirely left/above the surface" clip check.
+ */
+ if (state->custom_mapping.pass_count == 0U &&
+ (dst_rect->extent.width == 0U || dst_rect->extent.height == 0U ||
+ MAX2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) <
+ 0U ||
+ MIN2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) >
+ (int32_t)dst->width ||
+ MAX2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) <
+ 0U ||
+ MIN2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) >
+ (int32_t)dst->height)) {
+ state->empty_dst = true;
+ } else {
+ state->empty_dst = false;
+
+ if (transfer_cmd->src_present) {
+ struct pvr_tq_layer_properties *layer =
+ &state->shader_props.layer_props;
+
+ /* Note: these shadow the scissor-based dst_rect above — from here on
+ * the first mapping's rectangles are what matter.
+ */
+ const VkRect2D *src_rect = &transfer_cmd->mappings[0U].src_rect;
+ const VkRect2D *dst_rect = &transfer_cmd->mappings[0U].dst_rect;
+ int32_t dst_x1 = dst_rect->offset.x + dst_rect->extent.width;
+ int32_t dst_y1 = dst_rect->offset.y + dst_rect->extent.height;
+ int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
+ int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;
+
+ if (state->filter > PVR_FILTER_POINT) {
+ layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
+ } else if (src_rect->extent.width == 0U ||
+ src_rect->extent.height == 0U) {
+ layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
+ } else if ((src_rect->offset.x * dst_x1 !=
+ src_x1 * dst_rect->offset.x) ||
+ (src_rect->offset.y * dst_y1 !=
+ src_y1 * dst_rect->offset.y) ||
+ (src_rect->extent.width != dst_rect->extent.width) ||
+ (src_rect->extent.height != dst_rect->extent.height)) {
+ /* Cross-multiplied offset check: src and dst rects are not an
+ * identical, unscaled mapping, so per-layer coords are required.
+ */
+ layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
+ } else {
+ layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
+ }
+
+ /* We have to adjust the rate. */
+ if (layer->layer_floats != PVR_INT_COORD_SET_FLOATS_0 &&
+ pvr_int_pbe_pixel_changes_dst_rate(dev_info, layer->pbe_format)) {
+ layer->layer_floats = PVR_INT_COORD_SET_FLOATS_6;
+ }
+ }
+ }
+}
+
+/**
+ * Number of sampler/image state pairs needed for \p pbe_format. YUV formats
+ * share one sampler+image state even though they issue multiple loads; all
+ * other formats need one state per load.
+ */
+static uint32_t pvr_int_pbe_pixel_num_sampler_and_image_states(
+ enum pvr_transfer_pbe_pixel_src pbe_format,
+ uint32_t alpha_type)
+{
+ switch (pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
+ return 1U;
+ default:
+ return pvr_pbe_pixel_num_loads(pbe_format, alpha_type);
+ }
+}
+
+/**
+ * Packs TEXSTATE_SAMPLER words for \p surface (clamp-to-edge, LOD clamped to
+ * min, filter chosen from \p filter) and copies them into the shared-register
+ * staging buffer at the slot given by \p sampler. Always returns VK_SUCCESS.
+ */
+static VkResult pvr_sampler_state_for_surface(
+ const struct pvr_device_info *dev_info,
+ const struct pvr_transfer_cmd_surface *surface,
+ enum pvr_filter filter,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ uint32_t sampler,
+ uint32_t *mem_ptr)
+{
+ uint64_t sampler_state[2U] = { 0UL, 0UL };
+
+ pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
+ reg.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
+ reg.minlod = PVRX(TEXSTATE_CLAMP_MIN);
+ reg.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
+ reg.dadjust = PVRX(TEXSTATE_DADJUST_MIN_UINT);
+
+ if (filter == PVR_FILTER_DONTCARE || filter == PVR_FILTER_POINT) {
+ reg.minfilter = PVRX(TEXSTATE_FILTER_POINT);
+ reg.magfilter = PVRX(TEXSTATE_FILTER_POINT);
+ } else if (filter == PVR_FILTER_LINEAR) {
+ reg.minfilter = PVRX(TEXSTATE_FILTER_LINEAR);
+ reg.magfilter = PVRX(TEXSTATE_FILTER_LINEAR);
+ } else {
+ /* Anything else is bicubic, only valid with the HW feature. */
+ assert(PVR_HAS_FEATURE(dev_info, tf_bicubic_filter));
+ reg.minfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
+ reg.magfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
+ }
+
+ reg.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+ reg.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+
+ /* 3D twiddled surfaces sample in W as well. */
+ if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+ reg.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+ }
+
+ assert(sampler < PVR_TRANSFER_MAX_IMAGES);
+
+ /* NOTE(review): '<=' here vs '<' in pvr_image_state_for_surface() — an
+ * index equal to count would read past the offsets array; confirm intent.
+ */
+ assert(sampler <= sh_reg_layout->combined_image_samplers.count);
+ mem_ptr += sh_reg_layout->combined_image_samplers.offsets[sampler].sampler;
+
+ memcpy(mem_ptr, sampler_state, sizeof(sampler_state));
+
+ return VK_SUCCESS;
+}
+
+/**
+ * Builds default texture state for \p surface as used by the transfer
+ * shaders: remaps depth/stencil formats to raw integer formats the TPU can
+ * sample, fills in extent/stride/layout via pvr_mem_layout_spec(), and packs
+ * the final state words into \p mem_ptr with pvr_pack_tex_state().
+ *
+ * \param load  Pass/load index forwarded to pvr_mem_layout_spec().
+ */
+static inline VkResult pvr_image_state_set_codegen_defaults(
+ struct pvr_device *device,
+ struct pvr_transfer_3d_state *state,
+ const struct pvr_transfer_cmd_surface *surface,
+ uint32_t load,
+ uint64_t *mem_ptr)
+{
+ struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
+ struct pvr_texture_state_info info = { 0U };
+ VkResult result;
+
+ switch (surface->vk_format) {
+ /* ERN 46863 */
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ /* For raw/merge pixel paths sample D32S8 as two 32-bit channels. */
+ switch (layer->pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+ info.format = VK_FORMAT_R32G32_UINT;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ /* Packed 32-bit depth(+stencil) is sampled as one raw uint. */
+ info.format = VK_FORMAT_R32_UINT;
+ break;
+
+ default:
+ info.format = surface->vk_format;
+ break;
+ }
+
+ info.flags = 0U;
+ info.base_level = 0U;
+ info.mip_levels = 1U;
+ info.mipmaps_present = false;
+ info.sample_count = MAX2(surface->sample_count, 1U);
+
+ if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+ info.extent.depth = surface->depth;
+ else
+ info.extent.depth = 0U;
+
+ if (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
+ info.array_size = 0U;
+
+ result = pvr_mem_layout_spec(surface,
+ load,
+ true,
+ &info.extent.width,
+ &info.extent.height,
+ &info.stride,
+ &info.mem_layout,
+ &info.addr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* A texel-extended destination was widened by the custom mapping; undo
+ * that scale so the sampled image keeps its logical width.
+ */
+ if (state->custom_mapping.texel_extend_dst > 1U) {
+ info.extent.width /= state->custom_mapping.texel_extend_dst;
+ info.stride /= state->custom_mapping.texel_extend_dst;
+ }
+
+ info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
+ memcpy(info.swizzle,
+ pvr_get_format_swizzle(info.format),
+ sizeof(info.swizzle));
+
+ /* View type is inferred from the extent (depth > 0 only for 3DTWIDDLED). */
+ if (info.extent.depth > 0U)
+ info.type = VK_IMAGE_VIEW_TYPE_3D;
+ else if (info.extent.height > 1U)
+ info.type = VK_IMAGE_VIEW_TYPE_2D;
+ else
+ info.type = VK_IMAGE_VIEW_TYPE_1D;
+
+ result = pvr_pack_tex_state(device, &info, mem_ptr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ return VK_SUCCESS;
+}
+
+/**
+ * Generates hardware image state for \p surface and copies it into the
+ * shared-register staging buffer at the combined image/sampler slot
+ * \p uf_image.
+ *
+ * NOTE(review): \p transfer_cmd is currently unused here — kept for symmetry
+ * with the sampler path; confirm before removing.
+ */
+static VkResult pvr_image_state_for_surface(
+ const struct pvr_transfer_ctx *ctx,
+ const struct pvr_transfer_cmd *transfer_cmd,
+ const struct pvr_transfer_cmd_surface *surface,
+ uint32_t load,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ struct pvr_transfer_3d_state *state,
+ uint32_t uf_image,
+ uint32_t *mem_ptr)
+{
+ uint32_t tex_state[ROGUE_MAXIMUM_IMAGE_STATE_SIZE] = { 0U };
+ VkResult result;
+ uint8_t offset;
+
+ result = pvr_image_state_set_codegen_defaults(ctx->device,
+ state,
+ surface,
+ load,
+ (uint64_t *)tex_state);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(uf_image < PVR_TRANSFER_MAX_IMAGES);
+
+ /* Offset of the shared registers containing the hardware image state. */
+ assert(uf_image < sh_reg_layout->combined_image_samplers.count);
+ offset = sh_reg_layout->combined_image_samplers.offsets[uf_image].image;
+
+ /* Copy the image state to the buffer which is loaded into the shared
+ * registers. Note this copies the full maximum-size state array.
+ */
+ memcpy(mem_ptr + offset, tex_state, sizeof(tex_state));
+
+ return VK_SUCCESS;
+}
+
+/* Writes the texture state/sampler state into DMAed memory.
+ *
+ * For every load the current pixel format requires, picks the surface to
+ * sample (src, or dst for merge/blend second loads) and the filter, then
+ * emits one sampler + image state pair per distinct state the format needs
+ * (see pvr_int_pbe_pixel_num_sampler_and_image_states()). No-op when the
+ * destination is empty.
+ */
+static VkResult
+pvr_sampler_image_state(struct pvr_transfer_ctx *ctx,
+ const struct pvr_transfer_cmd *transfer_cmd,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ struct pvr_transfer_3d_state *state,
+ uint32_t *mem_ptr)
+{
+ if (!state->empty_dst) {
+ struct pvr_tq_layer_properties *layer =
+ &state->shader_props.layer_props;
+ uint32_t max_load =
+ pvr_pbe_pixel_num_loads(layer->pbe_format,
+ state->shader_props.alpha_type);
+ uint32_t uf_sampler = 0U;
+ uint32_t uf_image = 0U;
+
+ for (uint32_t load = 0U; load < max_load; load++) {
+ const struct pvr_transfer_cmd_surface *surface;
+ enum pvr_filter filter;
+ VkResult result;
+
+ switch (layer->pbe_format) {
+ /* Merge/blend formats: the second load reads back the dst. */
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+ if (load > 0U) {
+ surface = &transfer_cmd->dst;
+
+ /* Alpha blending reads the dst unfiltered. */
+ if (state->shader_props.alpha_type != PVR_ALPHA_NONE)
+ filter = PVR_FILTER_POINT;
+ else
+ filter = transfer_cmd->filter;
+ } else {
+ surface = &transfer_cmd->src;
+ filter = state->filter;
+ }
+ break;
+
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
+ surface = &transfer_cmd->src;
+ filter = state->filter;
+ break;
+
+ default:
+ surface = &transfer_cmd->src;
+ filter = state->filter;
+ break;
+ }
+
+ /* YUV formats issue several loads but share one state pair. */
+ if (load < pvr_int_pbe_pixel_num_sampler_and_image_states(
+ layer->pbe_format,
+ state->shader_props.alpha_type)) {
+ const struct pvr_device_info *dev_info =
+ &transfer_cmd->cmd_buffer->device->pdevice->dev_info;
+
+ result = pvr_sampler_state_for_surface(dev_info,
+ surface,
+ filter,
+ sh_reg_layout,
+ uf_sampler,
+ mem_ptr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ uf_sampler++;
+
+ result = pvr_image_state_for_surface(ctx,
+ transfer_cmd,
+ surface,
+ load,
+ sh_reg_layout,
+ state,
+ uf_image,
+ mem_ptr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ uf_image++;
+ }
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+/* The returned offset is in dwords.
+ *
+ * Returns the next free dynamic-constant register slot and bumps the
+ * per-prepare cursor in \p state; asserts we stay within the layout's
+ * dynamic_consts allocation.
+ */
+static inline uint32_t pvr_dynamic_const_reg_advance(
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ struct pvr_transfer_3d_state *state)
+{
+ const uint32_t offset = sh_reg_layout->dynamic_consts.offset;
+
+ assert(state->dynamic_const_reg_ptr < sh_reg_layout->dynamic_consts.count);
+
+ return offset + state->dynamic_const_reg_ptr++;
+}
+
+/**
+ * DMAs the global alpha value (0..255 normalised to a 0..1 float) into the
+ * next dynamic-constant register.
+ */
+static inline void
+pvr_dma_global_alpha(const struct pvr_transfer_alpha *alpha,
+ struct pvr_transfer_3d_state *state,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ uint32_t *mem_ptr)
+{
+ float global = (float)alpha->global / 255.0f;
+
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] = fui(global);
+}
+
+/** Scales coefficients for sampling. (non normalized).
+ *
+ * Writes the per-axis scale (src/dst ratio) and offset constants used by the
+ * shader to map dst coordinates to src texels into the dynamic-constant
+ * registers; FLOATS_6 additionally writes the dst rect's minimum corner.
+ * Nothing is written for FLOATS_0 (1:1 copies).
+ */
+static inline void
+pvr_dma_texture_floats(const struct pvr_transfer_cmd *transfer_cmd,
+ struct pvr_transfer_3d_state *state,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ uint32_t *mem_ptr)
+
+{
+ if (transfer_cmd->src_present) {
+ struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
+ const struct pvr_rect_mapping *mapping = &transfer_cmd->mappings[0U];
+ VkRect2D src_rect = mapping->src_rect;
+ VkRect2D dst_rect = mapping->dst_rect;
+
+ switch (layer->layer_floats) {
+ case PVR_INT_COORD_SET_FLOATS_0:
+ break;
+
+ case PVR_INT_COORD_SET_FLOATS_6:
+ case PVR_INT_COORD_SET_FLOATS_4: {
+ int32_t consts[2U] = { 0U, 0U };
+ int32_t denom[2U] = { 0U, 0U };
+ int32_t nums[2U] = { 0U, 0U };
+ int32_t src_x, dst_x;
+ int32_t src_y, dst_y;
+ /* NOTE(review): offset is never changed from 0.0f, so the i == 1
+ * special case below is currently a no-op; confirm it isn't a
+ * half-finished Y-flip adjustment.
+ */
+ float offset = 0.0f;
+ float tmp;
+
+ dst_x = dst_rect.extent.width;
+ dst_y = dst_rect.extent.height;
+ src_x = src_rect.extent.width;
+ src_y = src_rect.extent.height;
+
+ /* u = (nums/denom) * x + consts/denom, per axis. */
+ nums[0U] = src_x;
+ denom[0U] = dst_x;
+ consts[0U] = src_rect.offset.x * dst_x - src_x * dst_rect.offset.x;
+ nums[1U] = src_y;
+ denom[1U] = dst_y;
+ consts[1U] = src_rect.offset.y * dst_y - src_y * dst_rect.offset.y;
+
+ for (uint32_t i = 0U; i < 2U; i++) {
+ tmp = (float)(nums[i]) / (float)(denom[i]);
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ fui(tmp);
+
+ tmp = ((float)(consts[i]) + (i == 1U ? offset : 0.0f)) /
+ (float)(denom[i]);
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ fui(tmp);
+ }
+
+ if (layer->layer_floats == PVR_INT_COORD_SET_FLOATS_6) {
+ tmp = (float)MIN2(dst_rect.offset.x, dst_rect.offset.x + dst_x);
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ fui(tmp);
+
+ tmp = (float)MIN2(dst_rect.offset.y, dst_rect.offset.y + dst_y);
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ fui(tmp);
+ }
+ break;
+ }
+
+ default:
+ unreachable("Unknown COORD_SET_FLOATS.");
+ break;
+ }
+ }
+}
+
+/**
+ * Returns true when MSAA resolve filtering for \p pixel_format cannot be done
+ * by the PBE and must be emulated in the USC (wide/normalised integer and
+ * float formats; F16 only on cores without pbe_filterable_f16).
+ */
+static bool pvr_int_pbe_pixel_requires_usc_filter(
+ const struct pvr_device_info *dev_info,
+ enum pvr_transfer_pbe_pixel_src pixel_format)
+{
+ switch (pixel_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+ return true;
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+ return !PVR_HAS_FEATURE(dev_info, pbe_filterable_f16);
+ default:
+ return false;
+ }
+}
+
+/**
+ * Sets up the MSAA related bits in the operation
+ *
+ * TPU sample count is read directly from transfer_cmd in the TPU code. An MSAA
+ * src can be read from sample rate or instance rate shaders as long as the
+ * sample count is set on the TPU. If a layer is single sample we expect the
+ * same sample replicated in full rate shaders. If the layer is multi sample,
+ * instance rate shaders are used to emulate the filter or to select the
+ * specified sample. The sample number is static in the programs.
+ *
+ * Fills in layer->msaa/sample_count/resolve_op, state->msaa_multiplier,
+ * full_rate/down_scale and regs->isp_aa. Returns
+ * VK_ERROR_FORMAT_NOT_SUPPORTED for unsupported sample counts, mixed src/dst
+ * counts, or min/max resolves on non-depth/stencil formats.
+ */
+static VkResult pvr_msaa_state(const struct pvr_device_info *dev_info,
+ const struct pvr_transfer_cmd *transfer_cmd,
+ struct pvr_transfer_3d_state *state)
+{
+ struct pvr_tq_shader_properties *shader_props = &state->shader_props;
+ struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
+ struct pvr_winsys_transfer_regs *const regs = &state->regs;
+ /* '& ~1U' maps a single-sample count (1) to 0 while leaving real MSAA
+ * counts (2/4/8) unchanged, so 0 below means "not multisampled".
+ */
+ uint32_t src_sample_count = transfer_cmd->src.sample_count & ~1U;
+ uint32_t dst_sample_count = transfer_cmd->dst.sample_count & ~1U;
+ uint32_t bsample_count = 0U;
+
+ shader_props->full_rate = false;
+ state->msaa_multiplier = 1U;
+ state->down_scale = false;
+
+ /* clang-format off */
+ pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
+ /* clang-format on */
+
+ layer->sample_count = 1U;
+ layer->resolve_op = PVR_RESOLVE_BLEND;
+
+ bsample_count |= src_sample_count | dst_sample_count;
+
+ if (bsample_count > PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 0U))
+ return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+ /* Shouldn't get two distinct bits set (implies different sample counts).
+ * The reason being the rate at which the shader runs has to match.
+ */
+ if ((bsample_count & (bsample_count - 1U)) != 0U)
+ return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+ if (src_sample_count == 0U && dst_sample_count == 0U) {
+ /* S -> S (no MSAA involved). */
+ layer->msaa = false;
+ } else if (src_sample_count != 0U && dst_sample_count == 0U) {
+ /* M -> S (resolve). */
+ layer->resolve_op = transfer_cmd->resolve_op;
+
+ /* PVR_RESOLVE_SAMPLE<n> ops beyond the available samples are invalid. */
+ if ((uint32_t)layer->resolve_op >=
+ (src_sample_count + (uint32_t)PVR_RESOLVE_SAMPLE0)) {
+ return vk_error(transfer_cmd->cmd_buffer,
+ VK_ERROR_FORMAT_NOT_SUPPORTED);
+ }
+
+ layer->msaa = true;
+
+ switch (layer->resolve_op) {
+ case PVR_RESOLVE_MIN:
+ case PVR_RESOLVE_MAX:
+ /* Min/max resolve is only supported for matching D/S formats. */
+ switch (transfer_cmd->src.vk_format) {
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_S8_UINT:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ if (transfer_cmd->src.vk_format != transfer_cmd->dst.vk_format) {
+ return vk_error(transfer_cmd->cmd_buffer,
+ VK_ERROR_FORMAT_NOT_SUPPORTED);
+ }
+ break;
+
+ default:
+ return vk_error(transfer_cmd->cmd_buffer,
+ VK_ERROR_FORMAT_NOT_SUPPORTED);
+ }
+
+ /* Instance rate. */
+ layer->sample_count = src_sample_count;
+ state->shader_props.full_rate = false;
+ break;
+
+ case PVR_RESOLVE_BLEND:
+ if (pvr_int_pbe_pixel_requires_usc_filter(dev_info,
+ layer->pbe_format)) {
+ /* Instance rate. */
+ layer->sample_count = src_sample_count;
+ state->shader_props.full_rate = false;
+ } else {
+ /* Sample rate. */
+ state->shader_props.full_rate = true;
+ state->msaa_multiplier = src_sample_count;
+ state->down_scale = true;
+
+ pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
+ reg.mode = pvr_cr_isp_aa_mode_type(src_sample_count);
+ }
+ }
+ break;
+
+ default:
+ /* Shader doesn't have to know the number of samples. It's enough
+ * if the TPU knows, and the shader sets the right sno (given to the
+ * shader in resolve_op).
+ */
+ state->shader_props.full_rate = false;
+ break;
+ }
+ } else {
+ state->msaa_multiplier = dst_sample_count;
+
+ pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
+ reg.mode = pvr_cr_isp_aa_mode_type(dst_sample_count);
+ }
+
+ if (src_sample_count == 0U && dst_sample_count != 0U) {
+ /* S -> M (replicate samples) */
+ layer->msaa = false;
+ state->shader_props.full_rate = !state->shader_props.iterated;
+ } else {
+ /* M -> M (sample to sample) */
+ layer->msaa = true;
+ state->shader_props.full_rate = true;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+/**
+ * Returns true for formats (32-bit floats and packed depth/stencil) whose
+ * linear filtering has to be performed in the USC rather than by the TPU.
+ */
+static bool pvr_requires_usc_linear_filter(VkFormat format)
+{
+ switch (format) {
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * Returns true if the USC-side linear filter can actually be applied for
+ * \p pbe_format. Per-sample reads, MSAA sources and full-rate shaders rule
+ * it out regardless of format.
+ */
+static inline bool
+pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,
+ bool sample,
+ bool msaa,
+ bool full_rate)
+{
+ if (sample || msaa || full_rate)
+ return false;
+
+ switch (pbe_format) {
+ case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+ case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * Returns true when the shader must pick a single component out of a wider
+ * texel: the custom mapping extended the destination texels but not the
+ * source ones.
+ */
+static inline bool pvr_pick_component_needed(
+ const struct pvr_transfer_custom_mapping *custom_mapping)
+{
+ return custom_mapping->pass_count > 0U &&
+ custom_mapping->texel_extend_dst > 1U &&
+ custom_mapping->texel_extend_src <= 1U;
+}
+
+/** Writes the shader related constants into the DMA space.
+ *
+ * Copies the compiler-emitted USC constants into \p dma_space immediately
+ * after the driver-allocated registers (sh_reg_layout->driver_total).
+ */
+static void
+pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ uint32_t *dma_space)
+{
+ const uint32_t reg = sh_reg_layout->driver_total;
+ const uint32_t consts_count =
+ sh_reg_layout->compiler_out.usc_constants.count;
+
+ /* If not we likely need to write more consts. */
+ assert(consts_count == sh_reg_layout->compiler_out_total);
+
+ /* Append the usc consts after the driver allocated regs. */
+ for (uint32_t i = 0U; i < consts_count; i++)
+ dma_space[reg + i] = sh_reg_layout->compiler_out.usc_constants.values[i];
+}
+
+/**
+ * DMAs the texel-unwind constants for the destination: a coordinate mask
+ * (texel_extend_dst - 1, so texel_extend_dst is presumably a power of two —
+ * TODO confirm) followed by the unwind amount itself.
+ */
+static inline void
+pvr_dma_texel_unwind(struct pvr_transfer_3d_state *state,
+ const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+ uint32_t *mem_ptr)
+
+{
+ const uint32_t coord_sample_mask =
+ state->custom_mapping.texel_extend_dst - 1U;
+
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ coord_sample_mask;
+ mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+ state->custom_mapping.texel_unwind_dst;
+}
+
+/** Writes the Uniform/Texture state data segments + the UniTex code.
+ *
+ * Allocates PDS heap memory for the texture-state data segment, generates it
+ * from \p program, and records the data/code offsets and PDS temp count in
+ * prep_data->state. The uniform program must be unused
+ * (num_uniform_dma_kicks == 0); with no texture kicks either, all sizes and
+ * offsets are zeroed and nothing is allocated.
+ */
+static inline VkResult
+pvr_pds_unitex(const struct pvr_device_info *dev_info,
+ struct pvr_transfer_ctx *ctx,
+ const struct pvr_transfer_cmd *transfer_cmd,
+ struct pvr_pds_pixel_shader_sa_program *program,
+ struct pvr_transfer_prep_data *prep_data)
+{
+ struct pvr_pds_upload *unitex_code =
+ &ctx->pds_unitex_code[program->num_texture_dma_kicks]
+ [program->num_uniform_dma_kicks];
+ struct pvr_transfer_3d_state *state = &prep_data->state;
+ struct pvr_bo *pvr_bo;
+ VkResult result;
+
+ /* Uniform program is not used. */
+ assert(program->num_uniform_dma_kicks == 0U);
+
+ if (program->num_texture_dma_kicks == 0U) {
+ state->uniform_data_size = 0U;
+ state->tex_state_data_size = 0U;
+ state->tex_state_data_offset = 0U;
+ state->uni_tex_code_offset = 0U;
+
+ return VK_SUCCESS;
+ }
+
+ pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info);
+ assert(program->data_size == 0U);
+ state->uniform_data_size = 0U;
+
+ pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info);
+ state->tex_state_data_size =
+ ALIGN_POT(program->data_size,
+ PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));
+
+ /* Sizes are in dwords; '<< 2U' converts to bytes for the allocation. */
+ result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
+ ctx->device->heaps.pds_heap,
+ state->tex_state_data_size << 2U,
+ PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+ &pvr_bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Offsets are heap-relative. */
+ state->tex_state_data_offset =
+ pvr_bo->vma->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
+
+ pvr_pds_generate_pixel_shader_sa_texture_state_data(program,
+ pvr_bo->bo->map,
+ dev_info);
+
+ pvr_bo_cpu_unmap(transfer_cmd->cmd_buffer->device, pvr_bo);
+
+ /* Save the dev_addr and size in the 3D state. */
+ state->uni_tex_code_offset = unitex_code->code_offset;
+ state->pds_temps = program->temps_used;
+
+ return VK_SUCCESS;
+}
+
+/** Converts a float in range 0 to 1 to an N-bit fixed-point integer.
+ *
+ * Out-of-range inputs saturate: NaN/Inf and values >= 1.0 clamp to the
+ * maximum, negatives clamp to 0. Rounds to nearest (half away from zero).
+ */
+static uint32_t pvr_float_to_ufixed(float value, uint32_t bits)
+{
+ uint32_t max = (1U << bits) - 1U;
+
+ /* NaN and Inf and overflow. */
+ if (util_is_inf_or_nan(value) || value >= 1.0f)
+ return max;
+ else if (value < 0.0f)
+ return 0U;
+
+ /* Normalise. */
+ value = value * (float)max;
+
+ /* Cast to double so that we can accurately represent the sum for N > 23. */
+ return (uint32_t)floor((double)value + 0.5f);
+}
+
+/** Converts a float in range -1 to 1 to a signed N-bit fixed-point integer.
+ *
+ * Saturates NaN/Inf and values >= 1.0 to the positive maximum, values
+ * <= -1.0 to the negative minimum; rounds to nearest, half away from zero.
+ * The signed result is returned reinterpreted as its uint32_t bit pattern.
+ */
+static uint32_t pvr_float_to_sfixed(float value, uint32_t N)
+{
+ int32_t max = (1 << (N - 1)) - 1;
+ int32_t min = 0 - (1 << (N - 1));
+ union fi x;
+
+ /* NaN and Inf and overflow. */
+ if (util_is_inf_or_nan(value) || value >= 1.0f)
+ return (uint32_t)max;
+ else if (value == 0.0f)
+ return 0U;
+ else if (value <= -1.0f)
+ return (uint32_t)min;
+
+ /* Normalise. */
+ value *= (float)max;
+
+ /* Cast to double so that we can accurately represent the sum for N > 23. */
+ if (value > 0.0f)
+ x.i = (int32_t)floor((double)value + 0.5f);
+ else
+ x.i = (int32_t)floor((double)value - 0.5f);
+
+ return x.ui;
+}
+
+/** Convert a value in IEEE single precision format to 16-bit floating point
+ * format.
+ *
+ * Clamps the magnitude to the largest representable f16 (65504-ish bound via
+ * 131008/2 scaling below — the input is clamped to 131008 before packing) and
+ * denormalises the mantissa when the biased exponent underflows.
+ */
+/* TODO: See if we can use _mesa_float_to_float16_rtz_slow() instead. */
+static uint16_t pvr_float_to_f16(float value, bool round_to_even)
+{
+ uint32_t input_value;
+ uint32_t exponent;
+ uint32_t mantissa;
+ uint16_t output;
+
+ /* 0.0f can be exactly expressed in binary using IEEE float format. */
+ if (value == 0.0f)
+ return 0U;
+
+ /* Capture the sign bit, then work on the magnitude. */
+ if (value < 0U) {
+ output = 0x8000;
+ value = -value;
+ } else {
+ output = 0U;
+ }
+
+ /* 2^16 * (2 - 1/1024) = highest f16 representable value. */
+ value = MIN2(value, 131008);
+ input_value = fui(value);
+
+ /* Extract the exponent and mantissa. */
+ exponent = util_get_float32_exponent(value) + 15;
+ mantissa = input_value & ((1 << 23) - 1);
+
+ /* If the exponent is outside the supported range then denormalise the
+ * mantissa.
+ */
+ if ((int32_t)exponent <= 0) {
+ uint32_t shift;
+
+ mantissa |= (1 << 23);
+ exponent = input_value >> 23;
+ shift = -14 + 127 - exponent;
+
+ if (shift < 24)
+ mantissa >>= shift;
+ else
+ mantissa = 0;
+ } else {
+ output = (uint16_t)(output | ((exponent << 10) & 0x7C00));
+ }
+
+ output = (uint16_t)(output | (((mantissa >> 13) << 0) & 0x03FF));
+
+ if (round_to_even) {
+ /* Round to nearest even. */
+ /* NOTE(review): this tests the parity of the truncated *input* value,
+ * not of the packed mantissa's LSB as RNE normally requires — verify
+ * against a reference half-float conversion.
+ */
+ if ((((int)value) % 2 != 0) && (((1 << 13) - 1) & mantissa))
+ output++;
+ } else {
+ /* Round to nearest. */
+ if (mantissa & (1 << 12))
+ output++;
+ }
+
+ return output;
+}
+
+static VkResult pvr_pack_clear_color(VkFormat format,
+ const union fi color[static 4],
+ uint32_t pkd_color[static 4])
+{
+ const uint32_t red_width =
+ vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0U);
+ uint32_t pbe_pack_mode = pvr_get_pbe_packmode(format);
+ const bool pbe_norm = vk_format_is_normalized(format);
+
+ if (pbe_pack_mode == PVRX(PBESTATE_PACKMODE_INVALID))
+ return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+ if (format == VK_FORMAT_A2B10G10R10_UINT_PACK32 ||
+ format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
+ pbe_pack_mode = PVRX(PBESTATE_PACKMODE_R10B10G10A2);
+ } else if (format == VK_FORMAT_B8G8R8A8_UNORM ||
+ format == VK_FORMAT_R8G8B8A8_UNORM) {
+ pbe_pack_mode = PVRX(PBESTATE_PACKMODE_U8U8U8U8);
+ } else if (format != VK_FORMAT_D16_UNORM &&
+ format != VK_FORMAT_X8_D24_UNORM_PACK32 && red_width <= 8U &&
+ vk_format_is_normalized(format)) {
+ pbe_pack_mode = PVRX(PBESTATE_PACKMODE_F16F16F16F16);
+ } else if (vk_format_is_srgb(format)) {
+ pbe_pack_mode = PVRX(PBESTATE_PACKMODE_F16F16F16F16);
+ }
+
+ /* Set packed color based on PBE pack mode and PBE norm. */
+ switch (pbe_pack_mode) {
+ case PVRX(PBESTATE_PACKMODE_U8U8U8U8):
+ case PVRX(PBESTATE_PACKMODE_A1R5G5B5):
+ case PVRX(PBESTATE_PACKMODE_R5G5B5A1):
+ case PVRX(PBESTATE_PACKMODE_A4R4G4B4):
+ case PVRX(PBESTATE_PACKMODE_A8R3G3B2):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 8) & 0xFFU) << 24;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
+ pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S8S8S8S8):
+ case PVRX(PBESTATE_PACKMODE_X8U8S8S8):
+ case PVRX(PBESTATE_PACKMODE_X8S8S8U8):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 8) & 0xFFU) << 24;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
+ pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U16U16U16U16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
+ pkd_color[1] = pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU;
+ pkd_color[1] |= (pvr_float_to_ufixed(color[3].f, 16) & 0xFFFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ pkd_color[1] = color[2].ui & 0xFFFFU;
+ pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S16S16S16S16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
+ pkd_color[1] = (pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU);
+ pkd_color[1] |= (pvr_float_to_sfixed(color[3].f, 16) & 0xFFFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ pkd_color[1] = color[2].ui & 0xFFFFU;
+ pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_A2_XRBIAS_U10U10U10):
+ case PVRX(PBESTATE_PACKMODE_ARGBV16_XR10):
+ case PVRX(PBESTATE_PACKMODE_F16F16F16F16):
+ case PVRX(PBESTATE_PACKMODE_A2R10B10G10):
+ if (red_width > 0) {
+ pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
+ pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
+ pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
+ pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
+ } else {
+ /* Swizzle only uses first channel for alpha formats. */
+ pkd_color[0] = (uint32_t)pvr_float_to_f16(color[3].f, false);
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U32U32U32U32):
+ pkd_color[0] = color[0].ui;
+ pkd_color[1] = color[1].ui;
+ pkd_color[2] = color[2].ui;
+ pkd_color[3] = color[3].ui;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S32S32S32S32):
+ pkd_color[0] = (uint32_t)color[0].i;
+ pkd_color[1] = (uint32_t)color[1].i;
+ pkd_color[2] = (uint32_t)color[2].i;
+ pkd_color[3] = (uint32_t)color[3].i;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F32F32F32F32):
+ memcpy(pkd_color, &color[0].f, 4U * sizeof(float));
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_R10B10G10A2):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 10) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 10) & 0xFFU) << 10;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 10) & 0xFFU) << 20;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 2) & 0xFFU) << 30;
+ } else if (format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
+ pkd_color[0] = color[2].ui & 0x3FFU;
+ pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
+ pkd_color[0] |= (color[0].ui & 0x3FFU) << 20;
+ pkd_color[0] |= (color[3].ui & 0x3U) << 30;
+ } else {
+ pkd_color[0] = color[0].ui & 0x3FFU;
+ pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
+ pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
+ pkd_color[0] |= (color[3].ui & 0x3U) << 30;
+ }
+
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_A2F10F10F10):
+ case PVRX(PBESTATE_PACKMODE_F10F10F10A2):
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 10) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 10) & 0xFFU) << 10;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 10) & 0xFFU) << 20;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 2) & 0xFFU) << 30;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U8U8U8):
+ case PVRX(PBESTATE_PACKMODE_R5G6B5):
+ case PVRX(PBESTATE_PACKMODE_R5SG5SB6):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S8S8S8):
+ case PVRX(PBESTATE_PACKMODE_B6G5SR5S):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U16U16U16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
+ pkd_color[1] = (pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU);
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ pkd_color[1] = color[2].ui & 0xFFFFU;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S16S16S16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
+ pkd_color[1] = pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ pkd_color[1] = color[2].ui & 0xFFFFU;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F16F16F16):
+ case PVRX(PBESTATE_PACKMODE_F11F11F10):
+ case PVRX(PBESTATE_PACKMODE_F10F11F11):
+ case PVRX(PBESTATE_PACKMODE_SE9995):
+ pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
+ pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
+ pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, true);
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U32U32U32):
+ pkd_color[0] = color[0].ui;
+ pkd_color[1] = color[1].ui;
+ pkd_color[2] = color[2].ui;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S32S32S32):
+ pkd_color[0] = (uint32_t)color[0].i;
+ pkd_color[1] = (uint32_t)color[1].i;
+ pkd_color[2] = (uint32_t)color[2].i;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_X24G8X32):
+ case PVRX(PBESTATE_PACKMODE_U8X24):
+ pkd_color[1] = (color[1].ui & 0xFFU) << 24;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F32F32F32):
+ memcpy(pkd_color, &color[0].f, 3U * sizeof(float));
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U8U8):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S8S8):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 8) & 0xFFU) << 24;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
+ pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
+ pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U16U16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S16S16):
+ if (pbe_norm) {
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
+ pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
+ } else {
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
+ }
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F16F16):
+ pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
+ pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U32U32):
+ pkd_color[0] = color[0].ui;
+ pkd_color[1] = color[1].ui;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S32S32):
+ pkd_color[0] = (uint32_t)color[0].i;
+ pkd_color[1] = (uint32_t)color[1].i;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_X24U8F32):
+ case PVRX(PBESTATE_PACKMODE_X24X8F32):
+ memcpy(pkd_color, &color[0].f, 1U * sizeof(float));
+ pkd_color[1] = color[1].ui & 0xFFU;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F32F32):
+ memcpy(pkd_color, &color[0].f, 2U * sizeof(float));
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_ST8U24):
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
+ pkd_color[0] |= color[1].ui << 24;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U8):
+ if (format == VK_FORMAT_S8_UINT)
+ pkd_color[0] = color[1].ui & 0xFFU;
+ else if (pbe_norm)
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
+ else
+ pkd_color[0] = color[0].ui & 0xFFU;
+
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S8):
+ if (pbe_norm)
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
+ else
+ pkd_color[0] = color[0].ui & 0xFFU;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_U16):
+ if (pbe_norm)
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
+ else
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_S16):
+ if (pbe_norm)
+ pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
+ else
+ pkd_color[0] = color[0].ui & 0xFFFFU;
+ break;
+
+ case PVRX(PBESTATE_PACKMODE_F16):
+ pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
+ break;
+
+ /* U32 */
+ case PVRX(PBESTATE_PACKMODE_U32):
+ if (format == VK_FORMAT_X8_D24_UNORM_PACK32) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
+ } else if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
+ pkd_color[0] |= (color[1].ui & 0xFFU) << 24;
+ } else {
+ pkd_color[0] = color[0].ui;
+ }
+ break;
+
+ /* U24ST8 */
+ case PVRX(PBESTATE_PACKMODE_U24ST8):
+ pkd_color[1] = (color[1].ui & 0xFFU) << 24;
+ pkd_color[1] |= pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
+ break;
+
+ /* S32 */
+ case PVRX(PBESTATE_PACKMODE_S32):
+ pkd_color[0] = (uint32_t)color[0].i;
+ break;
+
+ /* F32 */
+ case PVRX(PBESTATE_PACKMODE_F32):
+ memcpy(pkd_color, &color[0].f, sizeof(float));
+ break;
+
+ /* X8U24 */
+ case PVRX(PBESTATE_PACKMODE_X8U24):
+ pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
+ break;
+
+ default:
+ break;
+ }
+
+ return VK_SUCCESS;
+}
+
+/* Core of a single custom-mapping pass of a 3D copy/blit prepare.
+ *
+ * Populates the transfer 3D state and the winsys transfer register block in
+ * @prep_data for one pass of @transfer_cmd. Three mutually exclusive setups:
+ *  - FILL flag set: solid fill; the clear color is packed into the USC clear
+ *    registers and no fragment shader is used.
+ *  - src_present: a real blit; the fragment shader plus its texture/sampler
+ *    DMA data and PDS programs are set up.
+ *  - otherwise: destination-only setup with no shader and no background.
+ * The common tail programs the background object, ISP render mode, pixel
+ * event (PBE) state and ISP tiling registers.
+ *
+ * On success *finished_out is true unless further custom-mapping passes
+ * remain after @pass_idx.
+ */
+static VkResult pvr_3d_copy_blit_core(struct pvr_transfer_ctx *ctx,
+                                      struct pvr_transfer_cmd *transfer_cmd,
+                                      struct pvr_transfer_prep_data *prep_data,
+                                      uint32_t pass_idx,
+                                      bool *finished_out)
+{
+   struct pvr_transfer_3d_state *const state = &prep_data->state;
+   struct pvr_winsys_transfer_regs *const regs = &state->regs;
+   struct pvr_device *const device = ctx->device;
+   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+   VkResult result;
+
+   *finished_out = true;
+
+   state->common_ptr = 0U;
+   state->dynamic_const_reg_ptr = 0U;
+   state->usc_const_reg_ptr = 0U;
+
+   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
+      uint32_t packed_color[4U] = { 0U };
+
+      if (vk_format_is_compressed(transfer_cmd->dst.vk_format))
+         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      /* No shader. */
+      state->pds_temps = 0U;
+      state->uniform_data_size = 0U;
+      state->tex_state_data_size = 0U;
+
+      /* No background enabled. */
+      /* clang-format off */
+      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
+      /* clang-format on */
+      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
+         reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
+      }
+
+      result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
+                                    transfer_cmd->clear_color,
+                                    packed_color);
+      if (result != VK_SUCCESS)
+         return result;
+
+      /* The fill color is sourced directly from the USC clear registers;
+       * no fragment shader runs.
+       */
+      pvr_csb_pack (&regs->usc_clear_register0, CR_USC_CLEAR_REGISTER0, reg) {
+         reg.val = packed_color[0U];
+      }
+
+      pvr_csb_pack (&regs->usc_clear_register1, CR_USC_CLEAR_REGISTER1, reg) {
+         reg.val = packed_color[1U];
+      }
+
+      pvr_csb_pack (&regs->usc_clear_register2, CR_USC_CLEAR_REGISTER2, reg) {
+         reg.val = packed_color[2U];
+      }
+
+      pvr_csb_pack (&regs->usc_clear_register3, CR_USC_CLEAR_REGISTER3, reg) {
+         reg.val = packed_color[3U];
+      }
+
+      /* NOTE(review): & ~1U zeroes the single-sample case — presumably the
+       * hardware treats 0 as "no MSAA multiplier"; confirm.
+       */
+      state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
+      state->pds_shader_task_offset = 0U;
+      state->uni_tex_code_offset = 0U;
+      state->tex_state_data_offset = 0U;
+   } else if (transfer_cmd->src_present) {
+      const struct pvr_tq_frag_sh_reg_layout nop_sh_reg_layout = {
+         /* TODO: Setting this to 1 so that we don't try to pvr_bo_alloc() with
+          * zero size. The device will ignore the PDS program if USC_SHAREDSIZE
+          * is zero and in the case of the nop shader we're expecting it to be
+          * zero. See if we can safely pass PVR_DEV_ADDR_INVALID for the unitex
+          * program.
+          */
+         .driver_total = 1,
+      };
+      const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
+      struct pvr_pds_pixel_shader_sa_program unitex_prog = { 0U };
+      uint32_t tex_state_dma_size_dw;
+      struct pvr_bo *pvr_bo;
+      uint32_t *dma_space;
+
+      result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
+      if (result != VK_SUCCESS)
+         return result;
+
+      pvr_uv_space(dev_info, transfer_cmd, state);
+
+      state->shader_props.iterated = false;
+
+      state->shader_props.layer_props.byte_unwind = 0U;
+      state->shader_props.layer_props.sample =
+         transfer_cmd->src.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
+
+      result = pvr_msaa_state(dev_info, transfer_cmd, state);
+      if (result != VK_SUCCESS)
+         return result;
+
+      state->shader_props.pick_component =
+         pvr_pick_component_needed(&state->custom_mapping);
+      state->shader_props.alpha_type = transfer_cmd->blit.alpha.type;
+
+      /* Shader-side alpha is only supported when the PBE source format is
+       * F16F16 or F16_U8.
+       */
+      if (state->shader_props.alpha_type != PVR_ALPHA_NONE &&
+          (state->shader_props.layer_props.pbe_format !=
+              PVR_TRANSFER_PBE_PIXEL_SRC_F16F16 &&
+           state->shader_props.layer_props.pbe_format !=
+              PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8)) {
+         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+      if (state->filter == PVR_FILTER_LINEAR &&
+          pvr_requires_usc_linear_filter(transfer_cmd->src.vk_format)) {
+         if (pvr_int_pbe_usc_linear_filter(
+                state->shader_props.layer_props.pbe_format,
+                state->shader_props.layer_props.sample,
+                state->shader_props.layer_props.msaa,
+                state->shader_props.full_rate)) {
+            state->shader_props.layer_props.linear = true;
+         } else {
+            mesa_logw("Transfer: F32 linear filter not supported.");
+         }
+      }
+
+      if (state->empty_dst) {
+         /* Nothing will be written; reuse the device nop PDS program. */
+         sh_reg_layout = &nop_sh_reg_layout;
+         state->pds_shader_task_offset = device->nop_program.pds.data_offset;
+      } else {
+         pvr_dev_addr_t kick_usc_pds_dev_addr;
+
+         result =
+            pvr_transfer_frag_store_get_shader_info(device,
+                                                    &ctx->frag_store,
+                                                    &state->shader_props,
+                                                    &kick_usc_pds_dev_addr,
+                                                    &sh_reg_layout);
+         if (result != VK_SUCCESS)
+            return result;
+
+         assert(kick_usc_pds_dev_addr.addr <= UINT32_MAX);
+         state->pds_shader_task_offset = (uint32_t)kick_usc_pds_dev_addr.addr;
+      }
+
+      unitex_prog.kick_usc = false;
+      unitex_prog.clear = false;
+
+      tex_state_dma_size_dw =
+         sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
+
+      unitex_prog.num_texture_dma_kicks = 1U;
+      unitex_prog.num_uniform_dma_kicks = 0U;
+
+      /* Staging buffer the PDS unitex program DMAs into the shareds. */
+      result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
+                                        device->heaps.general_heap,
+                                        tex_state_dma_size_dw << 2U,
+                                        PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                        &pvr_bo);
+      if (result != VK_SUCCESS)
+         return result;
+
+      dma_space = (uint32_t *)pvr_bo->bo->map;
+
+      result = pvr_sampler_image_state(ctx,
+                                       transfer_cmd,
+                                       sh_reg_layout,
+                                       state,
+                                       dma_space);
+      if (result != VK_SUCCESS)
+         return result;
+
+      if (state->shader_props.alpha_type == PVR_ALPHA_GLOBAL ||
+          state->shader_props.alpha_type ==
+             PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL) {
+         pvr_dma_global_alpha(&transfer_cmd->blit.alpha,
+                              state,
+                              sh_reg_layout,
+                              dma_space);
+      }
+
+      pvr_dma_texture_floats(transfer_cmd, state, sh_reg_layout, dma_space);
+
+      if (transfer_cmd->src.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         /* The source Z slice is handed to the shader as a dynamic const. */
+         dma_space[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
+            fui(transfer_cmd->src.z_position);
+      }
+
+      pvr_write_usc_constants(sh_reg_layout, dma_space);
+
+      if (pvr_pick_component_needed(&state->custom_mapping))
+         pvr_dma_texel_unwind(state, sh_reg_layout, dma_space);
+
+      pvr_pds_encode_dma_burst(unitex_prog.texture_dma_control,
+                               unitex_prog.texture_dma_address,
+                               state->common_ptr,
+                               tex_state_dma_size_dw,
+                               pvr_bo->vma->dev_addr.addr,
+                               true,
+                               dev_info);
+
+      state->common_ptr += tex_state_dma_size_dw;
+
+      result =
+         pvr_pds_unitex(dev_info, ctx, transfer_cmd, &unitex_prog, prep_data);
+      if (result != VK_SUCCESS)
+         return result;
+
+      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg) {
+         reg.enablebgtag = true;
+      }
+
+      /* clang-format off */
+      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
+      /* clang-format on */
+   } else {
+      /* No shader. */
+      state->pds_temps = 0U;
+      state->uniform_data_size = 0U;
+      state->tex_state_data_size = 0U;
+
+      /* No background enabled. */
+      /* clang-format off */
+      pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
+      /* clang-format on */
+      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
+         reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
+      }
+      state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
+      state->pds_shader_task_offset = 0U;
+      state->uni_tex_code_offset = 0U;
+      state->tex_state_data_offset = 0U;
+
+      result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   pvr_setup_hwbg_object(dev_info, state);
+
+   pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
+      reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_SCALE);
+      pvr_finishme("Remove direction hardcoding.");
+      reg.dir_type = PVRX(CR_DIR_TYPE_TL2BR);
+   }
+
+   /* Set up pixel event handling. */
+   result = pvr_pbe_setup(transfer_cmd, ctx, state);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_isp_tiles(device, state);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (PVR_HAS_FEATURE(&device->pdevice->dev_info, gpu_multicore_support)) {
+      pvr_csb_pack (&regs->frag_screen, CR_FRAG_SCREEN, reg) {
+         reg.xmax = transfer_cmd->dst.width - 1;
+         reg.ymax = transfer_cmd->dst.height - 1;
+      }
+   }
+
+   /* Signal the caller when more custom-mapping passes remain. */
+   if ((pass_idx + 1U) < state->custom_mapping.pass_count)
+      *finished_out = false;
+
+   return VK_SUCCESS;
+}
+
+/* Selects the PBE source pixel format for a fast-2d blit.
+ *
+ * Depth/stencil involvement (on either surface, or via a DS-merge) is
+ * detected first and is currently unimplemented; everything else is
+ * delegated to the normal format selection.
+ */
+static VkResult
+pvr_pbe_src_format_f2d(uint32_t merge_flags,
+                       struct pvr_transfer_cmd_surface *src,
+                       VkFormat dst_format,
+                       bool down_scale,
+                       bool dont_force_pbe,
+                       enum pvr_transfer_pbe_pixel_src *pixel_format_out)
+{
+   const VkFormat src_format = src->vk_format;
+   const bool ds_merge = (merge_flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U;
+   const bool any_ds = vk_format_is_depth_or_stencil(src_format) ||
+                       vk_format_is_depth_or_stencil(dst_format);
+
+   /* This check must precede the generic path: S8 for instance is integer
+    * and the signedness check would misfire on D24S8.
+    */
+   if (any_ds || ds_merge)
+      pvr_finishme("Complete pvr_pbe_src_format_f2d().");
+
+   return pvr_pbe_src_format_normal(src_format,
+                                    dst_format,
+                                    down_scale,
+                                    dont_force_pbe,
+                                    pixel_format_out);
+}
+
+/** Writes the coefficient loading PDS task.
+ *
+ * Builds a PDS coefficient-loading program with a single FPU iterator that
+ * feeds the TSP-iterated texture coordinates (2D, or 3D when @sample_3d) to
+ * the USC, allocates room for the program's data and code on the PDS heap
+ * from the command buffer, and records the program's heap offset, data size
+ * and PDS temp usage in the prepare state.
+ */
+static inline VkResult
+pvr_pds_coeff_task(struct pvr_transfer_ctx *ctx,
+                   const struct pvr_transfer_cmd *transfer_cmd,
+                   const bool sample_3d,
+                   struct pvr_transfer_prep_data *prep_data)
+{
+   struct pvr_transfer_3d_state *state = &prep_data->state;
+   struct pvr_pds_coeff_loading_program program = { 0U };
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   program.num_fpu_iterators = 1U;
+
+   pvr_csb_pack (&program.FPU_iterators[0U],
+                 PDSINST_DOUT_FIELDS_DOUTI_SRC,
+                 reg) {
+      if (sample_3d)
+         reg.size = PVRX(PDSINST_DOUTI_SIZE_3D);
+      else
+         reg.size = PVRX(PDSINST_DOUTI_SIZE_2D);
+
+      reg.perspective = false;
+
+      /* Varying wrap on the TSP means that the TSP chooses the shorter path
+       * out of the normal and the wrapping path i.e. chooses between u0->u1
+       * and u1->1.0 == 0.0 -> u0. We don't need this behavior.
+       */
+      /*
+       * if RHW ever needed offset SRC_F32 to the first U in 16 bit units
+       *  l0 U <= offs 0
+       *  l0 V
+       *  l1 U <= offs 4
+       *  ...
+       */
+      reg.shademodel = PVRX(PDSINST_DOUTI_SHADEMODEL_GOURUAD);
+      reg.f32_offset = 0U;
+   }
+
+   /* An extra iterated coordinate is needed for 3D sampling, so more USC
+    * coefficient registers are reserved.
+    */
+   if (sample_3d)
+      state->usc_coeff_regs = 12U;
+   else
+      state->usc_coeff_regs = 8U;
+
+   pvr_pds_set_sizes_coeff_loading(&program);
+
+   result =
+      pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
+                               ctx->device->heaps.pds_heap,
+                               (program.data_size + program.code_size) << 2U,
+                               PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                               &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* The task offset is relative to the PDS heap base. */
+   state->pds_coeff_task_offset =
+      pvr_bo->vma->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
+
+   pvr_pds_generate_coeff_loading_program(&program, pvr_bo->bo->map);
+
+   state->coeff_data_size = program.data_size;
+   state->pds_temps = program.temps_used;
+
+   return VK_SUCCESS;
+}
+
+#define X 0U
+#define Y 1U
+#define Z 2U
+
+/* Computes the TSP texture-coordinate words for one source rectangle.
+ *
+ * The rect corners are taken per the @indices table in U0V0, U0V1, U1V1,
+ * U1V0 order (one UV pair per ISP vertex of the quad), optionally nudged by
+ * +/-0.25 texel when @custom_filter is set, and normalized with the U/V/Z
+ * reciprocals in @recips. When @z_present the normalized @z_value is
+ * replicated into coords [8..11] for all four vertices.
+ *
+ * NOTE(review): on cores without simple_internal_parameter_format every
+ * value is rotated left one bit before storing — presumably the parameter
+ * format the TSP consumes expects this encoding; confirm.
+ */
+static void pvr_tsp_floats(const struct pvr_device_info *dev_info,
+                           VkRect2D *rect,
+                           const float recips[3U],
+                           bool custom_filter,
+                           bool z_present,
+                           float z_value,
+                           struct pvr_transfer_3d_iteration *layer)
+{
+#define U0 0U
+#define U1 1U
+#define V0 2U
+#define V1 3U
+
+   const uint32_t indices[8U] = { U0, V0, U0, V1, U1, V1, U1, V0 };
+   float delta[2U] = { 0.0f, 0.0f };
+   int32_t non_normalized[4U];
+   uint32_t src_flipped[2U];
+   uint32_t normalized[4U];
+   int32_t src_span[2U];
+
+   non_normalized[U0] = rect->offset.x;
+   non_normalized[U1] = rect->offset.x + rect->extent.width;
+   non_normalized[V0] = rect->offset.y;
+   non_normalized[V1] = rect->offset.y + rect->extent.height;
+
+   /* Filter adjust. */
+   /* NOTE(review): src_span is int32_t but compared against 0U, so the
+    * comparison happens unsigned and a negative span would not register as
+    * flipped. VkExtent2D fields are unsigned, so spans are presumably always
+    * non-negative here — confirm if flipped blits are routed through this.
+    */
+   src_span[X] = rect->extent.width;
+   src_flipped[X] = src_span[X] > 0U ? 0U : 1U;
+   src_span[Y] = rect->extent.height;
+   src_flipped[Y] = src_span[Y] > 0U ? 0U : 1U;
+   /*
+    * |  X |  Y | srcFlipX | srcFlipY |
+    * +----+----+----------+----------|
+    * |  X |  Y |     0    |     0    |
+    * | -X |  Y |     1    |     0    |
+    * |  X | -Y |     0    |     1    |
+    * | -X | -Y |     1    |     1    |
+    */
+   for (uint32_t i = X; i <= Y; i++) {
+      if (custom_filter) {
+         if (src_flipped[i] != 0U)
+            delta[i] += 0.25;
+         else
+            delta[i] -= 0.25;
+      }
+   }
+
+   /* Normalize. */
+   for (uint32_t i = 0U; i < ARRAY_SIZE(normalized); i++) {
+      uint32_t tmp;
+      float ftmp;
+
+      ftmp = (float)non_normalized[i] + delta[i >> 1U];
+      ftmp *= recips[i >> 1U];
+
+      tmp = fui(ftmp);
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+         tmp = XXH_rotl32(tmp, 1U);
+
+      normalized[i] = tmp;
+   }
+
+   /* Apply indices. */
+   for (uint32_t i = 0U; i < 8U; i++)
+      layer->texture_coords[i] = normalized[indices[i]];
+
+   if (z_present) {
+      uint32_t tmp = fui(z_value * recips[2U]);
+
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+         tmp = XXH_rotl32(tmp, 1U);
+
+      for (uint32_t i = 8U; i < 12U; i++)
+         layer->texture_coords[i] = tmp;
+   }
+
+#undef U0
+#undef U1
+#undef V0
+#undef V1
+}
+
+/* Emits the TSP vertex data for @num_mappings source rect mappings starting
+ * at @mapping_offset into the control stream at *cs_ptr_out.
+ *
+ * Each of the four quad vertices gets one U/V pair per layer, plus a
+ * 64-bit-padded Z coordinate for 3D-twiddled sources. Without
+ * simple_internal_parameter_format the TSP compression format filler words
+ * follow; with it the stream is simply re-aligned to 64 bits. *cs_ptr_out
+ * is advanced past everything written.
+ */
+static void
+pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info *dev_info,
+                                    const struct pvr_transfer_cmd_surface *src,
+                                    struct pvr_rect_mapping *mappings,
+                                    bool custom_filter,
+                                    uint32_t num_mappings,
+                                    uint32_t mapping_offset,
+                                    enum pvr_filter filter,
+                                    uint32_t tsp_comp_format_in_dw,
+                                    uint32_t **const cs_ptr_out)
+{
+   struct pvr_transfer_3d_iteration layer;
+   uint32_t *cs_ptr = *cs_ptr_out;
+
+   /* |<-32b->|
+    * +-------+-----
+    * |  RHW  |   | X  num_isp_vertices
+    * +-------+-- |
+    * |   U   | | |
+    * |   V   | | X  PVR_TRANSFER_NUM_LAYERS
+    * +-------+-----
+    *
+    * RHW is not there any more in the Transfer. The comment still explains
+    * where it should go if ever needed.
+    */
+   for (uint32_t i = mapping_offset; i < mapping_offset + num_mappings; i++) {
+      bool z_present = src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
+      const float recips[3U] = {
+         [X] = 1.0f / (float)src->width,
+         [Y] = 1.0f / (float)src->height,
+         [Z] = z_present ? 1.0f / (float)src->depth : 0.0f,
+      };
+      /* Nearest filtering snaps the source Z to the closest slice. */
+      float z_pos = (filter < PVR_FILTER_LINEAR) ? floor(src->z_position + 0.5f)
+                                                 : src->z_position;
+
+      pvr_tsp_floats(dev_info,
+                     &mappings[i].src_rect,
+                     recips,
+                     custom_filter,
+                     z_present,
+                     z_pos,
+                     &layer);
+
+      /* We request UVs from TSP for ISP triangle:
+       *  0 u 1
+       *   +---,
+       *  v| /|
+       *   | / |
+       *  2'/--'3
+       */
+      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
+         *cs_ptr++ = layer.texture_coords[0U];
+         *cs_ptr++ = layer.texture_coords[1U];
+      }
+
+      if (z_present) {
+         *cs_ptr++ = layer.texture_coords[8U];
+         *cs_ptr++ = 0U;
+      }
+
+      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
+         *cs_ptr++ = layer.texture_coords[6U];
+         *cs_ptr++ = layer.texture_coords[7U];
+      }
+
+      if (z_present) {
+         *cs_ptr++ = layer.texture_coords[11U];
+         *cs_ptr++ = 0U;
+      }
+
+      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
+         *cs_ptr++ = layer.texture_coords[2U];
+         *cs_ptr++ = layer.texture_coords[3U];
+      }
+
+      if (z_present) {
+         *cs_ptr++ = layer.texture_coords[9U];
+         *cs_ptr++ = 0U;
+      }
+
+      for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
+         *cs_ptr++ = layer.texture_coords[4U];
+         *cs_ptr++ = layer.texture_coords[5U];
+      }
+
+      if (z_present) {
+         *cs_ptr++ = layer.texture_coords[10U];
+         *cs_ptr++ = 0U;
+      }
+   }
+
+   if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      /* Skipped optional primitive id. */
+      for (uint32_t i = 0U; i < tsp_comp_format_in_dw; i++)
+         *cs_ptr++ = 0x88888888U;
+   } else {
+      /* Align back to 64 bits. */
+      if (((uintptr_t)cs_ptr & 7U) != 0U)
+         cs_ptr++;
+   }
+
+   *cs_ptr_out = cs_ptr;
+}
+
+#undef X
+#undef Y
+#undef Z
+
+/* Writes the PDS state words of an ISP primitive block from the prepared
+ * transfer 3D state: the PDS shader / texture-unicode / varying /
+ * texture-data / uniform base addresses and the two size-info words. On
+ * cores with simple_internal_parameter_format a padding word re-aligns the
+ * stream to 64 bits. *cs_ptr_out is advanced past the words written.
+ *
+ * NOTE(review): @ctx is currently unused here.
+ */
+static void pvr_isp_prim_block_pds_state(const struct pvr_device_info *dev_info,
+                                         struct pvr_transfer_ctx *ctx,
+                                         struct pvr_transfer_3d_state *state,
+                                         uint32_t **const cs_ptr_out)
+{
+   uint32_t *cs_ptr = *cs_ptr_out;
+
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SHADERBASE, shader_base) {
+      shader_base.addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
+   }
+   cs_ptr++;
+
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXUNICODEBASE, tex_base) {
+      tex_base.addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
+   }
+   cs_ptr++;
+
+   /* All sizes are expressed in hardware allocation granules, hence the
+    * divisions/alignments by the *_UNIT_SIZE constants.
+    */
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO1, info1) {
+      info1.pds_uniformsize =
+         state->uniform_data_size /
+         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE);
+
+      info1.pds_texturestatesize =
+         state->tex_state_data_size /
+         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);
+
+      info1.pds_varyingsize =
+         state->coeff_data_size /
+         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE);
+
+      info1.usc_varyingsize =
+         ALIGN_POT(state->usc_coeff_regs,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE)) /
+         PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE);
+
+      info1.pds_tempsize =
+         ALIGN_POT(state->pds_temps,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE)) /
+         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
+   }
+   cs_ptr++;
+
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_VARYINGBASE, base) {
+      base.addr = PVR_DEV_ADDR(state->pds_coeff_task_offset);
+   }
+   cs_ptr++;
+
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXTUREDATABASE, base) {
+      base.addr = PVR_DEV_ADDR(state->tex_state_data_offset);
+   }
+   cs_ptr++;
+
+   /* PDS uniform program not used. */
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_UNIFORMDATABASE, base) {
+      base.addr = PVR_DEV_ADDR(0U);
+   }
+   cs_ptr++;
+
+   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO2, info) {
+      info.usc_sharedsize =
+         ALIGN_POT(state->common_ptr,
+                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE)) /
+         PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
+      info.pds_tri_merge_disable = !PVR_HAS_ERN(dev_info, 42307);
+      info.pds_batchnum = 0U;
+   }
+   cs_ptr++;
+
+   /* Get back to 64 bits boundary. */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+      cs_ptr++;
+
+   *cs_ptr_out = cs_ptr;
+}
+
+/* Writes the ISP state part of a primitive block header: the ISP control and
+ * state words, the compression size word, and the ISP/TSP vertex compression
+ * format words (all coordinates raw-byte encoded, @num_isp_vertices
+ * vertices). The simple internal parameter format v2 path is not implemented
+ * yet. *cs_ptr_out is advanced past the words written.
+ *
+ * Fix: dropped the stale UNUSED annotation from tsp_comp_format_in_dw — the
+ * parameter is in fact consumed by the compression size word below.
+ */
+static void pvr_isp_prim_block_isp_state(const struct pvr_device_info *dev_info,
+                                         uint32_t tsp_comp_format_in_dw,
+                                         uint32_t tsp_data_size_in_bytes,
+                                         uint32_t num_isp_vertices,
+                                         bool read_bgnd,
+                                         uint32_t **const cs_ptr_out)
+{
+   uint32_t *cs_ptr = *cs_ptr_out;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
+      pvr_finishme("Unimplemented path.");
+   } else {
+      /* ISP state words. */
+
+      /* clang-format off */
+      pvr_csb_pack (cs_ptr, TA_STATE_ISPCTL, ispctl);
+      /* clang-format on */
+      cs_ptr += pvr_cmd_length(TA_STATE_ISPCTL);
+
+      pvr_csb_pack (cs_ptr, TA_STATE_ISPA, ispa) {
+         ispa.objtype = PVRX(TA_OBJTYPE_TRIANGLE);
+         ispa.passtype = read_bgnd ? PVRX(TA_PASSTYPE_TRANSLUCENT)
+                                   : PVRX(TA_PASSTYPE_OPAQUE);
+         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
+         ispa.dwritedisable = true;
+      }
+      cs_ptr += pvr_cmd_length(TA_STATE_ISPA);
+
+      /* How many bytes the TSP compression format needs? */
+      pvr_csb_pack (cs_ptr, IPF_COMPRESSION_SIZE_WORD, word) {
+         word.cs_isp_comp_table_size = 0U;
+         word.cs_tsp_comp_format_size = tsp_comp_format_in_dw;
+         word.cs_tsp_comp_table_size = 0U;
+         word.cs_tsp_comp_vertex_size =
+            tsp_data_size_in_bytes / num_isp_vertices;
+      }
+      cs_ptr += pvr_cmd_length(IPF_COMPRESSION_SIZE_WORD);
+
+      /* ISP vertex compression. */
+      pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_0, word0) {
+         word0.cf_isp_comp_fmt_x0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_x1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_x2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_y0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_y1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_y2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_z0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word0.cf_isp_comp_fmt_z1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+      }
+      cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_0);
+
+      pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_1, word1) {
+         word1.vf_prim_msaa = 0U;
+         word1.vf_prim_id_pres = 0U;
+         word1.vf_vertex_clipped = 0U;
+         word1.vf_vertex_total = num_isp_vertices - 1U;
+         word1.cf_isp_comp_fmt_z3 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+         word1.cf_isp_comp_fmt_z2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
+      }
+      cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_1);
+   }
+
+   *cs_ptr_out = cs_ptr;
+}
+
+/* Writes the triangle index block of an ISP primitive block.
+ *
+ * Each mapping contributes four vertices (indices j..j+3) forming the two
+ * triangles of its destination quad. Six indices are needed per mapping but
+ * an IPF_INDEX_DATA word holds only four index slots, so consecutive
+ * mappings share words: the even iteration leaves its second word
+ * half-written (without advancing cs_ptr) and the odd iteration ORs its
+ * leading indices into it before continuing. *cs_ptr_out is advanced past
+ * the whole words written.
+ */
+static void
+pvr_isp_prim_block_index_block(const struct pvr_device_info *dev_info,
+                               uint32_t num_mappings,
+                               uint32_t **const cs_ptr_out)
+{
+   uint32_t *cs_ptr = *cs_ptr_out;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      pvr_finishme("Unimplemented path.");
+   } else {
+      for (uint32_t i = 0U, j = 0U; i < num_mappings; i++, j += 4U) {
+         if ((i & 1U) == 0U) {
+            pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
+               word.ix_index0_0 = j;
+               word.ix_index0_1 = j + 1U;
+               word.ix_index0_2 = j + 2U;
+               word.ix_index1_0 = j + 3U;
+            }
+            cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
+
+            /* Don't increment cs_ptr here. IPF_INDEX_DATA is patched in the
+             * else part and then cs_ptr is incremented.
+             */
+            pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
+               word.ix_index0_0 = j + 2U;
+               word.ix_index0_1 = j + 1U;
+            }
+         } else {
+            uint32_t tmp;
+
+            /* Merge this mapping's first indices into the half-written word
+             * left behind by the previous (even) iteration.
+             */
+            pvr_csb_pack (&tmp, IPF_INDEX_DATA, word) {
+               word.ix_index0_2 = j;
+               word.ix_index1_0 = j + 1U;
+            }
+            *cs_ptr |= tmp;
+            cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
+
+            pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
+               word.ix_index0_0 = j + 2U;
+               word.ix_index0_1 = j + 3U;
+               word.ix_index0_2 = j + 2U;
+               word.ix_index1_0 = j + 1U;
+            }
+            cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
+         }
+      }
+
+      /* The last pass didn't ++. */
+      if ((num_mappings & 1U) != 0U)
+         cs_ptr++;
+   }
+
+   *cs_ptr_out = cs_ptr;
+}
+
+/* Calculates a 24 bit fixed point (biased) representation of a signed integer.
+ *
+ * The value is offset by the hardware XY vertex bias and rejected with
+ * VK_ERROR_UNKNOWN when it doesn't fit the vertex word's integer field.
+ * The SIPF path is not implemented yet.
+ */
+static inline VkResult
+pvr_int32_to_isp_xy_vtx(const struct pvr_device_info *dev_info,
+                        int32_t val,
+                        UNUSED bool bias,
+                        uint32_t *word_out)
+{
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      pvr_finishme("Unimplemented path.");
+      return VK_SUCCESS;
+   }
+
+   val += PVRX(IPF_ISP_VERTEX_XY_BIAS_VALUE);
+
+   if (((uint32_t)val & 0x7fff8000U) != 0U)
+      return vk_error(NULL, VK_ERROR_UNKNOWN);
+
+   pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY, word) {
+      /* Fix: compare against signed 0. The previous "val < 0U" promoted val
+       * to unsigned (usual arithmetic conversions), so the comparison was
+       * always false and the sign bit could never be set.
+       */
+      word.sign = val < 0;
+      word.integer = val;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Writes the ISP vertex data (X, Y, Z = 0) of the destination rectangles.
+ *
+ * For each mapping four vertices are emitted — (left, top), (right, top),
+ * (left, bottom), (right, bottom) — as biased fixed point coordinates split
+ * across the IPF_ISP_VERTEX words. Propagates VK_ERROR_UNKNOWN when a
+ * coordinate doesn't fit the vertex format.
+ *
+ * NOTE(review): the bias flag computed for 8K-capable cores is currently
+ * ignored by pvr_int32_to_isp_xy_vtx (its bias parameter is UNUSED).
+ */
+static VkResult
+pvr_isp_prim_block_isp_vertices(const struct pvr_device_info *dev_info,
+                                struct pvr_transfer_3d_state *state,
+                                struct pvr_rect_mapping *mappings,
+                                uint32_t num_mappings,
+                                uint32_t mapping_offset,
+                                uint32_t **const cs_ptr_out)
+{
+   uint32_t *cs_ptr = *cs_ptr_out;
+   bool bias = true;
+   uint32_t i;
+
+   if (PVR_HAS_FEATURE(dev_info, screen_size8K))
+      bias = state->width_in_tiles <= 256U && state->height_in_tiles <= 256U;
+
+   for (i = mapping_offset; i < mapping_offset + num_mappings; i++) {
+      uint32_t bottom = 0U;
+      uint32_t right = 0U;
+      uint32_t left = 0U;
+      uint32_t top = 0U;
+      VkResult result;
+
+      /* ISP vertex data (X, Y, Z). */
+      result = pvr_int32_to_isp_xy_vtx(dev_info,
+                                       mappings[i].dst_rect.offset.y,
+                                       bias,
+                                       &top);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = pvr_int32_to_isp_xy_vtx(dev_info,
+                                       mappings[i].dst_rect.offset.y +
+                                          mappings[i].dst_rect.extent.height,
+                                       bias,
+                                       &bottom);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = pvr_int32_to_isp_xy_vtx(dev_info,
+                                       mappings[i].dst_rect.offset.x,
+                                       bias,
+                                       &left);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = pvr_int32_to_isp_xy_vtx(dev_info,
+                                       mappings[i].dst_rect.offset.x +
+                                          mappings[i].dst_rect.extent.width,
+                                       bias,
+                                       &right);
+      if (result != VK_SUCCESS)
+         return result;
+
+      if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+         pvr_finishme("Unimplemented path.");
+      } else {
+         /* Coordinates straddle word boundaries: y0 and x1 are split into a
+          * low part in one word and a high part in the next.
+          */
+         /* ISP vertices 0 and 1. */
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
+            word0.x0 = left;
+            word0.y0 = top & 0xFF;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
+            word1.y0 = top >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
+            word2.x1 = right & 0xFFFF;
+            word2.z0 = 0U;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
+            word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
+            word3.y1 = top;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
+            word4.z1 = 0U;
+         }
+         cs_ptr++;
+
+         /* ISP vertices 2 and 3. */
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
+            word0.x0 = left;
+            word0.y0 = bottom & 0xFF;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
+            word1.y0 = bottom >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
+            word2.x1 = right & 0xFFFF;
+            word2.z0 = 0U;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
+            word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
+            word3.y1 = bottom;
+         }
+         cs_ptr++;
+
+         pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
+            word4.z1 = 0U;
+         }
+         cs_ptr++;
+      }
+   }
+   *cs_ptr_out = cs_ptr;
+
+   return VK_SUCCESS;
+}
+
+static uint32_t
+pvr_isp_primitive_block_size(const struct pvr_device_info *dev_info,
+ const struct pvr_transfer_cmd_surface *src,
+ uint32_t num_mappings)
+{
+ uint32_t num_isp_vertices = num_mappings * 4U;
+ uint32_t num_tsp_vertices_per_isp_vertex;
+ uint32_t isp_vertex_data_size_dw;
+ bool color_fill = (src == NULL);
+ uint32_t tsp_comp_format_dw;
+ uint32_t isp_state_size_dw;
+ uint32_t pds_state_size_dw;
+ uint32_t idx_data_size_dw;
+ uint32_t tsp_data_size;
+ uint32_t stream_size;
+
+ if (color_fill) {
+ num_tsp_vertices_per_isp_vertex = 0U;
+ } else {
+ num_tsp_vertices_per_isp_vertex =
+ src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
+ }
+
+ tsp_data_size = num_isp_vertices * PVR_TRANSFER_NUM_LAYERS * 4U *
+ num_tsp_vertices_per_isp_vertex;
+
+ if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+ /* An XYZ vertex is 16/16/32 bits => 8 bytes. */
+ isp_vertex_data_size_dw = num_isp_vertices * 2U;
+
+ /* Round to even for 64 bit boundary. */
+ idx_data_size_dw = ALIGN_POT(num_mappings, 2U);
+ tsp_comp_format_dw = 0U;
+ isp_state_size_dw = 4U;
+ pds_state_size_dw = 8U;
+ } else {
+ tsp_comp_format_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
+
+ if (!color_fill) {
+ if (src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+ tsp_comp_format_dw *= 2U;
+ }
+
+ /* An XYZ vertex is 24/24/32 bits => 10 bytes with last padded to 4 byte
+ * burst align.
+ */
+ isp_vertex_data_size_dw = DIV_ROUND_UP(num_isp_vertices * 10U, 4U);
+
+ /* 4 triangles fit in 3 dw: t0t0t0t1_t1t1t2t2_t2t3t3t3. */
+ idx_data_size_dw = num_mappings + DIV_ROUND_UP(num_mappings, 2U);
+ isp_state_size_dw = 5U;
+ pds_state_size_dw = 7U;
+ }
+
+ stream_size = tsp_data_size + (idx_data_size_dw + tsp_comp_format_dw +
+ isp_vertex_data_size_dw + isp_state_size_dw +
+ pds_state_size_dw) *
+ 4U;
+
+ return stream_size;
+}
+
/* Emits one ISP primitive block into the control stream.
 *
 * A primitive block bundles, for up to num_mappings quad mappings: the TSP
 * vertex data (only when sampling from a source), the PDS state, the ISP
 * state words, the index data and the ISP vertices. On success *cs_ptr_out
 * is advanced past the emitted block and *cs_start_offset holds the byte
 * offset within the block that CS_PRIM_BASE should point at.
 */
static VkResult
pvr_isp_primitive_block(const struct pvr_device_info *dev_info,
                        struct pvr_transfer_ctx *ctx,
                        const struct pvr_transfer_cmd *transfer_cmd,
                        struct pvr_transfer_prep_data *prep_data,
                        const struct pvr_transfer_cmd_surface *src,
                        bool custom_filter,
                        struct pvr_rect_mapping *mappings,
                        uint32_t num_mappings,
                        uint32_t mapping_offset,
                        bool read_bgnd,
                        uint32_t *cs_start_offset,
                        uint32_t **cs_ptr_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* Each mapping is rasterized as a quad of four ISP vertices. */
   uint32_t num_isp_vertices = num_mappings * 4U;
   uint32_t num_tsp_vertices_per_isp_vert;
   uint32_t tsp_data_size_in_bytes;
   uint32_t tsp_comp_format_in_dw;
   /* A NULL source surface means this block performs a color fill. */
   bool color_fill = src == NULL;
   uint32_t stream_size_in_bytes;
   uint32_t *cs_ptr_start;
   VkResult result;

   if (color_fill) {
      num_tsp_vertices_per_isp_vert = 0U;
   } else {
      /* 3D twiddled sources carry twice the TSP vertices per ISP vertex. */
      num_tsp_vertices_per_isp_vert =
         src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
   }

   tsp_data_size_in_bytes = num_isp_vertices * PVR_TRANSFER_NUM_LAYERS * 4U *
                            num_tsp_vertices_per_isp_vert;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      tsp_comp_format_in_dw = 0U;
   } else {
      tsp_comp_format_in_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;

      if (!color_fill && src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         tsp_comp_format_in_dw *= 2U;
   }

   /* Expected size, used below to verify the emitted stream length. */
   stream_size_in_bytes =
      pvr_isp_primitive_block_size(dev_info, src, num_mappings);

   cs_ptr_start = *cs_ptr_out;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      pvr_finishme("Unimplemented path.");
   } else {
      if (!color_fill) {
         /* This includes:
          *    Compressed TSP vertex data & tables.
          *    Primitive id.
          *    TSP compression formats.
          */
         pvr_isp_prim_block_tsp_vertex_block(dev_info,
                                             src,
                                             mappings,
                                             custom_filter,
                                             num_mappings,
                                             mapping_offset,
                                             transfer_cmd->filter,
                                             tsp_comp_format_in_dw,
                                             cs_ptr_out);
      }

      pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);

      /* Point the CS_PRIM_BASE here. */
      *cs_start_offset = (*cs_ptr_out - cs_ptr_start) * sizeof(uint32_t);

      /* This includes:
       *    ISP state words.
       *    Compression size word.
       *    ISP compression and vertex formats.
       */
      pvr_isp_prim_block_isp_state(dev_info,
                                   tsp_comp_format_in_dw,
                                   tsp_data_size_in_bytes,
                                   num_isp_vertices,
                                   read_bgnd,
                                   cs_ptr_out);

      pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);

      result = pvr_isp_prim_block_isp_vertices(dev_info,
                                               state,
                                               mappings,
                                               num_mappings,
                                               mapping_offset,
                                               cs_ptr_out);
      if (result != VK_SUCCESS)
         return result;
   }

   /* The emitted block must match pvr_isp_primitive_block_size() exactly;
    * the control stream allocation was sized with it.
    */
   assert((*cs_ptr_out - cs_ptr_start) * sizeof(uint32_t) ==
          stream_size_in_bytes);

   return VK_SUCCESS;
}
+
+static inline uint32_t
+pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info *dev_info)
+{
+ uint32_t ret = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS) * sizeof(uint32_t);
+
+ if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+ return ret / sizeof(uint64_t) / 2U;
+
+ return ret / sizeof(uint32_t) / 2U - 1U;
+}
+
+static inline uint32_t
+pvr_transfer_max_quads_per_pb(const struct pvr_device_info *dev_info)
+{
+ return PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U
+ : 16U;
+}
+
/**
 * Writes ISP ctrl stream.
 *
 * We change sampler/texture state when we process a new TQ source. The
 * primitive block contains the shader pointers, but we supply the primitive
 * blocks with shaders from here.
 */
static VkResult pvr_isp_ctrl_stream(const struct pvr_device_info *dev_info,
                                    struct pvr_transfer_ctx *ctx,
                                    struct pvr_transfer_cmd *transfer_cmd,
                                    struct pvr_transfer_prep_data *prep_data)
{
   const uint32_t max_mappings_per_pb = pvr_transfer_max_quads_per_pb(dev_info);
   struct pvr_transfer_3d_state *const state = &prep_data->state;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   struct pvr_transfer_pass *pass = NULL;
   uint32_t flags = transfer_cmd->flags;
   uint32_t num_prim_blks = 0U;
   uint32_t prim_blk_size = 0U;
   uint32_t region_arrays_size;
   uint32_t num_region_arrays;
   uint32_t total_stream_size;
   struct pvr_bo *pvr_cs_bo;
   uint32_t rem_mappings;
   uint32_t *blk_cs_ptr;
   uint32_t *cs_ptr;
   VkResult result;

   /* Phase 1: count primitive blocks and sum their sizes so the control
    * stream buffer can be allocated up front.
    */
   if (state->custom_mapping.pass_count > 0U) {
      uint32_t num_mappings;

      pass = &state->custom_mapping.passes[state->pass_idx];
      num_mappings = pass->mapping_count;

      /* Each primitive block takes at most max_mappings_per_pb mappings. */
      while (num_mappings > 0U) {
         if (flags & PVR_TRANSFER_CMD_FLAGS_FILL) {
            prim_blk_size += pvr_isp_primitive_block_size(
               dev_info,
               NULL,
               MIN2(max_mappings_per_pb, num_mappings));
         }

         if (transfer_cmd->src_present) {
            prim_blk_size += pvr_isp_primitive_block_size(
               dev_info,
               &transfer_cmd->src,
               MIN2(max_mappings_per_pb, num_mappings));
         }

         num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
         num_prim_blks++;
      }
   } else if ((flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
      /* A fill uses exactly one mapping (the scissor rect). */
      num_prim_blks = 1U;
      prim_blk_size +=
         pvr_isp_primitive_block_size(dev_info,
                                      NULL,
                                      MIN2(max_mappings_per_pb, 1U));

      if (transfer_cmd->src_present) {
         uint32_t num_mappings = transfer_cmd->mapping_count;

         while (num_mappings > 0U) {
            prim_blk_size += pvr_isp_primitive_block_size(
               dev_info,
               &transfer_cmd->src,
               MIN2(max_mappings_per_pb, num_mappings));

            num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
            num_prim_blks++;
         }
      }
   } else {
      pvr_finishme("Unimplemented path.");
   }

   /* The region arrays precede the primitive blocks in the buffer. */
   num_region_arrays =
      (num_prim_blks + (pvr_transfer_prim_blocks_per_alloc(dev_info) - 1U)) /
      pvr_transfer_prim_blocks_per_alloc(dev_info);
   region_arrays_size = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS) *
                        sizeof(uint32_t) * num_region_arrays;
   total_stream_size = region_arrays_size + prim_blk_size;

   /* Allocate space for IPF control stream. */
   result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                     ctx->device->heaps.transfer_3d_heap,
                                     total_stream_size,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_cs_bo);
   if (result != VK_SUCCESS)
      return result;

   /* cs_ptr walks the region arrays; blk_cs_ptr walks the primitive blocks
    * that follow them.
    */
   cs_ptr = pvr_cs_bo->bo->map;
   blk_cs_ptr = cs_ptr + region_arrays_size / sizeof(uint32_t);

   if (flags & PVR_TRANSFER_CMD_FLAGS_FILL)
      rem_mappings = pass ? pass->mapping_count : 1U;
   else
      rem_mappings = transfer_cmd->mapping_count;

   /* Phase 2: emit the primitive blocks and the region array entries that
    * reference them.
    */
   if ((transfer_cmd->src_present || flags & PVR_TRANSFER_CMD_FLAGS_FILL) &&
       rem_mappings != 0U) {
      struct pvr_pds_pixel_shader_sa_program unitex_pds_prog = { 0U };
      struct pvr_transfer_cmd_surface *src = &transfer_cmd->src;
      struct pvr_rect_mapping fill_mapping;
      uint32_t mapping_offset = 0U;
      bool read_bgnd = false;

      if (flags & PVR_TRANSFER_CMD_FLAGS_FILL) {
         /* Fill path: pack the clear color into the USC clear registers and
          * use the NOP PDS program (no texture sampling needed).
          */
         uint32_t packed_color[4U] = { 0U };

         if (vk_format_is_compressed(transfer_cmd->dst.vk_format)) {
            return vk_error(transfer_cmd->cmd_buffer,
                            VK_ERROR_FORMAT_NOT_SUPPORTED);
         }

         state->pds_shader_task_offset = 0U;
         state->uni_tex_code_offset = 0U;
         state->tex_state_data_offset = 0U;
         state->common_ptr = 0U;

         result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
                                       transfer_cmd->clear_color,
                                       packed_color);
         if (result != VK_SUCCESS)
            return result;

         /* Only dst_rect is set for fills; src_rect is not consumed on this
          * path.
          */
         fill_mapping.dst_rect = transfer_cmd->scissor;

         pvr_csb_pack (&regs->usc_clear_register0,
                       CR_USC_CLEAR_REGISTER0,
                       reg) {
            reg.val = packed_color[0U];
         }

         pvr_csb_pack (&regs->usc_clear_register1,
                       CR_USC_CLEAR_REGISTER1,
                       reg) {
            reg.val = packed_color[1U];
         }

         pvr_csb_pack (&regs->usc_clear_register2,
                       CR_USC_CLEAR_REGISTER2,
                       reg) {
            reg.val = packed_color[2U];
         }

         pvr_csb_pack (&regs->usc_clear_register3,
                       CR_USC_CLEAR_REGISTER3,
                       reg) {
            reg.val = packed_color[3U];
         }

         state->pds_shader_task_offset =
            transfer_cmd->cmd_buffer->device->nop_program.pds.data_offset;

         unitex_pds_prog.kick_usc = false;
         unitex_pds_prog.clear = false;
      } else {
         /* Blit path: select the fragment shader variant, then set up the
          * sampler/image state and the DMA that uploads it.
          */
         const bool down_scale =
            transfer_cmd->resolve_op == PVR_RESOLVE_BLEND &&
            src->sample_count > 1U && transfer_cmd->dst.sample_count <= 1U;
         struct pvr_tq_shader_properties *shader_props =
            &state->shader_props;
         struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
         const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
         enum pvr_transfer_pbe_pixel_src pbe_src_format;
         uint32_t tex_state_dma_size;
         pvr_dev_addr_t dev_offset;
         struct pvr_bo *pvr_bo;

         /* Reset the shared register bank ptrs each src implies new texture
          * state (Note that we don't change texture state per prim block).
          */
         state->common_ptr = 0U;
         state->usc_const_reg_ptr = 0U;
         /* We don't use state->dynamic_const_reg_ptr here. */

         if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE)
            read_bgnd = true;

         result = pvr_pbe_src_format_f2d(flags,
                                         src,
                                         transfer_cmd->dst.vk_format,
                                         down_scale,
                                         state->dont_force_pbe,
                                         &pbe_src_format);
         if (result != VK_SUCCESS)
            return result;

         memset(shader_props, 0U, sizeof(*shader_props));

         if (state->custom_mapping.byte_unwind_src > 0U &&
             state->custom_mapping.passes[0U].byte_unwind) {
            pvr_finishme("Unimplemented path.");
         }

         layer->pbe_format = pbe_src_format;
         layer->sample = (src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED);
         shader_props->iterated = true;

         shader_props->pick_component =
            pvr_pick_component_needed(&state->custom_mapping);

         result = pvr_msaa_state(dev_info, transfer_cmd, state);
         if (result != VK_SUCCESS)
            return result;

         /* Linear filtering may have to be done in the USC for formats the
          * TPU can't filter; fall back to point sampling with a warning
          * when even that isn't supported.
          */
         if (state->filter == PVR_FILTER_LINEAR &&
             pvr_requires_usc_linear_filter(src->vk_format)) {
            if (pvr_int_pbe_usc_linear_filter(layer->pbe_format,
                                              layer->sample,
                                              layer->msaa,
                                              shader_props->full_rate)) {
               layer->linear = true;
            } else {
               mesa_logw("Transfer: F32 linear filter not supported.");
            }
         }

         result = pvr_transfer_frag_store_get_shader_info(
            transfer_cmd->cmd_buffer->device,
            &ctx->frag_store,
            shader_props,
            &dev_offset,
            &sh_reg_layout);
         if (result != VK_SUCCESS)
            return result;

         assert(dev_offset.addr <= UINT32_MAX);
         prep_data->state.pds_shader_task_offset = (uint32_t)dev_offset.addr;

         result =
            pvr_pds_coeff_task(ctx, transfer_cmd, layer->sample, prep_data);
         if (result != VK_SUCCESS)
            return result;

         unitex_pds_prog.kick_usc = false;
         unitex_pds_prog.clear = false;

         tex_state_dma_size =
            sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;

         unitex_pds_prog.num_texture_dma_kicks = 1U;
         unitex_pds_prog.num_uniform_dma_kicks = 0U;

         /* Allocate memory for DMA. */
         result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                           ctx->device->heaps.general_heap,
                                           tex_state_dma_size << 2U,
                                           PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                           &pvr_bo);
         if (result != VK_SUCCESS)
            return result;

         result = pvr_sampler_state_for_surface(dev_info,
                                                &transfer_cmd->src,
                                                state->filter,
                                                sh_reg_layout,
                                                0U,
                                                pvr_bo->bo->map);
         if (result != VK_SUCCESS)
            return result;

         result = pvr_image_state_for_surface(ctx,
                                              transfer_cmd,
                                              &transfer_cmd->src,
                                              0U,
                                              sh_reg_layout,
                                              state,
                                              0U,
                                              pvr_bo->bo->map);
         if (result != VK_SUCCESS)
            return result;

         pvr_pds_encode_dma_burst(unitex_pds_prog.texture_dma_control,
                                  unitex_pds_prog.texture_dma_address,
                                  state->common_ptr,
                                  tex_state_dma_size,
                                  pvr_bo->vma->dev_addr.addr,
                                  true,
                                  dev_info);

         state->common_ptr += tex_state_dma_size;

         pvr_write_usc_constants(sh_reg_layout, pvr_bo->bo->map);

         if (pvr_pick_component_needed(&state->custom_mapping)) {
            pvr_dma_texel_unwind(state, sh_reg_layout, pvr_bo->bo->map);
         }
      }

      result = pvr_pds_unitex(dev_info,
                              ctx,
                              transfer_cmd,
                              &unitex_pds_prog,
                              prep_data);
      if (result != VK_SUCCESS)
         return result;

      /* Emit one primitive block per batch of mappings, and a region array
       * entry pointing at each block.
       */
      while (rem_mappings > 0U) {
         const uint64_t transfer_heap_base =
            transfer_cmd->cmd_buffer->device->heaps.transfer_3d_heap
               ->base_addr.addr;
         const uint32_t num_mappings = MIN2(max_mappings_per_pb, rem_mappings);
         struct pvr_rect_mapping *mappings = NULL;
         uint32_t stream_start_offset = 0U;
         pvr_dev_addr_t prim_blk_addr;

         if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
            pvr_finishme("Unimplemented path.");

         if (flags & PVR_TRANSFER_CMD_FLAGS_FILL)
            mappings = pass ? pass->mappings : &fill_mapping;
         else
            mappings = transfer_cmd->mappings;

         /* Primitive block addresses are heap-relative. */
         prim_blk_addr =
            PVR_DEV_ADDR(pvr_cs_bo->vma->dev_addr.addr - transfer_heap_base);
         prim_blk_addr.addr +=
            (uintptr_t)blk_cs_ptr - (uintptr_t)pvr_cs_bo->bo->map;

         result = pvr_isp_primitive_block(
            dev_info,
            ctx,
            transfer_cmd,
            prep_data,
            flags & PVR_TRANSFER_CMD_FLAGS_FILL ? NULL : src,
            state->custom_filter,
            mappings,
            num_mappings,
            mapping_offset,
            read_bgnd,
            &stream_start_offset,
            &blk_cs_ptr);
         if (result != VK_SUCCESS)
            return result;

         /* CS_PRIM_BASE points inside the block, past the TSP/PDS data. */
         prim_blk_addr.addr += stream_start_offset;

         if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
            pvr_finishme("Unimplemented path.");
         } else {
            pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_FORMAT, word) {
               word.cs_type = PVRX(IPF_CS_TYPE_PRIM);
               word.cs_isp_state_read = true;
               word.cs_isp_state_size = 2U;
               /* Two triangles per quad mapping. */
               word.cs_prim_total = 2U * num_mappings - 1U;
               word.cs_mask_fmt = PVRX(IPF_CS_MASK_FMT_FULL);
               word.cs_prim_base_pres = true;
            }
            cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_FORMAT);

            pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_BASE, word) {
               word.cs_prim_base = prim_blk_addr;
            }
            cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_BASE);
         }

         rem_mappings -= num_mappings;
         mapping_offset += num_mappings;
      }
   }

   if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
      pvr_finishme("Unimplemented path.");

   /* Terminate the control stream. */
   pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, word) {
      word.cs_type = PVRX(IPF_CS_TYPE_TERM);
   }
   cs_ptr += pvr_cmd_length(IPF_CONTROL_STREAM);

   pvr_csb_pack (&regs->isp_mtile_base, CR_ISP_MTILE_BASE, reg) {
      reg.addr =
         PVR_DEV_ADDR(pvr_cs_bo->vma->dev_addr.addr -
                      ctx->device->heaps.transfer_3d_heap->base_addr.addr);
   }

   pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
      reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_2D);
   }

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2))
      pvr_finishme("Unimplemented path.");
   else
      regs->isp_rgn = 0UL;

   return VK_SUCCESS;
}
+
+static void pvr_transfer_set_filter(struct pvr_transfer_cmd *transfer_cmd,
+ struct pvr_transfer_3d_state *state)
+{
+ VkRect2D *src = &transfer_cmd->mappings[0U].src_rect;
+ VkRect2D *dst = &transfer_cmd->mappings[0U].dst_rect;
+
+ if (!transfer_cmd->src_present)
+ return;
+
+ /* If no scaling is applied to the copy region, we can use point
+ * filtering.
+ */
+ if (!state->custom_filter && (src->extent.width == dst->extent.width) &&
+ (src->extent.height == dst->extent.height))
+ state->filter = PVR_FILTER_POINT;
+ else
+ state->filter = transfer_cmd->filter;
+}
+
/** Generates hw resources to kick a 3D clip blit.
 *
 * Prepares a background load of the destination (via pvr_3d_copy_blit_core
 * on a derived command), then builds the ISP control stream for the actual
 * blit and patches the registers needed for viewport masking and, when
 * downscaling a resolve, the PBE downscale path.
 */
static VkResult pvr_3d_clip_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* Saved so it can be restored after the background blit below. */
   uint32_t texel_unwind_src = state->custom_mapping.texel_unwind_src;
   struct pvr_transfer_cmd bg_cmd = { 0U };
   uint32_t control_reg;
   VkResult result;

   state->dont_force_pbe = false;
   bg_cmd.scissor = transfer_cmd->scissor;
   bg_cmd.cmd_buffer = transfer_cmd->cmd_buffer;
   bg_cmd.flags = transfer_cmd->flags;
   /* The background load must not inherit fill/merge/pick behavior. */
   bg_cmd.flags &=
      ~(PVR_TRANSFER_CMD_FLAGS_FAST2D | PVR_TRANSFER_CMD_FLAGS_FILL |
        PVR_TRANSFER_CMD_FLAGS_DSMERGE | PVR_TRANSFER_CMD_FLAGS_PICKD);

   bg_cmd.src_present = state->custom_mapping.pass_count > 0U ? false : true;
   if (bg_cmd.src_present) {
      /* Identity blit of the destination onto itself (scissor area). */
      bg_cmd.mappings[0U].src_rect = transfer_cmd->scissor;
      bg_cmd.mappings[0U].dst_rect = transfer_cmd->scissor;
      bg_cmd.resolve_op = PVR_RESOLVE_BLEND;
      bg_cmd.src = transfer_cmd->dst;
   }

   state->filter = PVR_FILTER_DONTCARE;
   bg_cmd.dst = transfer_cmd->dst;
   /* The background reads the destination, so unwind as the destination. */
   state->custom_mapping.texel_unwind_src =
      state->custom_mapping.texel_unwind_dst;

   result =
      pvr_3d_copy_blit_core(ctx, &bg_cmd, prep_data, pass_idx, finished_out);
   if (result != VK_SUCCESS)
      return result;

   /* If the destination has 4 channels and the source has at most 2, we still
    * need all 4 channels from the USC into the PBE.
    */
   state->dont_force_pbe = true;
   state->custom_mapping.texel_unwind_src = texel_unwind_src;

   /* We need the viewport mask, otherwise all pixels would be disabled. */
   pvr_csb_pack (&control_reg, CR_ISP_BGOBJVALS, reg) {
      reg.mask = true;
   }
   state->regs.isp_bgobjvals |= control_reg;

   pvr_transfer_set_filter(transfer_cmd, state);
   result = pvr_isp_ctrl_stream(&ctx->device->pdevice->dev_info,
                                ctx,
                                transfer_cmd,
                                prep_data);
   if (result != VK_SUCCESS)
      return result;

   /* In case of resolve M -> S, the accumulation is read from and written to a
    * single sampled surface. Make sure that we are resolving and we have the
    * right number of tiles.
    */
   if (state->down_scale) {
      uint64_t tmp;

      pvr_csb_pack (&tmp, CR_PBE_WORD0_MRT0, reg) {
         reg.downscale = true;
      }
      state->regs.pbe_wordx_mrty[0U] |= tmp;

      result = pvr_isp_tiles(ctx->device, state);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}
+
+static bool pvr_texel_unwind(uint32_t bpp,
+ pvr_dev_addr_t dev_addr,
+ bool is_input,
+ uint32_t texel_extend,
+ uint32_t *texel_unwind_out)
+{
+ uint32_t texel_unwind = 0U;
+
+ for (uint32_t i = 0U; i < 16U; i++) {
+ if (pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
+ break;
+ } else {
+ if (i == 15U) {
+ return false;
+ } else {
+ dev_addr.addr -= (bpp / texel_extend) / 8U;
+ texel_unwind++;
+ }
+ }
+ }
+
+ *texel_unwind_out = texel_unwind;
+
+ return true;
+}
+
+static bool pvr_byte_unwind(uint32_t bpp,
+ pvr_dev_addr_t dev_addr,
+ bool is_input,
+ uint32_t *byte_unwind_out)
+{
+ uint32_t byte_unwind = 0U;
+
+ for (uint32_t i = 0U; i < 16U; i++) {
+ if (pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
+ break;
+ } else {
+ if (i == 15U) {
+ return false;
+ } else {
+ dev_addr.addr -= 1U;
+ byte_unwind++;
+ }
+ }
+ }
+
+ *byte_unwind_out = byte_unwind;
+
+ return true;
+}
+
+static bool pvr_is_identity_mapping(const struct pvr_rect_mapping *mapping)
+{
+ return (mapping->src_rect.offset.x == mapping->dst_rect.offset.x &&
+ mapping->src_rect.offset.y == mapping->dst_rect.offset.y &&
+ mapping->src_rect.extent.width == mapping->dst_rect.extent.width &&
+ mapping->src_rect.extent.height == mapping->dst_rect.extent.height);
+}
+
+static inline bool pvr_is_pbe_stride_aligned(const uint32_t stride)
+{
+ if (stride == 1U)
+ return true;
+
+ return ((stride & (PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE) - 1U)) ==
+ 0x0U);
+}
+
+static struct pvr_transfer_pass *
+pvr_create_pass(struct pvr_transfer_custom_mapping *custom_mapping,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ bool extend_height)
+{
+ struct pvr_transfer_pass *pass;
+
+ assert(custom_mapping->pass_count < PVR_TRANSFER_MAX_PASSES);
+
+ pass = &custom_mapping->passes[custom_mapping->pass_count];
+ pass->clip_rects_count = 0U;
+ pass->dst_offset = dst_offset;
+ pass->src_offset = src_offset;
+ pass->mapping_count = 0U;
+ pass->extend_height = extend_height;
+
+ custom_mapping->pass_count++;
+
+ return pass;
+}
+
+/* Acquire pass with given offset. If one doesn't exist, create new. */
+static struct pvr_transfer_pass *
+pvr_acquire_pass(struct pvr_transfer_custom_mapping *custom_mapping,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ bool extend_height)
+{
+ for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
+ if (custom_mapping->passes[i].dst_offset == dst_offset)
+ return &custom_mapping->passes[i];
+ }
+
+ return pvr_create_pass(custom_mapping, dst_offset, src_offset, extend_height);
+}
+
+static void pvr_remove_mapping(struct pvr_transfer_pass *pass, uint32_t idx)
+{
+ assert(idx < pass->mapping_count);
+
+ for (uint32_t i = idx; i < (pass->mapping_count - 1U); i++)
+ pass->mappings[i] = pass->mappings[i + 1U];
+
+ pass->mapping_count--;
+}
+
+static struct pvr_rect_mapping *
+pvr_create_mapping(struct pvr_transfer_pass *pass)
+{
+ assert(pass->mapping_count < ARRAY_SIZE(pass->mappings));
+
+ return &pass->mappings[pass->mapping_count++];
+}
+
/**
 * If PBE can't write to surfaces with odd stride, the stride of
 * destination surface is doubled to make it even. Height of the surface is
 * halved. The source surface is not resized. Each half of the modified
 * destination surface samples every second row from the source surface. This
 * only works with nearest filtering.
 *
 * Returns true when the mapping was rewritten for the doubled stride.
 */
static bool pvr_double_stride(struct pvr_transfer_pass *pass, uint32_t stride)
{
   struct pvr_rect_mapping *mappings = pass->mappings;

   /* A stride of one never needs doubling. */
   if (stride == 1U)
      return false;

   if (mappings[0U].dst_rect.extent.height == 1U && pass->mapping_count == 1U) {
      /* Only one mapping required if height is 1. */
      if ((mappings[0U].dst_rect.offset.y & 1U) != 0U) {
         /* Odd row: shift into the second half of the doubled-stride row. */
         mappings[0U].dst_rect.offset.x += (int32_t)stride;
         mappings[0U].dst_rect.offset.y /= 2U;
         mappings[0U].dst_rect.extent.height =
            (mappings[0U].dst_rect.extent.height + 1U) / 2U;
      } else {
         /* Even row: halve the y range, rounding the end upwards. */
         mappings[0U].dst_rect.extent.height =
            (mappings[0U].dst_rect.offset.y +
             mappings[0U].dst_rect.extent.height + 1U) /
               2U -
            mappings[0U].dst_rect.offset.y;
         mappings[0U].dst_rect.offset.y /= 2U;
      }

      return true;
   }

   pvr_finishme("Add support for multiple mappings.");

   return false;
}
+
/* Rewrites the pass's rect mappings to account for a texel unwind of the
 * source (input == true) or destination (input == false) surface.
 * Currently unimplemented.
 */
static void pvr_unwind_rects(uint32_t width,
                             uint32_t height,
                             uint32_t texel_unwind,
                             bool input,
                             struct pvr_transfer_pass *pass)
{
   pvr_finishme("Implement pvr_unwind_rects().");
}
+
+/**
+ * Assign clip rects to rectangle mappings. TDM can only do two PBE clip
+ * rects per screen.
+ */
+static void
+pvr_map_clip_rects(struct pvr_transfer_custom_mapping *custom_mapping)
+{
+ for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
+ struct pvr_transfer_pass *pass = &custom_mapping->passes[i];
+
+ pass->clip_rects_count = 0U;
+ for (uint32_t j = 0U; j < pass->mapping_count; j++) {
+ struct pvr_rect_mapping *mappings = pass->mappings;
+ VkRect2D *clip_rects = pass->clip_rects;
+ bool merged = false;
+
+ /* Try merge adjacent clip rects. */
+ for (uint32_t k = 0U; k < pass->clip_rects_count; k++) {
+ if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
+ clip_rects[k].extent.height ==
+ mappings[j].dst_rect.extent.height &&
+ clip_rects[k].offset.x + clip_rects[k].extent.width ==
+ mappings[j].dst_rect.offset.x) {
+ clip_rects[k].extent.width += mappings[j].dst_rect.extent.width;
+ merged = true;
+ break;
+ }
+
+ if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
+ clip_rects[k].extent.height ==
+ mappings[j].dst_rect.extent.height &&
+ clip_rects[k].offset.x ==
+ mappings[j].dst_rect.offset.x +
+ mappings[j].dst_rect.extent.width) {
+ clip_rects[k].offset.x = mappings[j].dst_rect.offset.x;
+ clip_rects[k].extent.width += mappings[j].dst_rect.extent.width;
+ merged = true;
+ break;
+ }
+
+ if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
+ clip_rects[k].extent.width ==
+ mappings[j].dst_rect.extent.width &&
+ clip_rects[k].offset.y + clip_rects[k].extent.height ==
+ mappings[j].dst_rect.offset.y) {
+ clip_rects[k].extent.height +=
+ mappings[j].dst_rect.extent.height;
+ merged = true;
+ break;
+ }
+
+ if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
+ clip_rects[k].extent.width ==
+ mappings[j].dst_rect.extent.width &&
+ clip_rects[k].offset.y ==
+ mappings[j].dst_rect.offset.y +
+ mappings[j].dst_rect.extent.height) {
+ clip_rects[k].extent.height +=
+ mappings[j].dst_rect.extent.height;
+ clip_rects[k].offset.y = mappings[j].dst_rect.offset.y;
+ merged = true;
+ break;
+ }
+ }
+
+ if (merged)
+ continue;
+
+ /* Create new pass if needed, TDM can only have 2 clip rects. */
+ if (pass->clip_rects_count >= custom_mapping->max_clip_rects) {
+ struct pvr_transfer_pass *new_pass =
+ pvr_create_pass(custom_mapping,
+ pass->dst_offset,
+ pass->src_offset,
+ pass->extend_height);
+ struct pvr_rect_mapping *new_mapping = pvr_create_mapping(new_pass);
+
+ new_pass->clip_rects_count = 1U;
+ *new_mapping = pass->mappings[j];
+
+ pvr_remove_mapping(pass, j);
+
+ /* Redo - mapping was replaced. */
+ j--;
+ } else {
+ pass->clip_rects[pass->clip_rects_count] =
+ pass->mappings[j].dst_rect;
+
+ pass->clip_rects_count++;
+
+ assert(pass->clip_rects_count <= ARRAY_SIZE(pass->clip_rects));
+ }
+ }
+ }
+}
+
/* Expands the custom mapping's first pass into concrete per-pass rect
 * mappings and clip rects, applying texel extend scaling, texel unwinds and
 * stride doubling. dst_mem_layout is currently unused (the remaining
 * handling is behind pvr_finishme stubs).
 */
static void
pvr_generate_custom_mapping(uint32_t src_stride,
                            uint32_t src_width,
                            uint32_t src_height,
                            uint32_t dst_stride,
                            uint32_t dst_width,
                            uint32_t dst_height,
                            enum pvr_memlayout dst_mem_layout,
                            struct pvr_transfer_custom_mapping *custom_mapping)
{
   /* Texel extend changes the effective pixel width, so scale the strides
    * and widths accordingly.
    */
   src_stride *= custom_mapping->texel_extend_src;
   src_width *= custom_mapping->texel_extend_src;
   dst_stride *= custom_mapping->texel_extend_dst;
   dst_width *= custom_mapping->texel_extend_dst;

   if (custom_mapping->texel_unwind_src > 0U) {
      pvr_unwind_rects(src_stride,
                       src_height,
                       custom_mapping->texel_unwind_src,
                       true,
                       &custom_mapping->passes[0U]);
   }

   if (custom_mapping->double_stride) {
      /* pvr_double_stride() may decline (and clears the flag); the stride
       * is still doubled for the destination unwind below.
       */
      custom_mapping->double_stride =
         pvr_double_stride(&custom_mapping->passes[0U], dst_stride);

      dst_stride *= 2U;
   }

   pvr_unwind_rects(dst_stride,
                    dst_height,
                    custom_mapping->texel_unwind_dst,
                    false,
                    &custom_mapping->passes[0U]);

   pvr_map_clip_rects(custom_mapping);

   /* If the last row of the source mapping is sampled, height of the surface
    * can only be increased if the new area contains a valid region. Some blits
    * are split to two sources.
    */
   if (custom_mapping->texel_unwind_src > 0U) {
      pvr_finishme("Implement pvr_generate_custom_mapping().");
   }
}
+
/* Works out whether this transfer needs a custom mapping (texel/byte
 * unwinds, texel extend or PBE stride doubling) to satisfy surface address
 * alignment and stride restrictions. Returns true and fills
 * \p custom_mapping (including its passes) when one is needed; returns
 * false when the command can be processed without one or when no suitable
 * unwind exists.
 */
static bool
pvr_get_custom_mapping(const struct pvr_device_info *dev_info,
                       const struct pvr_transfer_cmd *transfer_cmd,
                       uint32_t max_clip_rects,
                       struct pvr_transfer_custom_mapping *custom_mapping)
{
   const uint32_t dst_bpp =
      vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
   struct pvr_transfer_pass *pass;
   bool ret;

   custom_mapping->max_clip_rects = max_clip_rects;
   custom_mapping->texel_unwind_src = 0U;
   custom_mapping->texel_unwind_dst = 0U;
   custom_mapping->texel_extend_src = 1U;
   custom_mapping->texel_extend_dst = 1U;
   custom_mapping->byte_unwind_src = 0U;
   custom_mapping->pass_count = 0U;

   custom_mapping->max_clip_size = PVR_MAX_CLIP_SIZE(dev_info);

   /* First try a plain texel unwind of the destination. */
   ret = pvr_texel_unwind(dst_bpp,
                          transfer_cmd->dst.dev_addr,
                          false,
                          1U,
                          &custom_mapping->texel_unwind_dst);
   if (!ret) {
      /* No texel unwind found: extend the destination texels to bytes and
       * retry; linear (or 1-row twiddled) sources are extended to match.
       */
      custom_mapping->texel_extend_dst = dst_bpp / 8U;
      if (transfer_cmd->src_present) {
         if (transfer_cmd->src.mem_layout == PVR_MEMLAYOUT_LINEAR) {
            custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
         } else if (transfer_cmd->src.mem_layout == PVR_MEMLAYOUT_TWIDDLED &&
                    transfer_cmd->src.height == 1U) {
            custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
         }
      }

      ret = pvr_texel_unwind(dst_bpp,
                             transfer_cmd->dst.dev_addr,
                             false,
                             custom_mapping->texel_extend_dst,
                             &custom_mapping->texel_unwind_dst);
      if (!ret)
         return false;
   }

   if (transfer_cmd->src_present) {
      const uint32_t src_bpp =
         vk_format_get_blocksizebits(transfer_cmd->src.vk_format);

      /* Escalate through: already aligned -> texel unwind -> byte unwind ->
       * texel extend + unwind. Fail when none of them applies.
       */
      ret = pvr_is_surface_aligned(transfer_cmd->src.dev_addr, true, src_bpp);

      if (!ret && (transfer_cmd->src.mem_layout == PVR_MEMLAYOUT_LINEAR ||
                   transfer_cmd->src.height == 1U)) {
         ret = pvr_texel_unwind(src_bpp,
                                transfer_cmd->src.dev_addr,
                                true,
                                custom_mapping->texel_extend_src,
                                &custom_mapping->texel_unwind_src);
      }

      if (!ret && dst_bpp != 24U) {
         ret = pvr_byte_unwind(src_bpp,
                               transfer_cmd->src.dev_addr,
                               true,
                               &custom_mapping->byte_unwind_src);
      }

      if (!ret) {
         custom_mapping->texel_extend_src = dst_bpp / 8U;
         custom_mapping->texel_extend_dst = custom_mapping->texel_extend_src;

         ret = pvr_texel_unwind(src_bpp,
                                transfer_cmd->src.dev_addr,
                                true,
                                custom_mapping->texel_extend_src,
                                &custom_mapping->texel_unwind_src);
      }

      if (!ret)
         return false;
   }

   VkRect2D rect = transfer_cmd->scissor;
   assert(
      (rect.offset.x + rect.extent.width) <= custom_mapping->max_clip_size &&
      (rect.offset.y + rect.extent.height) <= custom_mapping->max_clip_size);

   /* Texel extend only works with strided memory layout, because pixel width is
    * changed. Texel unwind only works with strided memory layout. 1D blits are
    * allowed.
    *
    * NOTE(review): src.height is read before src_present here; src is an
    * embedded struct so the access is safe, but the value may be
    * meaningless when src_present is false - confirm ordering is intended.
    */
   if (transfer_cmd->src.height > 1U && transfer_cmd->src_present &&
       (custom_mapping->texel_extend_src > 1U ||
        custom_mapping->texel_unwind_src > 0U) &&
       transfer_cmd->src.mem_layout != PVR_MEMLAYOUT_LINEAR) {
      return false;
   }

   /* Texel extend only works with strided memory layout, because pixel width is
    * changed. Texel unwind only works with strided memory layout. 1D blits are
    * allowed.
    */
   if ((custom_mapping->texel_extend_dst > 1U ||
        custom_mapping->texel_unwind_dst > 0U) &&
       transfer_cmd->dst.mem_layout != PVR_MEMLAYOUT_LINEAR &&
       transfer_cmd->dst.height > 1U) {
      return false;
   }

   /* Linear destinations with a stride the PBE can't write need stride
    * doubling.
    */
   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_LINEAR) {
      custom_mapping->double_stride = !pvr_is_pbe_stride_aligned(
         transfer_cmd->dst.stride * custom_mapping->texel_extend_dst);
   }

   /* Only build passes when some remapping is actually required. */
   if (custom_mapping->byte_unwind_src > 0U ||
       custom_mapping->texel_unwind_src > 0U ||
       custom_mapping->texel_unwind_dst > 0U || custom_mapping->double_stride) {
      struct pvr_rect_mapping *mapping;

      pass = pvr_acquire_pass(custom_mapping, 0U, 0U, false);
      mapping = pvr_create_mapping(pass);

      if (transfer_cmd->src_present) {
         *mapping = transfer_cmd->mappings[0U];
      } else {
         mapping->src_rect = transfer_cmd->scissor;
         mapping->dst_rect = transfer_cmd->scissor;
      }
   } else {
      return false;
   }

   /* Scale the x extents of the first mapping to the extended texel size. */
   if (custom_mapping->texel_extend_src > 1U ||
       custom_mapping->texel_extend_dst > 1U) {
      pass->mappings[0U].src_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->mappings[0U].src_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->mappings[0U].dst_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->mappings[0U].dst_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
   }

   if (transfer_cmd->src_present) {
      pvr_generate_custom_mapping(transfer_cmd->src.stride,
                                  transfer_cmd->src.width,
                                  transfer_cmd->src.height,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   } else {
      pvr_generate_custom_mapping(0U,
                                  0U,
                                  0U,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   }

   return true;
}
+
+/* Rewrites a transfer command according to one pass of a custom mapping.
+ *
+ * Adjusts the source/destination surface widths, heights, strides and
+ * device addresses of the command so that pass 'pass_idx' of
+ * 'custom_mapping' (texel unwind, texel extend, double stride) can be
+ * expressed as a plain blit, and copies the pass' rectangle mappings into
+ * the command.
+ */
+static void
+pvr_modify_command(struct pvr_transfer_custom_mapping *custom_mapping,
+                   uint32_t pass_idx,
+                   struct pvr_transfer_cmd *transfer_cmd)
+{
+   struct pvr_transfer_pass *pass = &custom_mapping->passes[pass_idx];
+   uint32_t bpp;
+
+   if (custom_mapping->texel_extend_src > 1U) {
+      pvr_finishme("Complete pvr_modify_command().");
+   } else if (custom_mapping->texel_extend_dst > 1U) {
+      pvr_finishme("Complete pvr_modify_command().");
+   }
+
+   if (custom_mapping->double_stride) {
+      transfer_cmd->dst.width *= 2U;
+      transfer_cmd->dst.stride *= 2U;
+   }
+
+   if (custom_mapping->texel_unwind_src > 0U) {
+      if (transfer_cmd->src.height == 1U) {
+         transfer_cmd->src.width += custom_mapping->texel_unwind_src;
+         transfer_cmd->src.stride += custom_mapping->texel_unwind_src;
+      } else if (transfer_cmd->src.stride == 1U) {
+         transfer_cmd->src.height += custom_mapping->texel_unwind_src;
+      } else {
+         /* Increase source width by texel unwind. If texel unwind is less than
+          * the distance between width and stride. The blit can be done with one
+          * rectangle mapping, but the width of the surface needs be to
+          * increased in case we sample from the area between width and stride.
+          */
+         transfer_cmd->src.width =
+            MIN2(transfer_cmd->src.width + custom_mapping->texel_unwind_src,
+                 transfer_cmd->src.stride);
+      }
+   }
+
+   transfer_cmd->mapping_count = pass->mapping_count;
+   for (uint32_t i = 0U; i < transfer_cmd->mapping_count; i++)
+      transfer_cmd->mappings[i] = pass->mappings[i];
+
+   if (pass->extend_height)
+      transfer_cmd->src.height += 1U;
+
+   /* Clamp the source surface to the hardware limits. Use PVR_MAX_HEIGHT for
+    * the height clamp; it equals PVR_MAX_WIDTH, but spells out the intent.
+    */
+   transfer_cmd->src.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->src.width);
+   transfer_cmd->src.height = MIN2(PVR_MAX_HEIGHT, transfer_cmd->src.height);
+   transfer_cmd->src.stride = MIN2(PVR_MAX_WIDTH, transfer_cmd->src.stride);
+
+   if (transfer_cmd->dst.height == 1U) {
+      transfer_cmd->dst.width =
+         transfer_cmd->dst.stride + custom_mapping->texel_unwind_dst;
+      transfer_cmd->dst.mem_layout = PVR_MEMLAYOUT_TWIDDLED;
+   }
+
+   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED) {
+      transfer_cmd->dst.width =
+         MIN2((uint32_t)custom_mapping->max_clip_size, transfer_cmd->dst.width);
+      transfer_cmd->dst.height = MIN2((uint32_t)custom_mapping->max_clip_size,
+                                      transfer_cmd->dst.height);
+   } else {
+      transfer_cmd->dst.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->dst.width);
+   }
+
+   /* Rebase the device addresses: back off by the texel unwind, then advance
+    * to the pass' starting offset (scaled by sample count and bytes/pixel).
+    */
+   if (transfer_cmd->src_present) {
+      bpp = vk_format_get_blocksizebits(transfer_cmd->src.vk_format);
+
+      transfer_cmd->src.dev_addr.addr -=
+         custom_mapping->texel_unwind_src * bpp / 8U;
+      transfer_cmd->src.dev_addr.addr +=
+         MAX2(transfer_cmd->src.sample_count, 1U) * pass->src_offset * bpp / 8U;
+   }
+
+   bpp = vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
+   transfer_cmd->dst.dev_addr.addr -=
+      custom_mapping->texel_unwind_dst * bpp / 8U;
+   transfer_cmd->dst.dev_addr.addr +=
+      MAX2(transfer_cmd->dst.sample_count, 1U) * pass->dst_offset * bpp / 8U;
+
+   /* Fills have no source; everything else keeps sampling from src. */
+   transfer_cmd->src_present =
+      !(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
+}
+
+/* Re-routes a copy blit to the clip-blit path.
+ *
+ * Used e.g. for DS merge blits where a background object is needed to
+ * preserve the unmerged channel. Currently a stub: it returns VK_SUCCESS
+ * without preparing anything.
+ *
+ * NOTE(review): *finished_out is left unset here, and pvr_queue_transfer()
+ * loops until it becomes true — confirm the implemented path sets it, or
+ * this route may spin.
+ */
+static VkResult pvr_reroute_to_clip(struct pvr_transfer_ctx *ctx,
+                                    const struct pvr_transfer_cmd *transfer_cmd,
+                                    const struct VkRect2D *dst_rect,
+                                    struct pvr_transfer_prep_data *prep_data,
+                                    uint32_t pass_idx,
+                                    bool *finished_out)
+{
+   pvr_finishme("Unimplemented path.");
+   return VK_SUCCESS;
+}
+
+/* Prepares one pass of a copy blit on the 3D path.
+ *
+ * When the formats allow it the blit is remapped to an equivalent RAW
+ * (integer) format so no filtering/format conversion can alter pixel
+ * values. On pass 0 a custom mapping may be computed to work around
+ * stride/unwind restrictions; blits with such a mapping either go through
+ * the clip-blit path or a single adjusted core blit. DS merge blits that
+ * cannot use the PBE byte mask are routed to the clip path. Completion is
+ * reported through *finished_out by the core/clip helpers.
+ */
+static VkResult pvr_3d_copy_blit(struct pvr_transfer_ctx *ctx,
+                                 struct pvr_transfer_cmd *transfer_cmd,
+                                 struct pvr_transfer_prep_data *prep_data,
+                                 uint32_t pass_idx,
+                                 bool *finished_out)
+{
+   const struct pvr_device_info *const dev_info =
+      &ctx->device->pdevice->dev_info;
+
+   const struct pvr_transfer_blit *blit = &transfer_cmd->blit;
+   struct pvr_transfer_3d_state *state = &prep_data->state;
+   struct pvr_transfer_cmd *active_cmd = transfer_cmd;
+   struct pvr_transfer_cmd int_cmd;
+   VkResult result;
+
+   state->dont_force_pbe = false;
+   state->pass_idx = pass_idx;
+
+   pvr_transfer_set_filter(transfer_cmd, state);
+
+   if (transfer_cmd->src_present) {
+      /* Try to work out a condition to map pixel formats to RAW. That is only
+       * possible if we don't perform any kind of 2D operation on the blit as we
+       * don't know the actual pixel values - i.e. it has to be point sampled -
+       * scaling doesn't matter as long as point sampled.
+       */
+      if (transfer_cmd->src.vk_format == transfer_cmd->dst.vk_format &&
+          state->filter == PVR_FILTER_POINT &&
+          transfer_cmd->src.sample_count <= transfer_cmd->dst.sample_count &&
+          (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) == 0U &&
+          transfer_cmd->blit.alpha.type == PVR_ALPHA_NONE) {
+         uint32_t bpp;
+
+         /* Operate on a local copy so the caller's command stays untouched. */
+         int_cmd = *transfer_cmd;
+         active_cmd = &int_cmd;
+         bpp = vk_format_get_blocksizebits(int_cmd.dst.vk_format);
+
+         if (bpp > 0U) {
+            /* Pick the RAW (uint) format with the same texel block size. */
+            switch (bpp) {
+            case 8U:
+               int_cmd.src.vk_format = VK_FORMAT_R8_UINT;
+               break;
+            case 16U:
+               int_cmd.src.vk_format = VK_FORMAT_R8G8_UINT;
+               break;
+            case 24U:
+               int_cmd.src.vk_format = VK_FORMAT_R8G8B8_UINT;
+               break;
+            case 32U:
+               int_cmd.src.vk_format = VK_FORMAT_R32_UINT;
+               break;
+            case 48U:
+               int_cmd.src.vk_format = VK_FORMAT_R16G16B16_UINT;
+               break;
+            case 64U:
+               int_cmd.src.vk_format = VK_FORMAT_R32G32_UINT;
+               break;
+            case 96U:
+               int_cmd.src.vk_format = VK_FORMAT_R32G32B32_UINT;
+               break;
+            case 128U:
+               int_cmd.src.vk_format = VK_FORMAT_R32G32B32A32_UINT;
+               break;
+            default:
+               /* No matching RAW format: fall back to the original command. */
+               active_cmd = transfer_cmd;
+               break;
+            }
+         }
+
+         /* Harmless if the switch fell back to transfer_cmd above: int_cmd is
+          * then unused.
+          */
+         int_cmd.dst.vk_format = int_cmd.src.vk_format;
+      }
+   }
+
+   /* The custom-mapping workaround is computed once, on the first pass; later
+    * passes reuse state->custom_mapping.
+    */
+   if (pass_idx == 0U) {
+      pvr_get_custom_mapping(dev_info, active_cmd, 3U, &state->custom_mapping);
+
+      if (state->custom_mapping.texel_extend_src > 1U)
+         state->custom_mapping.texel_extend_dst = 1U;
+   }
+
+   if (state->custom_mapping.pass_count > 0U) {
+      struct pvr_transfer_pass *pass = &state->custom_mapping.passes[pass_idx];
+
+      if (blit->alpha.type != PVR_ALPHA_NONE)
+         return vk_error(ctx->device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+
+      /* pvr_modify_command() mutates the command, so make sure we work on a
+       * local copy even when the RAW remap above didn't already make one.
+       */
+      if (active_cmd != &int_cmd) {
+         int_cmd = *active_cmd;
+         active_cmd = &int_cmd;
+      }
+
+      state->custom_filter = true;
+
+      pvr_modify_command(&state->custom_mapping, pass_idx, active_cmd);
+
+      if (state->custom_mapping.double_stride || pass->mapping_count > 1U) {
+         result =
+            pvr_3d_clip_blit(ctx, active_cmd, prep_data, pass_idx, finished_out);
+      } else {
+         struct pvr_rect_mapping *mappings = &pass->mappings[0U];
+
+         /* Undo the texel-extend scaling on the source rectangle. */
+         mappings[0U].src_rect.offset.x /=
+            MAX2(1U, state->custom_mapping.texel_extend_dst);
+         mappings[0U].src_rect.extent.width /=
+            MAX2(1U, state->custom_mapping.texel_extend_dst);
+
+         if (int_cmd.src_present) {
+            for (uint32_t i = 0U; i < pass->mapping_count; i++)
+               active_cmd->mappings[i] = mappings[i];
+         }
+
+         active_cmd->scissor = mappings[0U].dst_rect;
+
+         result = pvr_3d_copy_blit_core(ctx,
+                                        active_cmd,
+                                        prep_data,
+                                        pass_idx,
+                                        finished_out);
+      }
+
+      return result;
+   }
+
+   /* Route DS merge blits to Clip blit. Background object is used to preserve
+    * the unmerged channel.
+    */
+   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U) {
+      /* PBE byte mask could be used for DS merge with FastScale. Clearing the
+       * other channel on a DS merge requires Clip blit.
+       */
+      if (!PVR_HAS_ERN(dev_info, 42064) ||
+          ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U)) {
+         return pvr_reroute_to_clip(ctx,
+                                    active_cmd,
+                                    &active_cmd->scissor,
+                                    prep_data,
+                                    pass_idx,
+                                    finished_out);
+      }
+   }
+
+   return pvr_3d_copy_blit_core(ctx,
+                                active_cmd,
+                                prep_data,
+                                pass_idx,
+                                finished_out);
+}
+
+/* Validates the device-virtual addresses used by a transfer command.
+ *
+ * Currently a stub that accepts every command; the checks from
+ * TQ_3DValidateVaddr still need porting.
+ */
+static bool pvr_3d_validate_addr(struct pvr_transfer_cmd *transfer_cmd)
+{
+   /* TODO: Complete this function, based on TQ_3DValidateVaddr. */
+   pvr_finishme("Complete pvr_3d_validate_addr().");
+   return true;
+}
+
+/* Serialises the prepared transfer registers into the firmware command
+ * stream of 'cmd'.
+ *
+ * The write order defines the stream layout the consumer parses — do not
+ * reorder these stores without updating the corresponding winsys/firmware
+ * definition.
+ *
+ * NOTE(review): the 64-bit registers are stored through a cast from
+ * uint32_t *; this assumes fw_stream is suitably aligned for 8-byte
+ * accesses on the target — confirm against the winsys buffer definition.
+ */
+static void
+pvr_submit_info_stream_init(struct pvr_transfer_ctx *ctx,
+                            struct pvr_transfer_prep_data *prep_data,
+                            struct pvr_winsys_transfer_cmd *cmd)
+{
+   const struct pvr_winsys_transfer_regs *const regs = &prep_data->state.regs;
+   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
+   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
+
+   uint32_t *stream_ptr = (uint32_t *)cmd->fw_stream;
+
+   /* 64-bit registers first. */
+   *(uint64_t *)stream_ptr = regs->pds_bgnd0_base;
+   stream_ptr += pvr_cmd_length(CR_PDS_BGRND0_BASE);
+
+   *(uint64_t *)stream_ptr = regs->pds_bgnd1_base;
+   stream_ptr += pvr_cmd_length(CR_PDS_BGRND1_BASE);
+
+   *(uint64_t *)stream_ptr = regs->pds_bgnd3_sizeinfo;
+   stream_ptr += pvr_cmd_length(CR_PDS_BGRND3_SIZEINFO);
+
+   *(uint64_t *)stream_ptr = regs->isp_mtile_base;
+   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_BASE);
+
+   /* All nine 64-bit PBE words are copied as one contiguous run. */
+   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_wordx_mrty) == 9U);
+   STATIC_ASSERT(sizeof(regs->pbe_wordx_mrty[0]) == sizeof(uint64_t));
+   memcpy(stream_ptr, regs->pbe_wordx_mrty, sizeof(regs->pbe_wordx_mrty));
+   stream_ptr += 9U * 2U;
+
+   /* 32-bit registers from here on. */
+   *stream_ptr = regs->isp_bgobjvals;
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
+
+   *stream_ptr = regs->usc_pixel_output_ctrl;
+   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
+
+   *stream_ptr = regs->usc_clear_register0;
+   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER0);
+
+   *stream_ptr = regs->usc_clear_register1;
+   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER1);
+
+   *stream_ptr = regs->usc_clear_register2;
+   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER2);
+
+   *stream_ptr = regs->usc_clear_register3;
+   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER3);
+
+   *stream_ptr = regs->isp_mtile_size;
+   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_SIZE);
+
+   *stream_ptr = regs->isp_render_origin;
+   stream_ptr += pvr_cmd_length(CR_ISP_RENDER_ORIGIN);
+
+   *stream_ptr = regs->isp_ctl;
+   stream_ptr += pvr_cmd_length(CR_ISP_CTL);
+
+   *stream_ptr = regs->isp_aa;
+   stream_ptr += pvr_cmd_length(CR_ISP_AA);
+
+   *stream_ptr = regs->event_pixel_pds_info;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+   *stream_ptr = regs->event_pixel_pds_code;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_CODE);
+
+   *stream_ptr = regs->event_pixel_pds_data;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
+
+   *stream_ptr = regs->isp_render;
+   stream_ptr += pvr_cmd_length(CR_ISP_RENDER);
+
+   *stream_ptr = regs->isp_rgn;
+   stream_ptr++;
+
+   /* Multicore-only trailer. */
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      *stream_ptr = regs->frag_screen;
+      stream_ptr++;
+   }
+
+   cmd->fw_stream_len = (uint8_t *)stream_ptr - cmd->fw_stream;
+   assert(cmd->fw_stream_len <= ARRAY_SIZE(cmd->fw_stream));
+}
+
+/* Fills in a winsys submit info structure from the accumulated prepares,
+ * serialising each prepare's register state into its firmware stream.
+ */
+static void pvr_transfer_job_ws_submit_info_init(
+   struct pvr_transfer_ctx *ctx,
+   struct pvr_transfer_submit *submit,
+   struct vk_sync *wait,
+   struct pvr_winsys_transfer_submit_info *submit_info)
+{
+   const uint32_t count = submit->prep_count;
+
+   submit_info->frame_num = ctx->device->global_queue_present_count;
+   submit_info->job_num = ctx->device->global_cmd_buffer_submit_count;
+   submit_info->wait = wait;
+   submit_info->cmd_count = count;
+
+   for (uint32_t idx = 0U; idx < count; idx++) {
+      struct pvr_transfer_prep_data *const prep = &submit->prep_array[idx];
+      struct pvr_winsys_transfer_cmd *const ws_cmd = &submit_info->cmds[idx];
+
+      pvr_submit_info_stream_init(ctx, prep, ws_cmd);
+      ws_cmd->flags = prep->flags;
+   }
+}
+
+/* Builds the winsys submit info for the accumulated prepares and kicks the
+ * transfer context, signalling 'signal_sync' (may be NULL) on completion.
+ */
+static VkResult pvr_submit_transfer(struct pvr_transfer_ctx *ctx,
+                                    struct pvr_transfer_submit *submit,
+                                    struct vk_sync *wait,
+                                    struct vk_sync *signal_sync)
+{
+   struct pvr_device *const device = ctx->device;
+   struct pvr_winsys_transfer_submit_info submit_info;
+
+   pvr_transfer_job_ws_submit_info_init(ctx, submit, wait, &submit_info);
+
+   return device->ws->ops->transfer_submit(ctx->ws_ctx,
+                                           &submit_info,
+                                           &device->pdevice->dev_info,
+                                           signal_sync);
+}
+
+/* Prepares and submits all passes of a single transfer command.
+ *
+ * Repeatedly calls the blit prepare functions with an increasing pass
+ * index until one of them reports completion, batching prepares and
+ * flushing the batch to the winsys whenever the blit finishes or the
+ * prepare array fills up.
+ */
+static VkResult pvr_queue_transfer(struct pvr_transfer_ctx *ctx,
+                                   struct pvr_transfer_cmd *transfer_cmd,
+                                   struct vk_sync *wait,
+                                   struct vk_sync *signal_sync)
+{
+   struct pvr_transfer_prep_data *prep_data = NULL;
+   struct pvr_transfer_prep_data *prev_prep_data;
+   struct pvr_transfer_submit submit = { 0U };
+   bool finished = false;
+   uint32_t pass = 0U;
+   VkResult result;
+
+   /* Transfer queue might decide to do a blit in multiple passes. When the
+    * prepare doesn't set the finished flag this code will keep calling the
+    * prepare with increasing pass. If queued transfers are submitted from
+    * here we submit them straight away. That's why we only need a single
+    * prepare for the blit rather then one for each pass. Otherwise we insert
+    * each prepare into the prepare array. When the client does blit batching
+    * and we split the blit into multiple passes each pass in each queued
+    * transfer adds one more prepare. Thus the prepare array after 2
+    * pvr_queue_transfer calls might look like:
+    *
+    *   +------+------++-------+-------+-------+
+    *   |B0/P0 |B0/P1 || B1/P0 | B1/P1 | B1/P2 |
+    *   +------+------++-------+-------+-------+
+    *       F      S/U     F       F      S/U
+    *
+    *   Bn/Pm : nth blit (queue transfer call) / mth prepare
+    *   F     : fence point
+    *   S/U   : update / server sync update point
+    */
+
+   while (!finished) {
+      prev_prep_data = prep_data;
+      prep_data = &submit.prep_array[submit.prep_count++];
+
+      /* Clear down the memory before we write to this prep. */
+      memset(prep_data, 0U, sizeof(*prep_data));
+
+      if (pass == 0U) {
+         if (!pvr_3d_validate_addr(transfer_cmd))
+            return vk_error(ctx->device, VK_ERROR_FEATURE_NOT_PRESENT);
+      } else {
+         /* Transfer queue workarounds could use more than one pass with 3D
+          * path.
+          */
+         /* Carry the previous pass' state forward (e.g. the custom mapping
+          * computed on pass 0).
+          */
+         prep_data->state = prev_prep_data->state;
+      }
+
+      if (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D) {
+         result =
+            pvr_3d_clip_blit(ctx, transfer_cmd, prep_data, pass, &finished);
+      } else {
+         result =
+            pvr_3d_copy_blit(ctx, transfer_cmd, prep_data, pass, &finished);
+      }
+      if (result != VK_SUCCESS)
+         return result;
+
+      /* Submit if we have finished the blit or if we are out of prepares. */
+      if (finished || submit.prep_count == ARRAY_SIZE(submit.prep_array)) {
+         /* Only the very last flush signals; intermediate flushes pass NULL.
+          *
+          * NOTE(review): 'wait' is handed to every flush, not just the first
+          * — confirm the winsys tolerates being given the same wait sync on
+          * consecutive kicks.
+          */
+         result = pvr_submit_transfer(ctx,
+                                      &submit,
+                                      wait,
+                                      finished ? signal_sync : NULL);
+         if (result != VK_SUCCESS)
+            return result;
+
+         /* Check if we need to reset prep_count. */
+         if (submit.prep_count == ARRAY_SIZE(submit.prep_array))
+            submit.prep_count = 0U;
+      }
+
+      pass++;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Submits every queued transfer command of a transfer sub-command.
+ *
+ * The fw guarantees that any kick on the same context will be synchronized
+ * in submission order, so only the first kick needs to wait on 'wait_sync'
+ * and only the last kick needs to signal 'signal_sync'. Returns the first
+ * failing result, or VK_SUCCESS.
+ */
+VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
+                                 struct pvr_sub_cmd_transfer *sub_cmd,
+                                 struct vk_sync *wait_sync,
+                                 struct vk_sync *signal_sync)
+{
+   /* Loop-invariant: the first/last entries don't change while we iterate. */
+   const struct pvr_transfer_cmd *const first_cmd =
+      list_first_entry(&sub_cmd->transfer_cmds, struct pvr_transfer_cmd, link);
+   const struct pvr_transfer_cmd *const last_cmd =
+      list_last_entry(&sub_cmd->transfer_cmds, struct pvr_transfer_cmd, link);
+
+   list_for_each_entry_safe (struct pvr_transfer_cmd,
+                             transfer_cmd,
+                             &sub_cmd->transfer_cmds,
+                             link) {
+      VkResult result =
+         pvr_queue_transfer(ctx,
+                            transfer_cmd,
+                            transfer_cmd == first_cmd ? wait_sync : NULL,
+                            transfer_cmd == last_cmd ? signal_sync : NULL);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return VK_SUCCESS;
}